{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 9036, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.6764705882352945e-08, "logits/chosen": -0.32980820536613464, "logits/rejected": -0.28433364629745483, "logps/chosen": -131.85997009277344, "logps/rejected": -214.78375244140625, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 7.352941176470589e-08, "logits/chosen": -0.3155045211315155, "logits/rejected": -0.31215861439704895, "logps/chosen": -129.69313049316406, "logps/rejected": -193.845458984375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.1029411764705884e-07, "logits/chosen": -0.8325361013412476, "logits/rejected": -0.9143111705780029, "logps/chosen": -99.84840393066406, "logps/rejected": -25.540142059326172, "loss": 0.7116, "rewards/accuracies": 1.0, "rewards/chosen": 0.01596832275390625, "rewards/margins": 0.0019191736355423927, "rewards/rejected": 0.014049149118363857, "step": 3 }, { "epoch": 0.0, "learning_rate": 1.4705882352941178e-07, "logits/chosen": -0.3899770975112915, "logits/rejected": -0.38498425483703613, "logps/chosen": -91.48783874511719, "logps/rejected": -77.10614776611328, "loss": 0.7027, "rewards/accuracies": 0.0, "rewards/chosen": 0.0017265320057049394, "rewards/margins": -0.03976059332489967, "rewards/rejected": 0.04148712381720543, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.8382352941176472e-07, "logits/chosen": -0.547710657119751, "logits/rejected": -0.5207879543304443, "logps/chosen": -107.52085876464844, "logps/rejected": -105.01902770996094, "loss": 0.6853, "rewards/accuracies": 1.0, "rewards/chosen": 0.0040077208541333675, "rewards/margins": 0.0048622130416333675, "rewards/rejected": -0.0008544921875, "step": 5 }, { "epoch": 0.0, "learning_rate": 2.2058823529411768e-07, "logits/chosen": -0.5564070343971252, "logits/rejected": -0.5836220979690552, "logps/chosen": -132.30023193359375, "logps/rejected": -114.33777618408203, "loss": 0.6812, "rewards/accuracies": 1.0, "rewards/chosen": 0.036224365234375, "rewards/margins": 0.035810090601444244, "rewards/rejected": 0.0004142761172261089, "step": 6 }, { "epoch": 0.0, "learning_rate": 2.573529411764706e-07, "logits/chosen": -0.7022334933280945, "logits/rejected": -0.6964675784111023, "logps/chosen": -95.41545867919922, "logps/rejected": -121.41693878173828, "loss": 0.6921, "rewards/accuracies": 1.0, "rewards/chosen": 0.01045913714915514, "rewards/margins": 0.031658172607421875, "rewards/rejected": -0.02119903638958931, "step": 7 }, { "epoch": 0.0, "learning_rate": 2.9411764705882356e-07, "logits/chosen": -0.7327532172203064, "logits/rejected": -0.8516826629638672, "logps/chosen": -223.0664520263672, "logps/rejected": -190.06613159179688, "loss": 0.7206, "rewards/accuracies": 1.0, "rewards/chosen": 0.02066497877240181, "rewards/margins": 0.02515411376953125, "rewards/rejected": -0.004489135928452015, "step": 8 }, { "epoch": 0.0, "learning_rate": 3.308823529411765e-07, "logits/chosen": -0.46916303038597107, "logits/rejected": -0.5009225606918335, "logps/chosen": -141.59768676757812, "logps/rejected": -101.18876647949219, "loss": 0.7173, "rewards/accuracies": 0.0, "rewards/chosen": -0.0515594482421875, "rewards/margins": -0.06764602661132812, "rewards/rejected": 0.016086578369140625, "step": 9 }, { "epoch": 0.0, "learning_rate": 3.6764705882352943e-07, "logits/chosen": -0.2732281982898712, "logits/rejected": -0.2732281982898712, "logps/chosen": -145.75363159179688, "logps/rejected": -145.75363159179688, "loss": 0.7015, "rewards/accuracies": 0.0, "rewards/chosen": -0.008670044131577015, "rewards/margins": 0.0, "rewards/rejected": -0.008670044131577015, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.044117647058824e-07, "logits/chosen": -0.469175785779953, "logits/rejected": -0.45727813243865967, "logps/chosen": -104.689697265625, "logps/rejected": -92.37355041503906, "loss": 0.6891, "rewards/accuracies": 0.0, "rewards/chosen": -0.0027313232421875, "rewards/margins": -0.006159210577607155, "rewards/rejected": 0.003427887102589011, "step": 11 }, { "epoch": 0.0, "learning_rate": 4.4117647058823536e-07, "logits/chosen": -0.8299985527992249, "logits/rejected": -0.8468886017799377, "logps/chosen": -117.99163055419922, "logps/rejected": -84.94738006591797, "loss": 0.6843, "rewards/accuracies": 0.0, "rewards/chosen": -0.00916976947337389, "rewards/margins": -0.01039886474609375, "rewards/rejected": 0.0012290955055505037, "step": 12 }, { "epoch": 0.0, "learning_rate": 4.779411764705882e-07, "logits/chosen": -0.3737908601760864, "logits/rejected": -0.39420372247695923, "logps/chosen": -91.18988037109375, "logps/rejected": -125.59150695800781, "loss": 0.7068, "rewards/accuracies": 0.0, "rewards/chosen": 0.013299561105668545, "rewards/margins": -0.04794921725988388, "rewards/rejected": 0.061248779296875, "step": 13 }, { "epoch": 0.0, "learning_rate": 5.147058823529412e-07, "logits/chosen": -0.32310450077056885, "logits/rejected": -0.3266475200653076, "logps/chosen": -109.78984069824219, "logps/rejected": -104.80775451660156, "loss": 0.7051, "rewards/accuracies": 1.0, "rewards/chosen": 0.003755951067432761, "rewards/margins": 0.053705595433712006, "rewards/rejected": -0.04994964599609375, "step": 14 }, { "epoch": 0.0, "learning_rate": 5.514705882352942e-07, "logits/chosen": -0.4205392897129059, "logits/rejected": -0.4496947228908539, "logps/chosen": -191.4027557373047, "logps/rejected": -98.88038635253906, "loss": 0.6972, "rewards/accuracies": 0.0, "rewards/chosen": -0.02796783484518528, "rewards/margins": -0.03745879977941513, "rewards/rejected": 0.009490966796875, "step": 15 }, { "epoch": 0.0, "learning_rate": 5.882352941176471e-07, "logits/chosen": -0.8300701975822449, "logits/rejected": -0.03929199278354645, "logps/chosen": -57.601280212402344, "logps/rejected": -146.1296844482422, "loss": 0.6832, "rewards/accuracies": 1.0, "rewards/chosen": 0.014810181222856045, "rewards/margins": 0.00566558912396431, "rewards/rejected": 0.009144592098891735, "step": 16 }, { "epoch": 0.0, "learning_rate": 6.25e-07, "logits/chosen": -0.3644140958786011, "logits/rejected": -0.3821362853050232, "logps/chosen": -89.29336547851562, "logps/rejected": -75.03744506835938, "loss": 0.7166, "rewards/accuracies": 0.0, "rewards/chosen": -0.0029800415504723787, "rewards/margins": -0.03639679029583931, "rewards/rejected": 0.033416748046875, "step": 17 }, { "epoch": 0.0, "learning_rate": 6.61764705882353e-07, "logits/chosen": -1.045906662940979, "logits/rejected": -1.125693917274475, "logps/chosen": -161.69822692871094, "logps/rejected": -95.882568359375, "loss": 0.706, "rewards/accuracies": 1.0, "rewards/chosen": 0.02354278601706028, "rewards/margins": 0.023720551282167435, "rewards/rejected": -0.00017776490130927414, "step": 18 }, { "epoch": 0.0, "learning_rate": 6.985294117647059e-07, "logits/chosen": -0.3193252384662628, "logits/rejected": -0.3070049285888672, "logps/chosen": -51.2008171081543, "logps/rejected": -158.01089477539062, "loss": 0.6607, "rewards/accuracies": 1.0, "rewards/chosen": 0.018130112439393997, "rewards/margins": 0.021633530035614967, "rewards/rejected": -0.0035034180618822575, "step": 19 }, { "epoch": 0.0, "learning_rate": 7.352941176470589e-07, "logits/chosen": -0.32406944036483765, "logits/rejected": -0.3140714168548584, "logps/chosen": -107.989990234375, "logps/rejected": -91.7331771850586, "loss": 0.6657, "rewards/accuracies": 1.0, "rewards/chosen": -0.01024398859590292, "rewards/margins": 0.00150375347584486, "rewards/rejected": -0.01174774207174778, "step": 20 }, { "epoch": 0.0, "learning_rate": 7.720588235294119e-07, "logits/chosen": -0.3444879949092865, "logits/rejected": -0.34132179617881775, "logps/chosen": -147.43038940429688, "logps/rejected": -220.1051025390625, "loss": 0.6806, "rewards/accuracies": 1.0, "rewards/chosen": 0.05758056789636612, "rewards/margins": 0.0064544677734375, "rewards/rejected": 0.05112610012292862, "step": 21 }, { "epoch": 0.0, "learning_rate": 8.088235294117648e-07, "logits/chosen": -0.40751126408576965, "logits/rejected": -0.26570242643356323, "logps/chosen": -235.1617431640625, "logps/rejected": -153.4332275390625, "loss": 0.7277, "rewards/accuracies": 0.0, "rewards/chosen": -0.00970458984375, "rewards/margins": -0.02338409423828125, "rewards/rejected": 0.01367950439453125, "step": 22 }, { "epoch": 0.01, "learning_rate": 8.455882352941178e-07, "logits/chosen": -0.4058782756328583, "logits/rejected": 0.4974176585674286, "logps/chosen": -101.16447448730469, "logps/rejected": -96.2204818725586, "loss": 0.7009, "rewards/accuracies": 0.0, "rewards/chosen": 0.0058746337890625, "rewards/margins": -0.0013511660508811474, "rewards/rejected": 0.007225799839943647, "step": 23 }, { "epoch": 0.01, "learning_rate": 8.823529411764707e-07, "logits/chosen": -0.2928709089756012, "logits/rejected": -0.3074270486831665, "logps/chosen": -109.67442321777344, "logps/rejected": -109.4026107788086, "loss": 0.6529, "rewards/accuracies": 1.0, "rewards/chosen": 0.04715423658490181, "rewards/margins": 0.11504364013671875, "rewards/rejected": -0.06788940727710724, "step": 24 }, { "epoch": 0.01, "learning_rate": 9.191176470588237e-07, "logits/chosen": -0.5360602736473083, "logits/rejected": -0.5360602736473083, "logps/chosen": -41.94109344482422, "logps/rejected": -41.94109344482422, "loss": 0.7048, "rewards/accuracies": 0.0, "rewards/chosen": -0.0258941650390625, "rewards/margins": 0.0, "rewards/rejected": -0.0258941650390625, "step": 25 }, { "epoch": 0.01, "learning_rate": 9.558823529411764e-07, "logits/chosen": -0.2310425043106079, "logits/rejected": -0.2310425043106079, "logps/chosen": -74.77581787109375, "logps/rejected": -74.77581787109375, "loss": 0.6844, "rewards/accuracies": 0.0, "rewards/chosen": -0.036505889147520065, "rewards/margins": 0.0, "rewards/rejected": -0.036505889147520065, "step": 26 }, { "epoch": 0.01, "learning_rate": 9.926470588235295e-07, "logits/chosen": -0.641738772392273, "logits/rejected": -0.7081177830696106, "logps/chosen": -237.91464233398438, "logps/rejected": -155.5331268310547, "loss": 0.6874, "rewards/accuracies": 0.0, "rewards/chosen": 0.0010513305896893144, "rewards/margins": -0.03675384819507599, "rewards/rejected": 0.03780517727136612, "step": 27 }, { "epoch": 0.01, "learning_rate": 1.0294117647058825e-06, "logits/chosen": -0.680713951587677, "logits/rejected": -0.6923359036445618, "logps/chosen": -167.57101440429688, "logps/rejected": -149.577880859375, "loss": 0.6704, "rewards/accuracies": 1.0, "rewards/chosen": 0.03587036207318306, "rewards/margins": 0.01082763634622097, "rewards/rejected": 0.02504272572696209, "step": 28 }, { "epoch": 0.01, "learning_rate": 1.0661764705882354e-06, "logits/chosen": -0.522035539150238, "logits/rejected": -0.5160204172134399, "logps/chosen": -275.4082946777344, "logps/rejected": -97.77120971679688, "loss": 0.65, "rewards/accuracies": 1.0, "rewards/chosen": 0.05755615234375, "rewards/margins": 0.08126983791589737, "rewards/rejected": -0.02371368370950222, "step": 29 }, { "epoch": 0.01, "learning_rate": 1.1029411764705884e-06, "logits/chosen": -0.4436159133911133, "logits/rejected": -0.4728371798992157, "logps/chosen": -150.3328857421875, "logps/rejected": -192.78958129882812, "loss": 0.6694, "rewards/accuracies": 1.0, "rewards/chosen": 0.05979614332318306, "rewards/margins": 0.07010803371667862, "rewards/rejected": -0.010311889462172985, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.1397058823529413e-06, "logits/chosen": -0.15197913348674774, "logits/rejected": -0.17644396424293518, "logps/chosen": -178.97500610351562, "logps/rejected": -202.63946533203125, "loss": 0.6699, "rewards/accuracies": 0.0, "rewards/chosen": -0.0074371336959302425, "rewards/margins": -0.0381011962890625, "rewards/rejected": 0.03066406212747097, "step": 31 }, { "epoch": 0.01, "learning_rate": 1.1764705882352942e-06, "logits/chosen": -0.6598098278045654, "logits/rejected": -0.6678532958030701, "logps/chosen": -132.4288787841797, "logps/rejected": -118.3171157836914, "loss": 0.6962, "rewards/accuracies": 0.0, "rewards/chosen": -0.013194275088608265, "rewards/margins": -0.059108734130859375, "rewards/rejected": 0.045914459973573685, "step": 32 }, { "epoch": 0.01, "learning_rate": 1.2132352941176472e-06, "logits/chosen": -0.7643871307373047, "logits/rejected": -0.9506406188011169, "logps/chosen": -230.11154174804688, "logps/rejected": -128.79013061523438, "loss": 0.6787, "rewards/accuracies": 1.0, "rewards/chosen": 0.05800781399011612, "rewards/margins": 0.08507385849952698, "rewards/rejected": -0.02706604078412056, "step": 33 }, { "epoch": 0.01, "learning_rate": 1.25e-06, "logits/chosen": -0.4416276812553406, "logits/rejected": -0.005404059309512377, "logps/chosen": -258.8676452636719, "logps/rejected": -167.0272216796875, "loss": 0.6879, "rewards/accuracies": 0.0, "rewards/chosen": 0.01780395582318306, "rewards/margins": -0.01864929124712944, "rewards/rejected": 0.0364532470703125, "step": 34 }, { "epoch": 0.01, "learning_rate": 1.2867647058823528e-06, "logits/chosen": -0.4488449990749359, "logits/rejected": -0.512721061706543, "logps/chosen": -278.23876953125, "logps/rejected": -83.90056610107422, "loss": 0.645, "rewards/accuracies": 1.0, "rewards/chosen": 0.09224548190832138, "rewards/margins": 0.06477279216051102, "rewards/rejected": 0.027472687885165215, "step": 35 }, { "epoch": 0.01, "learning_rate": 1.323529411764706e-06, "logits/chosen": -0.5636940002441406, "logits/rejected": -0.524304986000061, "logps/chosen": -124.6471939086914, "logps/rejected": -178.60633850097656, "loss": 0.704, "rewards/accuracies": 0.0, "rewards/chosen": -0.0025566101539880037, "rewards/margins": -0.019405366852879524, "rewards/rejected": 0.01684875600039959, "step": 36 }, { "epoch": 0.01, "learning_rate": 1.360294117647059e-06, "logits/chosen": -0.3198443651199341, "logits/rejected": -0.3198443651199341, "logps/chosen": -109.32261657714844, "logps/rejected": -109.32261657714844, "loss": 0.6801, "rewards/accuracies": 0.0, "rewards/chosen": -0.03293914720416069, "rewards/margins": 0.0, "rewards/rejected": -0.03293914720416069, "step": 37 }, { "epoch": 0.01, "learning_rate": 1.3970588235294119e-06, "logits/chosen": -0.6789208650588989, "logits/rejected": -0.6755853891372681, "logps/chosen": -86.95452880859375, "logps/rejected": -81.64019012451172, "loss": 0.6897, "rewards/accuracies": 0.0, "rewards/chosen": -0.02429657056927681, "rewards/margins": -0.013901520520448685, "rewards/rejected": -0.010395050048828125, "step": 38 }, { "epoch": 0.01, "learning_rate": 1.4338235294117648e-06, "logits/chosen": -0.4052613079547882, "logits/rejected": -0.4017999768257141, "logps/chosen": -82.64262390136719, "logps/rejected": -106.52456665039062, "loss": 0.676, "rewards/accuracies": 1.0, "rewards/chosen": -0.0010589599842205644, "rewards/margins": 0.01906738243997097, "rewards/rejected": -0.0201263427734375, "step": 39 }, { "epoch": 0.01, "learning_rate": 1.4705882352941177e-06, "logits/chosen": -0.30525293946266174, "logits/rejected": -0.31912389397621155, "logps/chosen": -71.88336944580078, "logps/rejected": -90.6526107788086, "loss": 0.6713, "rewards/accuracies": 1.0, "rewards/chosen": 0.03220520168542862, "rewards/margins": 0.04713287577033043, "rewards/rejected": -0.014927673153579235, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.5073529411764707e-06, "logits/chosen": -0.5801286697387695, "logits/rejected": -0.5220912098884583, "logps/chosen": -119.00859069824219, "logps/rejected": -180.9591827392578, "loss": 0.6393, "rewards/accuracies": 1.0, "rewards/chosen": 0.02898864820599556, "rewards/margins": 0.04502411186695099, "rewards/rejected": -0.01603546179831028, "step": 41 }, { "epoch": 0.01, "learning_rate": 1.5441176470588238e-06, "logits/chosen": -0.5318131446838379, "logits/rejected": -0.5779324173927307, "logps/chosen": -115.04417419433594, "logps/rejected": -113.36618041992188, "loss": 0.688, "rewards/accuracies": 0.0, "rewards/chosen": -0.0016113281017169356, "rewards/margins": -0.0037513733841478825, "rewards/rejected": 0.002140045166015625, "step": 42 }, { "epoch": 0.01, "learning_rate": 1.5808823529411765e-06, "logits/chosen": -0.6197996735572815, "logits/rejected": -0.6100823879241943, "logps/chosen": -83.37503051757812, "logps/rejected": -93.98330688476562, "loss": 0.6447, "rewards/accuracies": 1.0, "rewards/chosen": 0.016950225457549095, "rewards/margins": 0.01251525804400444, "rewards/rejected": 0.0044349669478833675, "step": 43 }, { "epoch": 0.01, "learning_rate": 1.6176470588235297e-06, "logits/chosen": -0.3019031882286072, "logits/rejected": -0.35986578464508057, "logps/chosen": -78.45884704589844, "logps/rejected": -90.53435516357422, "loss": 0.7135, "rewards/accuracies": 0.0, "rewards/chosen": -0.03986206278204918, "rewards/margins": -0.10629120469093323, "rewards/rejected": 0.06642913818359375, "step": 44 }, { "epoch": 0.01, "learning_rate": 1.6544117647058824e-06, "logits/chosen": -0.8422847390174866, "logits/rejected": -0.8801093697547913, "logps/chosen": -80.70468139648438, "logps/rejected": -65.0257339477539, "loss": 0.654, "rewards/accuracies": 1.0, "rewards/chosen": 0.027587890625, "rewards/margins": 0.03906860202550888, "rewards/rejected": -0.01148071326315403, "step": 45 }, { "epoch": 0.01, "learning_rate": 1.6911764705882356e-06, "logits/chosen": -0.756164014339447, "logits/rejected": -0.07513577491044998, "logps/chosen": -90.81619262695312, "logps/rejected": -142.5826416015625, "loss": 0.6423, "rewards/accuracies": 1.0, "rewards/chosen": 0.023906707763671875, "rewards/margins": 0.040572360157966614, "rewards/rejected": -0.01666565053164959, "step": 46 }, { "epoch": 0.01, "learning_rate": 1.7279411764705883e-06, "logits/chosen": -0.6579495072364807, "logits/rejected": -0.6381164789199829, "logps/chosen": -99.19829559326172, "logps/rejected": -127.9170913696289, "loss": 0.7187, "rewards/accuracies": 0.0, "rewards/chosen": 0.028717804700136185, "rewards/margins": -0.02863311767578125, "rewards/rejected": 0.057350922375917435, "step": 47 }, { "epoch": 0.01, "learning_rate": 1.7647058823529414e-06, "logits/chosen": -0.7217267751693726, "logits/rejected": -0.768308699131012, "logps/chosen": -106.59515380859375, "logps/rejected": -117.68304443359375, "loss": 0.6596, "rewards/accuracies": 1.0, "rewards/chosen": 0.08762359619140625, "rewards/margins": 0.11426086723804474, "rewards/rejected": -0.02663726918399334, "step": 48 }, { "epoch": 0.01, "learning_rate": 1.8014705882352942e-06, "logits/chosen": -0.45807480812072754, "logits/rejected": -0.4597833454608917, "logps/chosen": -299.3675537109375, "logps/rejected": -251.9890594482422, "loss": 0.6012, "rewards/accuracies": 1.0, "rewards/chosen": 0.171600341796875, "rewards/margins": 0.07955779880285263, "rewards/rejected": 0.09204254299402237, "step": 49 }, { "epoch": 0.01, "learning_rate": 1.8382352941176473e-06, "logits/chosen": -0.5038400888442993, "logits/rejected": -0.5490981340408325, "logps/chosen": -93.10464477539062, "logps/rejected": -94.42666625976562, "loss": 0.672, "rewards/accuracies": 1.0, "rewards/chosen": 0.0008224487537518144, "rewards/margins": 0.01234893873333931, "rewards/rejected": -0.01152648963034153, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.8750000000000003e-06, "logits/chosen": -0.35678234696388245, "logits/rejected": -0.3804377317428589, "logps/chosen": -173.13943481445312, "logps/rejected": -107.16101837158203, "loss": 0.6931, "rewards/accuracies": 1.0, "rewards/chosen": 0.16385804116725922, "rewards/margins": 0.10071411728858948, "rewards/rejected": 0.06314392387866974, "step": 51 }, { "epoch": 0.01, "learning_rate": 1.9117647058823528e-06, "logits/chosen": -0.3976994454860687, "logits/rejected": -0.3865515887737274, "logps/chosen": -85.75074005126953, "logps/rejected": -102.68356323242188, "loss": 0.7065, "rewards/accuracies": 1.0, "rewards/chosen": -0.005539703648537397, "rewards/margins": 0.026041410863399506, "rewards/rejected": -0.03158111497759819, "step": 52 }, { "epoch": 0.01, "learning_rate": 1.948529411764706e-06, "logits/chosen": -0.4810889661312103, "logits/rejected": -0.4810889661312103, "logps/chosen": -227.9818572998047, "logps/rejected": -227.9818572998047, "loss": 0.6719, "rewards/accuracies": 0.0, "rewards/chosen": -0.02416229248046875, "rewards/margins": 0.0, "rewards/rejected": -0.02416229248046875, "step": 53 }, { "epoch": 0.01, "learning_rate": 1.985294117647059e-06, "logits/chosen": -0.3854374885559082, "logits/rejected": -0.3649199903011322, "logps/chosen": -298.61529541015625, "logps/rejected": -269.5350036621094, "loss": 0.5649, "rewards/accuracies": 1.0, "rewards/chosen": 0.41227418184280396, "rewards/margins": 0.18150940537452698, "rewards/rejected": 0.23076477646827698, "step": 54 }, { "epoch": 0.01, "learning_rate": 2.022058823529412e-06, "logits/chosen": -0.7039018869400024, "logits/rejected": -0.698373019695282, "logps/chosen": -59.12899398803711, "logps/rejected": -106.76908111572266, "loss": 0.6898, "rewards/accuracies": 1.0, "rewards/chosen": 0.016756821423768997, "rewards/margins": 0.013508988544344902, "rewards/rejected": 0.0032478333450853825, "step": 55 }, { "epoch": 0.01, "learning_rate": 2.058823529411765e-06, "logits/chosen": -0.4816644489765167, "logits/rejected": -0.5330668687820435, "logps/chosen": -78.42379760742188, "logps/rejected": -88.32994079589844, "loss": 0.6707, "rewards/accuracies": 1.0, "rewards/chosen": 0.09040451049804688, "rewards/margins": 0.07700805366039276, "rewards/rejected": 0.01339645404368639, "step": 56 }, { "epoch": 0.01, "learning_rate": 2.095588235294118e-06, "logits/chosen": -0.5764182209968567, "logits/rejected": -0.6138105988502502, "logps/chosen": -188.88922119140625, "logps/rejected": -97.36262512207031, "loss": 0.5549, "rewards/accuracies": 1.0, "rewards/chosen": 0.1516372710466385, "rewards/margins": 0.2764747738838196, "rewards/rejected": -0.1248374953866005, "step": 57 }, { "epoch": 0.01, "learning_rate": 2.132352941176471e-06, "logits/chosen": -0.35108354687690735, "logits/rejected": -0.4100345969200134, "logps/chosen": -164.8845672607422, "logps/rejected": -87.75856018066406, "loss": 0.604, "rewards/accuracies": 1.0, "rewards/chosen": 0.14620819687843323, "rewards/margins": 0.16139984130859375, "rewards/rejected": -0.015191650949418545, "step": 58 }, { "epoch": 0.01, "learning_rate": 2.1691176470588238e-06, "logits/chosen": -0.20624466240406036, "logits/rejected": -0.22595465183258057, "logps/chosen": -96.31391143798828, "logps/rejected": -81.0405502319336, "loss": 0.68, "rewards/accuracies": 1.0, "rewards/chosen": 0.035117339342832565, "rewards/margins": 0.014604948461055756, "rewards/rejected": 0.02051239088177681, "step": 59 }, { "epoch": 0.01, "learning_rate": 2.2058823529411767e-06, "logits/chosen": -0.40925168991088867, "logits/rejected": -0.35999980568885803, "logps/chosen": -149.88148498535156, "logps/rejected": -204.82601928710938, "loss": 0.7208, "rewards/accuracies": 0.0, "rewards/chosen": 0.17076873779296875, "rewards/margins": -0.11704865097999573, "rewards/rejected": 0.2878173887729645, "step": 60 }, { "epoch": 0.01, "learning_rate": 2.2426470588235296e-06, "logits/chosen": -0.7423839569091797, "logits/rejected": -0.8158808350563049, "logps/chosen": -113.7049560546875, "logps/rejected": -56.3421630859375, "loss": 0.6529, "rewards/accuracies": 1.0, "rewards/chosen": 0.03591156005859375, "rewards/margins": 0.049478910863399506, "rewards/rejected": -0.013567352667450905, "step": 61 }, { "epoch": 0.01, "learning_rate": 2.2794117647058826e-06, "logits/chosen": -0.498331218957901, "logits/rejected": -0.5892601609230042, "logps/chosen": -199.5576171875, "logps/rejected": -105.00997924804688, "loss": 0.5567, "rewards/accuracies": 1.0, "rewards/chosen": 0.24790345132350922, "rewards/margins": 0.19861602783203125, "rewards/rejected": 0.04928741604089737, "step": 62 }, { "epoch": 0.01, "learning_rate": 2.3161764705882355e-06, "logits/chosen": -0.4030540883541107, "logits/rejected": -0.4030540883541107, "logps/chosen": -144.30511474609375, "logps/rejected": -144.30511474609375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.02227935753762722, "rewards/margins": 0.0, "rewards/rejected": 0.02227935753762722, "step": 63 }, { "epoch": 0.01, "learning_rate": 2.3529411764705885e-06, "logits/chosen": -0.4710099399089813, "logits/rejected": -0.4476467967033386, "logps/chosen": -112.05198669433594, "logps/rejected": -96.88166046142578, "loss": 0.5725, "rewards/accuracies": 1.0, "rewards/chosen": 0.09160614013671875, "rewards/margins": 0.10335922241210938, "rewards/rejected": -0.011753082275390625, "step": 64 }, { "epoch": 0.01, "learning_rate": 2.3897058823529414e-06, "logits/chosen": -0.41669878363609314, "logits/rejected": -0.4330277442932129, "logps/chosen": -94.6707534790039, "logps/rejected": -116.10039520263672, "loss": 0.6078, "rewards/accuracies": 0.0, "rewards/chosen": 0.041158296167850494, "rewards/margins": -0.00812835618853569, "rewards/rejected": 0.049286652356386185, "step": 65 }, { "epoch": 0.01, "learning_rate": 2.4264705882352943e-06, "logits/chosen": -0.2039545476436615, "logits/rejected": -0.21618491411209106, "logps/chosen": -91.73161315917969, "logps/rejected": -30.561063766479492, "loss": 0.6506, "rewards/accuracies": 1.0, "rewards/chosen": 0.07469101250171661, "rewards/margins": 0.0998113676905632, "rewards/rejected": -0.02512035332620144, "step": 66 }, { "epoch": 0.01, "learning_rate": 2.4632352941176473e-06, "logits/chosen": -0.8045746088027954, "logits/rejected": -0.8055669665336609, "logps/chosen": -126.58316040039062, "logps/rejected": -100.21990966796875, "loss": 0.5891, "rewards/accuracies": 1.0, "rewards/chosen": 0.14199066162109375, "rewards/margins": 0.08680801093578339, "rewards/rejected": 0.055182646960020065, "step": 67 }, { "epoch": 0.02, "learning_rate": 2.5e-06, "logits/chosen": -0.5169475078582764, "logits/rejected": -0.5345978736877441, "logps/chosen": -56.32733154296875, "logps/rejected": -13.213312149047852, "loss": 0.6058, "rewards/accuracies": 1.0, "rewards/chosen": 0.016204072162508965, "rewards/margins": 0.03762874752283096, "rewards/rejected": -0.021424675360322, "step": 68 }, { "epoch": 0.02, "learning_rate": 2.536764705882353e-06, "logits/chosen": -0.30095893144607544, "logits/rejected": -0.25333309173583984, "logps/chosen": -154.99465942382812, "logps/rejected": -215.8319549560547, "loss": 0.6214, "rewards/accuracies": 0.0, "rewards/chosen": 0.21257324516773224, "rewards/margins": -0.09905548393726349, "rewards/rejected": 0.3116287291049957, "step": 69 }, { "epoch": 0.02, "learning_rate": 2.5735294117647057e-06, "logits/chosen": -0.24417996406555176, "logits/rejected": -0.24417996406555176, "logps/chosen": -44.23639678955078, "logps/rejected": -44.23639678955078, "loss": 0.6047, "rewards/accuracies": 0.0, "rewards/chosen": -0.06427230685949326, "rewards/margins": 0.0, "rewards/rejected": -0.06427230685949326, "step": 70 }, { "epoch": 0.02, "learning_rate": 2.610294117647059e-06, "logits/chosen": -0.7732327580451965, "logits/rejected": -0.7732327580451965, "logps/chosen": -32.14384460449219, "logps/rejected": -32.14384460449219, "loss": 0.7587, "rewards/accuracies": 0.0, "rewards/chosen": -0.06797485798597336, "rewards/margins": 0.0, "rewards/rejected": -0.06797485798597336, "step": 71 }, { "epoch": 0.02, "learning_rate": 2.647058823529412e-06, "logits/chosen": -0.3025524318218231, "logits/rejected": -0.3210960626602173, "logps/chosen": -192.67092895507812, "logps/rejected": -151.53353881835938, "loss": 0.5796, "rewards/accuracies": 1.0, "rewards/chosen": 0.25249025225639343, "rewards/margins": 0.21437379717826843, "rewards/rejected": 0.038116455078125, "step": 72 }, { "epoch": 0.02, "learning_rate": 2.683823529411765e-06, "logits/chosen": -0.610137403011322, "logits/rejected": -0.6184305548667908, "logps/chosen": -69.60263061523438, "logps/rejected": -106.89649963378906, "loss": 0.5927, "rewards/accuracies": 1.0, "rewards/chosen": 0.04686889797449112, "rewards/margins": 0.2391510009765625, "rewards/rejected": -0.19228211045265198, "step": 73 }, { "epoch": 0.02, "learning_rate": 2.720588235294118e-06, "logits/chosen": -0.785830020904541, "logits/rejected": -0.8760372400283813, "logps/chosen": -154.41017150878906, "logps/rejected": -110.24497985839844, "loss": 0.4781, "rewards/accuracies": 1.0, "rewards/chosen": 0.32268983125686646, "rewards/margins": 0.4707695245742798, "rewards/rejected": -0.14807967841625214, "step": 74 }, { "epoch": 0.02, "learning_rate": 2.757352941176471e-06, "logits/chosen": -0.8245435953140259, "logits/rejected": -0.9619941711425781, "logps/chosen": -186.83877563476562, "logps/rejected": -41.35696029663086, "loss": 0.5801, "rewards/accuracies": 1.0, "rewards/chosen": 0.1923370361328125, "rewards/margins": 0.21722793579101562, "rewards/rejected": -0.024890899658203125, "step": 75 }, { "epoch": 0.02, "learning_rate": 2.7941176470588237e-06, "logits/chosen": -0.4464176297187805, "logits/rejected": -0.4464176297187805, "logps/chosen": -85.75053405761719, "logps/rejected": -85.75053405761719, "loss": 0.7135, "rewards/accuracies": 0.0, "rewards/chosen": -0.10865173488855362, "rewards/margins": 0.0, "rewards/rejected": -0.10865173488855362, "step": 76 }, { "epoch": 0.02, "learning_rate": 2.8308823529411766e-06, "logits/chosen": -0.3672924041748047, "logits/rejected": -0.41314980387687683, "logps/chosen": -72.95207214355469, "logps/rejected": -91.17633056640625, "loss": 0.6395, "rewards/accuracies": 1.0, "rewards/chosen": -0.11236800998449326, "rewards/margins": 0.028279878199100494, "rewards/rejected": -0.14064788818359375, "step": 77 }, { "epoch": 0.02, "learning_rate": 2.8676470588235296e-06, "logits/chosen": -0.29809504747390747, "logits/rejected": -0.3221423029899597, "logps/chosen": -72.46641540527344, "logps/rejected": -101.83423614501953, "loss": 0.6243, "rewards/accuracies": 1.0, "rewards/chosen": 0.06385193020105362, "rewards/margins": 0.09303513169288635, "rewards/rejected": -0.029183197766542435, "step": 78 }, { "epoch": 0.02, "learning_rate": 2.904411764705883e-06, "logits/chosen": -0.6528931856155396, "logits/rejected": -0.6435877084732056, "logps/chosen": -109.84074401855469, "logps/rejected": -79.70954132080078, "loss": 0.6765, "rewards/accuracies": 1.0, "rewards/chosen": -0.14172592759132385, "rewards/margins": 0.05780029296875, "rewards/rejected": -0.19952622056007385, "step": 79 }, { "epoch": 0.02, "learning_rate": 2.9411764705882355e-06, "logits/chosen": -0.3710818290710449, "logits/rejected": -0.339682936668396, "logps/chosen": -114.08587646484375, "logps/rejected": -145.5526123046875, "loss": 0.5247, "rewards/accuracies": 1.0, "rewards/chosen": 0.22502441704273224, "rewards/margins": 0.31839293241500854, "rewards/rejected": -0.0933685302734375, "step": 80 }, { "epoch": 0.02, "learning_rate": 2.9779411764705884e-06, "logits/chosen": -0.6678609848022461, "logits/rejected": -0.6238557696342468, "logps/chosen": -83.45916748046875, "logps/rejected": -179.80770874023438, "loss": 0.5624, "rewards/accuracies": 0.0, "rewards/chosen": 0.09553833305835724, "rewards/margins": -0.193736270070076, "rewards/rejected": 0.2892746031284332, "step": 81 }, { "epoch": 0.02, "learning_rate": 3.0147058823529413e-06, "logits/chosen": -0.33775123953819275, "logits/rejected": -0.33994850516319275, "logps/chosen": -148.88973999023438, "logps/rejected": -116.49528503417969, "loss": 0.4512, "rewards/accuracies": 1.0, "rewards/chosen": 0.4525085389614105, "rewards/margins": 0.5247436761856079, "rewards/rejected": -0.072235107421875, "step": 82 }, { "epoch": 0.02, "learning_rate": 3.0514705882352947e-06, "logits/chosen": -0.6556121706962585, "logits/rejected": -0.668944776058197, "logps/chosen": -127.29495239257812, "logps/rejected": -178.005126953125, "loss": 0.6892, "rewards/accuracies": 0.0, "rewards/chosen": 0.10438232868909836, "rewards/margins": -0.04305420070886612, "rewards/rejected": 0.14743652939796448, "step": 83 }, { "epoch": 0.02, "learning_rate": 3.0882352941176476e-06, "logits/chosen": -0.4490647315979004, "logits/rejected": -0.4490647315979004, "logps/chosen": -83.72900390625, "logps/rejected": -83.72900390625, "loss": 0.5247, "rewards/accuracies": 0.0, "rewards/chosen": -0.08099517971277237, "rewards/margins": 0.0, "rewards/rejected": -0.08099517971277237, "step": 84 }, { "epoch": 0.02, "learning_rate": 3.125e-06, "logits/chosen": -0.22212596237659454, "logits/rejected": -0.18644331395626068, "logps/chosen": -315.31170654296875, "logps/rejected": -96.28602600097656, "loss": 0.4786, "rewards/accuracies": 1.0, "rewards/chosen": 0.5189880728721619, "rewards/margins": 0.6366890072822571, "rewards/rejected": -0.117700956761837, "step": 85 }, { "epoch": 0.02, "learning_rate": 3.161764705882353e-06, "logits/chosen": -0.509087860584259, "logits/rejected": -0.5158196091651917, "logps/chosen": -126.11315155029297, "logps/rejected": -141.87164306640625, "loss": 0.5485, "rewards/accuracies": 1.0, "rewards/chosen": 0.031687166541814804, "rewards/margins": 0.22667618095874786, "rewards/rejected": -0.19498901069164276, "step": 86 }, { "epoch": 0.02, "learning_rate": 3.198529411764706e-06, "logits/chosen": -0.4638284742832184, "logits/rejected": -0.43079957365989685, "logps/chosen": -109.90370178222656, "logps/rejected": -79.18667602539062, "loss": 0.5773, "rewards/accuracies": 1.0, "rewards/chosen": -0.019899750128388405, "rewards/margins": 0.23803633451461792, "rewards/rejected": -0.2579360902309418, "step": 87 }, { "epoch": 0.02, "learning_rate": 3.2352941176470594e-06, "logits/chosen": -0.6068167686462402, "logits/rejected": -0.5972204208374023, "logps/chosen": -183.9478302001953, "logps/rejected": -134.04269409179688, "loss": 0.4366, "rewards/accuracies": 1.0, "rewards/chosen": 0.3751632869243622, "rewards/margins": 0.6361892819404602, "rewards/rejected": -0.261025995016098, "step": 88 }, { "epoch": 0.02, "learning_rate": 3.272058823529412e-06, "logits/chosen": -0.2647368311882019, "logits/rejected": -0.2939533293247223, "logps/chosen": -182.90658569335938, "logps/rejected": -133.8359375, "loss": 0.5068, "rewards/accuracies": 1.0, "rewards/chosen": 0.39635011553764343, "rewards/margins": 0.3227173089981079, "rewards/rejected": 0.07363281399011612, "step": 89 }, { "epoch": 0.02, "learning_rate": 3.308823529411765e-06, "logits/chosen": -0.6717187762260437, "logits/rejected": -0.5567804574966431, "logps/chosen": -143.98419189453125, "logps/rejected": -252.28680419921875, "loss": 0.7352, "rewards/accuracies": 0.0, "rewards/chosen": -0.02416687086224556, "rewards/margins": -0.6839508414268494, "rewards/rejected": 0.6597839593887329, "step": 90 }, { "epoch": 0.02, "learning_rate": 3.3455882352941178e-06, "logits/chosen": -0.529348611831665, "logits/rejected": -0.5525179505348206, "logps/chosen": -68.48983001708984, "logps/rejected": -70.07778930664062, "loss": 0.6767, "rewards/accuracies": 0.0, "rewards/chosen": 0.06827926635742188, "rewards/margins": -0.0203857421875, "rewards/rejected": 0.08866500854492188, "step": 91 }, { "epoch": 0.02, "learning_rate": 3.382352941176471e-06, "logits/chosen": -0.4396819770336151, "logits/rejected": -0.4165799915790558, "logps/chosen": -117.03112030029297, "logps/rejected": -129.37374877929688, "loss": 0.5411, "rewards/accuracies": 0.0, "rewards/chosen": 0.07278366386890411, "rewards/margins": -0.020572662353515625, "rewards/rejected": 0.09335632622241974, "step": 92 }, { "epoch": 0.02, "learning_rate": 3.419117647058824e-06, "logits/chosen": -0.573398768901825, "logits/rejected": -0.603301465511322, "logps/chosen": -59.92571258544922, "logps/rejected": -45.335052490234375, "loss": 0.6817, "rewards/accuracies": 1.0, "rewards/chosen": 0.012929153628647327, "rewards/margins": 0.059001922607421875, "rewards/rejected": -0.04607276991009712, "step": 93 }, { "epoch": 0.02, "learning_rate": 3.4558823529411766e-06, "logits/chosen": -0.9107815027236938, "logits/rejected": -0.899071216583252, "logps/chosen": -125.85453796386719, "logps/rejected": -125.05064392089844, "loss": 0.5056, "rewards/accuracies": 1.0, "rewards/chosen": -0.24891053140163422, "rewards/margins": 0.030020132660865784, "rewards/rejected": -0.2789306640625, "step": 94 }, { "epoch": 0.02, "learning_rate": 3.4926470588235295e-06, "logits/chosen": -0.34298092126846313, "logits/rejected": -0.46567097306251526, "logps/chosen": -160.01834106445312, "logps/rejected": -96.04492950439453, "loss": 0.5597, "rewards/accuracies": 1.0, "rewards/chosen": 0.5086212158203125, "rewards/margins": 0.4169349670410156, "rewards/rejected": 0.09168624877929688, "step": 95 }, { "epoch": 0.02, "learning_rate": 3.529411764705883e-06, "logits/chosen": -0.4671597480773926, "logits/rejected": -0.474499374628067, "logps/chosen": -187.0517578125, "logps/rejected": -219.66693115234375, "loss": 0.4068, "rewards/accuracies": 1.0, "rewards/chosen": 0.8055450320243835, "rewards/margins": 0.36970824003219604, "rewards/rejected": 0.4358367919921875, "step": 96 }, { "epoch": 0.02, "learning_rate": 3.566176470588236e-06, "logits/chosen": -0.4514496624469757, "logits/rejected": -0.4597274959087372, "logps/chosen": -117.81414031982422, "logps/rejected": -71.05074310302734, "loss": 0.5402, "rewards/accuracies": 1.0, "rewards/chosen": 0.23003005981445312, "rewards/margins": 0.1474357545375824, "rewards/rejected": 0.08259429782629013, "step": 97 }, { "epoch": 0.02, "learning_rate": 3.6029411764705883e-06, "logits/chosen": -0.590225875377655, "logits/rejected": -0.6662089824676514, "logps/chosen": -235.12216186523438, "logps/rejected": -183.19398498535156, "loss": 0.4769, "rewards/accuracies": 1.0, "rewards/chosen": 0.38836669921875, "rewards/margins": 0.18884123861789703, "rewards/rejected": 0.19952546060085297, "step": 98 }, { "epoch": 0.02, "learning_rate": 3.6397058823529413e-06, "logits/chosen": -0.745010256767273, "logits/rejected": -0.7532031536102295, "logps/chosen": -151.944091796875, "logps/rejected": -86.12252044677734, "loss": 0.4566, "rewards/accuracies": 1.0, "rewards/chosen": 0.425872802734375, "rewards/margins": 0.30676499009132385, "rewards/rejected": 0.11910782009363174, "step": 99 }, { "epoch": 0.02, "learning_rate": 3.6764705882352946e-06, "logits/chosen": -0.305853933095932, "logits/rejected": -0.30561351776123047, "logps/chosen": -137.13568115234375, "logps/rejected": -111.18033599853516, "loss": 0.5162, "rewards/accuracies": 1.0, "rewards/chosen": -0.2122451812028885, "rewards/margins": 0.05098344385623932, "rewards/rejected": -0.2632286250591278, "step": 100 }, { "epoch": 0.02, "learning_rate": 3.7132352941176476e-06, "logits/chosen": -0.5533117651939392, "logits/rejected": -0.5594286322593689, "logps/chosen": -107.89281463623047, "logps/rejected": -87.45777130126953, "loss": 0.454, "rewards/accuracies": 1.0, "rewards/chosen": 0.2671974301338196, "rewards/margins": 0.3960510492324829, "rewards/rejected": -0.12885360419750214, "step": 101 }, { "epoch": 0.02, "learning_rate": 3.7500000000000005e-06, "logits/chosen": -0.5365679860115051, "logits/rejected": 0.11688588559627533, "logps/chosen": -206.2764129638672, "logps/rejected": -152.6446533203125, "loss": 0.1925, "rewards/accuracies": 1.0, "rewards/chosen": 0.6409622430801392, "rewards/margins": 1.3850371837615967, "rewards/rejected": -0.7440750002861023, "step": 102 }, { "epoch": 0.02, "learning_rate": 3.786764705882353e-06, "logits/chosen": -0.3030529022216797, "logits/rejected": -0.2534787952899933, "logps/chosen": -87.59855651855469, "logps/rejected": -149.56146240234375, "loss": 0.4122, "rewards/accuracies": 1.0, "rewards/chosen": 0.18845978379249573, "rewards/margins": 0.74876868724823, "rewards/rejected": -0.5603088736534119, "step": 103 }, { "epoch": 0.02, "learning_rate": 3.8235294117647055e-06, "logits/chosen": -0.3998459577560425, "logits/rejected": -0.4482194483280182, "logps/chosen": -146.846435546875, "logps/rejected": -60.318267822265625, "loss": 0.341, "rewards/accuracies": 1.0, "rewards/chosen": 0.6289642453193665, "rewards/margins": 0.5984771847724915, "rewards/rejected": 0.030487060546875, "step": 104 }, { "epoch": 0.02, "learning_rate": 3.860294117647059e-06, "logits/chosen": -0.7100638151168823, "logits/rejected": -0.7231948375701904, "logps/chosen": -62.661651611328125, "logps/rejected": -114.52027893066406, "loss": 0.4306, "rewards/accuracies": 1.0, "rewards/chosen": -0.1834724396467209, "rewards/margins": 0.3704185485839844, "rewards/rejected": -0.5538910031318665, "step": 105 }, { "epoch": 0.02, "learning_rate": 3.897058823529412e-06, "logits/chosen": -0.444804310798645, "logits/rejected": -0.40184855461120605, "logps/chosen": -93.85975646972656, "logps/rejected": -227.29042053222656, "loss": 0.7165, "rewards/accuracies": 0.0, "rewards/chosen": 0.07262267917394638, "rewards/margins": -0.8899338245391846, "rewards/rejected": 0.9625564813613892, "step": 106 }, { "epoch": 0.02, "learning_rate": 3.933823529411765e-06, "logits/chosen": -0.31817418336868286, "logits/rejected": -0.2895990312099457, "logps/chosen": -211.88626098632812, "logps/rejected": -216.60189819335938, "loss": 0.4381, "rewards/accuracies": 1.0, "rewards/chosen": 1.0716370344161987, "rewards/margins": 0.13077396154403687, "rewards/rejected": 0.9408630728721619, "step": 107 }, { "epoch": 0.02, "learning_rate": 3.970588235294118e-06, "logits/chosen": -0.6843553185462952, "logits/rejected": -0.7048678398132324, "logps/chosen": -113.13320922851562, "logps/rejected": -147.15097045898438, "loss": 0.3452, "rewards/accuracies": 1.0, "rewards/chosen": 0.3786155879497528, "rewards/margins": 0.5747856497764587, "rewards/rejected": -0.19617004692554474, "step": 108 }, { "epoch": 0.02, "learning_rate": 4.007352941176471e-06, "logits/chosen": -0.49975746870040894, "logits/rejected": -0.527105450630188, "logps/chosen": -83.398681640625, "logps/rejected": -87.54402160644531, "loss": 0.5279, "rewards/accuracies": 1.0, "rewards/chosen": 0.1575614959001541, "rewards/margins": 0.10673065483570099, "rewards/rejected": 0.050830841064453125, "step": 109 }, { "epoch": 0.02, "learning_rate": 4.044117647058824e-06, "logits/chosen": -0.5353729128837585, "logits/rejected": -0.5618705749511719, "logps/chosen": -238.18447875976562, "logps/rejected": -68.89195251464844, "loss": 0.3851, "rewards/accuracies": 1.0, "rewards/chosen": 0.682049572467804, "rewards/margins": 0.6842331290245056, "rewards/rejected": -0.0021835328079760075, "step": 110 }, { "epoch": 0.02, "learning_rate": 4.080882352941177e-06, "logits/chosen": -0.3803827166557312, "logits/rejected": -0.4083646833896637, "logps/chosen": -82.35920715332031, "logps/rejected": -176.72314453125, "loss": 0.8892, "rewards/accuracies": 0.0, "rewards/chosen": -0.05724792554974556, "rewards/margins": -1.0837692022323608, "rewards/rejected": 1.0265213251113892, "step": 111 }, { "epoch": 0.02, "learning_rate": 4.11764705882353e-06, "logits/chosen": -0.7775117754936218, "logits/rejected": -0.6513740420341492, "logps/chosen": -121.5003662109375, "logps/rejected": -235.8935546875, "loss": 0.6748, "rewards/accuracies": 0.0, "rewards/chosen": -0.02243957482278347, "rewards/margins": -0.6201797723770142, "rewards/rejected": 0.5977401733398438, "step": 112 }, { "epoch": 0.03, "learning_rate": 4.154411764705883e-06, "logits/chosen": -0.1325114369392395, "logits/rejected": -0.11147481203079224, "logps/chosen": -81.55459594726562, "logps/rejected": -72.4926528930664, "loss": 0.422, "rewards/accuracies": 1.0, "rewards/chosen": 0.11834564059972763, "rewards/margins": 0.13697203993797302, "rewards/rejected": -0.01862640492618084, "step": 113 }, { "epoch": 0.03, "learning_rate": 4.191176470588236e-06, "logits/chosen": -0.20508414506912231, "logits/rejected": -0.23426103591918945, "logps/chosen": -101.0279541015625, "logps/rejected": -141.97581481933594, "loss": 0.4299, "rewards/accuracies": 1.0, "rewards/chosen": 0.1745964139699936, "rewards/margins": 0.39010849595069885, "rewards/rejected": -0.21551208198070526, "step": 114 }, { "epoch": 0.03, "learning_rate": 4.227941176470589e-06, "logits/chosen": -0.4007163643836975, "logits/rejected": -0.4137006998062134, "logps/chosen": -125.29844665527344, "logps/rejected": -104.00473022460938, "loss": 0.3425, "rewards/accuracies": 1.0, "rewards/chosen": 0.4069534242153168, "rewards/margins": 0.8847015500068665, "rewards/rejected": -0.4777481257915497, "step": 115 }, { "epoch": 0.03, "learning_rate": 4.264705882352942e-06, "logits/chosen": -0.4502127766609192, "logits/rejected": -0.48410552740097046, "logps/chosen": -74.35551452636719, "logps/rejected": -189.85671997070312, "loss": 0.3917, "rewards/accuracies": 1.0, "rewards/chosen": 0.15394440293312073, "rewards/margins": 0.011036679148674011, "rewards/rejected": 0.14290772378444672, "step": 116 }, { "epoch": 0.03, "learning_rate": 4.301470588235295e-06, "logits/chosen": -0.4727005958557129, "logits/rejected": -0.5244357585906982, "logps/chosen": -139.49029541015625, "logps/rejected": -213.09658813476562, "loss": 0.4343, "rewards/accuracies": 1.0, "rewards/chosen": 0.7342147827148438, "rewards/margins": 0.2789062559604645, "rewards/rejected": 0.4553085267543793, "step": 117 }, { "epoch": 0.03, "learning_rate": 4.3382352941176475e-06, "logits/chosen": -0.3844261169433594, "logits/rejected": -0.3827964961528778, "logps/chosen": -107.18197631835938, "logps/rejected": -147.58963012695312, "loss": 0.3143, "rewards/accuracies": 1.0, "rewards/chosen": 0.3447433412075043, "rewards/margins": 0.4900527894496918, "rewards/rejected": -0.1453094482421875, "step": 118 }, { "epoch": 0.03, "learning_rate": 4.3750000000000005e-06, "logits/chosen": -0.31514543294906616, "logits/rejected": 0.19829782843589783, "logps/chosen": -172.7956085205078, "logps/rejected": -127.77312469482422, "loss": 0.2713, "rewards/accuracies": 1.0, "rewards/chosen": 0.49353334307670593, "rewards/margins": 1.1400810480117798, "rewards/rejected": -0.6465477347373962, "step": 119 }, { "epoch": 0.03, "learning_rate": 4.411764705882353e-06, "logits/chosen": -0.3999476730823517, "logits/rejected": -0.413646936416626, "logps/chosen": -136.7646942138672, "logps/rejected": -46.852561950683594, "loss": 0.4452, "rewards/accuracies": 1.0, "rewards/chosen": -0.02268829382956028, "rewards/margins": 0.1268085390329361, "rewards/rejected": -0.14949683845043182, "step": 120 }, { "epoch": 0.03, "learning_rate": 4.448529411764706e-06, "logits/chosen": -0.5595365762710571, "logits/rejected": -0.5796701908111572, "logps/chosen": -176.0860595703125, "logps/rejected": -214.43991088867188, "loss": 0.5446, "rewards/accuracies": 0.0, "rewards/chosen": 0.788116455078125, "rewards/margins": -0.4503997564315796, "rewards/rejected": 1.2385162115097046, "step": 121 }, { "epoch": 0.03, "learning_rate": 4.485294117647059e-06, "logits/chosen": -0.19474399089813232, "logits/rejected": -0.1607821136713028, "logps/chosen": -79.65670013427734, "logps/rejected": -71.51028442382812, "loss": 0.6277, "rewards/accuracies": 1.0, "rewards/chosen": 0.09579239040613174, "rewards/margins": 0.1628318727016449, "rewards/rejected": -0.06703948974609375, "step": 122 }, { "epoch": 0.03, "learning_rate": 4.522058823529412e-06, "logits/chosen": -0.6723535656929016, "logits/rejected": 0.28279873728752136, "logps/chosen": -71.15763854980469, "logps/rejected": -156.24639892578125, "loss": 0.5998, "rewards/accuracies": 1.0, "rewards/chosen": -0.059996794909238815, "rewards/margins": 0.892865777015686, "rewards/rejected": -0.952862560749054, "step": 123 }, { "epoch": 0.03, "learning_rate": 4.558823529411765e-06, "logits/chosen": -0.5016076564788818, "logits/rejected": -0.5005203485488892, "logps/chosen": -90.05779266357422, "logps/rejected": -94.62655639648438, "loss": 0.3159, "rewards/accuracies": 1.0, "rewards/chosen": 0.3578025996685028, "rewards/margins": 0.736212968826294, "rewards/rejected": -0.37841033935546875, "step": 124 }, { "epoch": 0.03, "learning_rate": 4.595588235294118e-06, "logits/chosen": -0.6543691158294678, "logits/rejected": -0.6119896173477173, "logps/chosen": -115.4276123046875, "logps/rejected": -134.31353759765625, "loss": 0.3316, "rewards/accuracies": 1.0, "rewards/chosen": 0.6947723627090454, "rewards/margins": 1.8100464344024658, "rewards/rejected": -1.1152740716934204, "step": 125 }, { "epoch": 0.03, "learning_rate": 4.632352941176471e-06, "logits/chosen": -0.38231784105300903, "logits/rejected": -0.40912875533103943, "logps/chosen": -54.05651092529297, "logps/rejected": -98.07064819335938, "loss": 0.4772, "rewards/accuracies": 1.0, "rewards/chosen": -0.028418732807040215, "rewards/margins": 0.7415336966514587, "rewards/rejected": -0.7699524164199829, "step": 126 }, { "epoch": 0.03, "learning_rate": 4.669117647058824e-06, "logits/chosen": -0.209579735994339, "logits/rejected": -0.21306702494621277, "logps/chosen": -51.25556945800781, "logps/rejected": -14.024785995483398, "loss": 0.5605, "rewards/accuracies": 0.0, "rewards/chosen": -0.15274544060230255, "rewards/margins": -0.12964649498462677, "rewards/rejected": -0.02309894561767578, "step": 127 }, { "epoch": 0.03, "learning_rate": 4.705882352941177e-06, "logits/chosen": -0.05200060456991196, "logits/rejected": -0.06353829056024551, "logps/chosen": -74.42794036865234, "logps/rejected": -193.15536499023438, "loss": 0.716, "rewards/accuracies": 0.0, "rewards/chosen": 0.10969772189855576, "rewards/margins": -0.8457008600234985, "rewards/rejected": 0.9553985595703125, "step": 128 }, { "epoch": 0.03, "learning_rate": 4.74264705882353e-06, "logits/chosen": -0.6785653233528137, "logits/rejected": -0.769312858581543, "logps/chosen": -190.947021484375, "logps/rejected": -76.77362060546875, "loss": 0.2559, "rewards/accuracies": 1.0, "rewards/chosen": 0.7353378534317017, "rewards/margins": 1.1953903436660767, "rewards/rejected": -0.460052490234375, "step": 129 }, { "epoch": 0.03, "learning_rate": 4.779411764705883e-06, "logits/chosen": -0.24532227218151093, "logits/rejected": -0.20408010482788086, "logps/chosen": -87.873291015625, "logps/rejected": -62.317020416259766, "loss": 0.7214, "rewards/accuracies": 0.0, "rewards/chosen": -0.5853340029716492, "rewards/margins": -0.5937083959579468, "rewards/rejected": 0.008374405093491077, "step": 130 }, { "epoch": 0.03, "learning_rate": 4.816176470588236e-06, "logits/chosen": -0.5454286932945251, "logits/rejected": -0.5761781930923462, "logps/chosen": -120.54266357421875, "logps/rejected": -89.10572052001953, "loss": 0.3967, "rewards/accuracies": 1.0, "rewards/chosen": -0.2855720520019531, "rewards/margins": 0.17257919907569885, "rewards/rejected": -0.458151251077652, "step": 131 }, { "epoch": 0.03, "learning_rate": 4.852941176470589e-06, "logits/chosen": -0.3122742772102356, "logits/rejected": -0.027094129472970963, "logps/chosen": -212.59747314453125, "logps/rejected": -296.24896240234375, "loss": 0.3382, "rewards/accuracies": 1.0, "rewards/chosen": 0.5140060782432556, "rewards/margins": 0.07072299718856812, "rewards/rejected": 0.4432830810546875, "step": 132 }, { "epoch": 0.03, "learning_rate": 4.889705882352942e-06, "logits/chosen": -0.34041255712509155, "logits/rejected": -0.3324529826641083, "logps/chosen": -177.27755737304688, "logps/rejected": -225.02394104003906, "loss": 0.6381, "rewards/accuracies": 0.0, "rewards/chosen": 0.6815552115440369, "rewards/margins": -0.2969207763671875, "rewards/rejected": 0.9784759879112244, "step": 133 }, { "epoch": 0.03, "learning_rate": 4.9264705882352945e-06, "logits/chosen": -0.4335279166698456, "logits/rejected": -0.4309585988521576, "logps/chosen": -79.28226470947266, "logps/rejected": -139.18153381347656, "loss": 0.3541, "rewards/accuracies": 1.0, "rewards/chosen": 0.0028656006325036287, "rewards/margins": 0.18650969862937927, "rewards/rejected": -0.183644101023674, "step": 134 }, { "epoch": 0.03, "learning_rate": 4.9632352941176475e-06, "logits/chosen": -0.5272752642631531, "logits/rejected": -0.5248451828956604, "logps/chosen": -235.00210571289062, "logps/rejected": -130.02459716796875, "loss": 0.1321, "rewards/accuracies": 1.0, "rewards/chosen": 1.2603988647460938, "rewards/margins": 1.6660339832305908, "rewards/rejected": -0.4056350886821747, "step": 135 }, { "epoch": 0.03, "learning_rate": 5e-06, "logits/chosen": -0.6197192072868347, "logits/rejected": -0.6230770945549011, "logps/chosen": -91.56920623779297, "logps/rejected": -134.37188720703125, "loss": 0.4611, "rewards/accuracies": 1.0, "rewards/chosen": 0.6793319582939148, "rewards/margins": 0.02013087272644043, "rewards/rejected": 0.6592010855674744, "step": 136 }, { "epoch": 0.03, "learning_rate": 5.036764705882353e-06, "logits/chosen": -0.20177263021469116, "logits/rejected": -0.1982543170452118, "logps/chosen": -133.95071411132812, "logps/rejected": -85.10946655273438, "loss": 0.6357, "rewards/accuracies": 1.0, "rewards/chosen": 0.42483216524124146, "rewards/margins": 0.5409988760948181, "rewards/rejected": -0.11616668850183487, "step": 137 }, { "epoch": 0.03, "learning_rate": 5.073529411764706e-06, "logits/chosen": -0.5146775245666504, "logits/rejected": 0.023097148165106773, "logps/chosen": -84.92279052734375, "logps/rejected": -104.97755432128906, "loss": 0.4202, "rewards/accuracies": 1.0, "rewards/chosen": -0.15085144340991974, "rewards/margins": 1.0365020036697388, "rewards/rejected": -1.187353491783142, "step": 138 }, { "epoch": 0.03, "learning_rate": 5.110294117647059e-06, "logits/chosen": -0.2629229426383972, "logits/rejected": -0.2629229426383972, "logps/chosen": -25.506607055664062, "logps/rejected": -25.506607055664062, "loss": 0.9162, "rewards/accuracies": 0.0, "rewards/chosen": -0.21128903329372406, "rewards/margins": 0.0, "rewards/rejected": -0.21128903329372406, "step": 139 }, { "epoch": 0.03, "learning_rate": 5.147058823529411e-06, "logits/chosen": -0.4988695979118347, "logits/rejected": -0.5926430821418762, "logps/chosen": -221.26263427734375, "logps/rejected": -133.41500854492188, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": 1.9055191278457642, "rewards/margins": 3.084437608718872, "rewards/rejected": -1.178918480873108, "step": 140 }, { "epoch": 0.03, "learning_rate": 5.183823529411766e-06, "logits/chosen": -0.5308619141578674, "logits/rejected": -0.5175336003303528, "logps/chosen": -112.3912353515625, "logps/rejected": -103.40599822998047, "loss": 0.2298, "rewards/accuracies": 1.0, "rewards/chosen": 0.1955406218767166, "rewards/margins": 0.8082626461982727, "rewards/rejected": -0.6127220392227173, "step": 141 }, { "epoch": 0.03, "learning_rate": 5.220588235294118e-06, "logits/chosen": -0.31046026945114136, "logits/rejected": -0.32547035813331604, "logps/chosen": -105.98588562011719, "logps/rejected": -113.0163345336914, "loss": 0.1992, "rewards/accuracies": 1.0, "rewards/chosen": 0.2231132537126541, "rewards/margins": 0.8332580327987671, "rewards/rejected": -0.6101447939872742, "step": 142 }, { "epoch": 0.03, "learning_rate": 5.257352941176471e-06, "logits/chosen": -0.19056537747383118, "logits/rejected": -0.19047702848911285, "logps/chosen": -91.91300201416016, "logps/rejected": -118.30514526367188, "loss": 0.4802, "rewards/accuracies": 0.0, "rewards/chosen": -0.058261871337890625, "rewards/margins": -0.04360656812787056, "rewards/rejected": -0.01465530414134264, "step": 143 }, { "epoch": 0.03, "learning_rate": 5.294117647058824e-06, "logits/chosen": -0.27071356773376465, "logits/rejected": -0.2593061327934265, "logps/chosen": -95.23098754882812, "logps/rejected": -103.17041778564453, "loss": 0.3909, "rewards/accuracies": 1.0, "rewards/chosen": -0.11890335381031036, "rewards/margins": 0.2067466825246811, "rewards/rejected": -0.32565003633499146, "step": 144 }, { "epoch": 0.03, "learning_rate": 5.330882352941177e-06, "logits/chosen": -0.5829676985740662, "logits/rejected": -0.5416656732559204, "logps/chosen": -98.57406616210938, "logps/rejected": -141.4609375, "loss": 0.4006, "rewards/accuracies": 1.0, "rewards/chosen": 0.010202026925981045, "rewards/margins": 0.44564056396484375, "rewards/rejected": -0.4354385435581207, "step": 145 }, { "epoch": 0.03, "learning_rate": 5.36764705882353e-06, "logits/chosen": -0.7868320941925049, "logits/rejected": -0.7760535478591919, "logps/chosen": -106.95344543457031, "logps/rejected": -210.459228515625, "loss": 0.6241, "rewards/accuracies": 0.0, "rewards/chosen": 0.4348030090332031, "rewards/margins": -0.3527641296386719, "rewards/rejected": 0.787567138671875, "step": 146 }, { "epoch": 0.03, "learning_rate": 5.404411764705883e-06, "logits/chosen": -0.4313804507255554, "logits/rejected": -0.38531914353370667, "logps/chosen": -97.37757110595703, "logps/rejected": -207.84796142578125, "loss": 0.414, "rewards/accuracies": 0.0, "rewards/chosen": 0.14546890556812286, "rewards/margins": -0.06364364922046661, "rewards/rejected": 0.20911255478858948, "step": 147 }, { "epoch": 0.03, "learning_rate": 5.441176470588236e-06, "logits/chosen": -0.5680505037307739, "logits/rejected": -0.6380565166473389, "logps/chosen": -236.51251220703125, "logps/rejected": -130.18324279785156, "loss": 0.2372, "rewards/accuracies": 1.0, "rewards/chosen": 0.6772125363349915, "rewards/margins": 2.243898868560791, "rewards/rejected": -1.5666862726211548, "step": 148 }, { "epoch": 0.03, "learning_rate": 5.4779411764705894e-06, "logits/chosen": -0.8254972696304321, "logits/rejected": -0.895983874797821, "logps/chosen": -152.12551879882812, "logps/rejected": -124.9244384765625, "loss": 0.0761, "rewards/accuracies": 1.0, "rewards/chosen": 1.0719131231307983, "rewards/margins": 2.09716796875, "rewards/rejected": -1.0252548456192017, "step": 149 }, { "epoch": 0.03, "learning_rate": 5.514705882352942e-06, "logits/chosen": -0.45426928997039795, "logits/rejected": -0.47623196244239807, "logps/chosen": -260.8736572265625, "logps/rejected": -268.1824951171875, "loss": 0.1321, "rewards/accuracies": 1.0, "rewards/chosen": 1.7016419172286987, "rewards/margins": 1.4494751691818237, "rewards/rejected": 0.252166748046875, "step": 150 }, { "epoch": 0.03, "learning_rate": 5.5514705882352945e-06, "logits/chosen": -0.7337218523025513, "logits/rejected": -0.7307431697845459, "logps/chosen": -130.9072723388672, "logps/rejected": -126.93038940429688, "loss": 0.2877, "rewards/accuracies": 1.0, "rewards/chosen": 0.4600357115268707, "rewards/margins": 0.5543922185897827, "rewards/rejected": -0.09435653686523438, "step": 151 }, { "epoch": 0.03, "learning_rate": 5.588235294117647e-06, "logits/chosen": -0.33226338028907776, "logits/rejected": -0.3615563213825226, "logps/chosen": -236.9210205078125, "logps/rejected": -120.43761444091797, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/chosen": 2.4880828857421875, "rewards/margins": 2.649195909500122, "rewards/rejected": -0.161112979054451, "step": 152 }, { "epoch": 0.03, "learning_rate": 5.625e-06, "logits/chosen": -0.7068774700164795, "logits/rejected": -0.7717236876487732, "logps/chosen": -185.19308471679688, "logps/rejected": -35.8733024597168, "loss": 0.4863, "rewards/accuracies": 0.0, "rewards/chosen": -0.4811966121196747, "rewards/margins": -0.41106951236724854, "rewards/rejected": -0.07012710720300674, "step": 153 }, { "epoch": 0.03, "learning_rate": 5.661764705882353e-06, "logits/chosen": -0.5425491333007812, "logits/rejected": -0.5739151239395142, "logps/chosen": -102.52064514160156, "logps/rejected": -100.697265625, "loss": 0.4652, "rewards/accuracies": 0.0, "rewards/chosen": -0.5455002188682556, "rewards/margins": -0.2609451711177826, "rewards/rejected": -0.284555047750473, "step": 154 }, { "epoch": 0.03, "learning_rate": 5.698529411764706e-06, "logits/chosen": -0.6448688507080078, "logits/rejected": -0.6465761065483093, "logps/chosen": -277.7326354980469, "logps/rejected": -127.40608978271484, "loss": 0.1278, "rewards/accuracies": 1.0, "rewards/chosen": 1.572778344154358, "rewards/margins": 2.6779298782348633, "rewards/rejected": -1.1051514148712158, "step": 155 }, { "epoch": 0.03, "learning_rate": 5.735294117647059e-06, "logits/chosen": -0.6743716597557068, "logits/rejected": -0.6689947247505188, "logps/chosen": -85.70053100585938, "logps/rejected": -118.29496002197266, "loss": 0.4607, "rewards/accuracies": 0.0, "rewards/chosen": -0.003034973284229636, "rewards/margins": -0.04636077955365181, "rewards/rejected": 0.04332580789923668, "step": 156 }, { "epoch": 0.03, "learning_rate": 5.772058823529412e-06, "logits/chosen": -0.6883677840232849, "logits/rejected": -0.7044680714607239, "logps/chosen": -86.97732543945312, "logps/rejected": -107.16864776611328, "loss": 0.5698, "rewards/accuracies": 1.0, "rewards/chosen": -0.021569062024354935, "rewards/margins": 1.1922317743301392, "rewards/rejected": -1.2138007879257202, "step": 157 }, { "epoch": 0.03, "learning_rate": 5.808823529411766e-06, "logits/chosen": -0.5351256728172302, "logits/rejected": -0.515920102596283, "logps/chosen": -215.88055419921875, "logps/rejected": -206.208984375, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 1.5468109846115112, "rewards/margins": 2.49521803855896, "rewards/rejected": -0.948406994342804, "step": 158 }, { "epoch": 0.04, "learning_rate": 5.845588235294119e-06, "logits/chosen": -0.5894219279289246, "logits/rejected": -0.5426807999610901, "logps/chosen": -151.3828125, "logps/rejected": -187.3560028076172, "loss": 0.4331, "rewards/accuracies": 1.0, "rewards/chosen": 0.7923583984375, "rewards/margins": 0.06809234619140625, "rewards/rejected": 0.7242660522460938, "step": 159 }, { "epoch": 0.04, "learning_rate": 5.882352941176471e-06, "logits/chosen": -0.5583988428115845, "logits/rejected": -0.5495744943618774, "logps/chosen": -180.41024780273438, "logps/rejected": -228.6873016357422, "loss": 0.1005, "rewards/accuracies": 1.0, "rewards/chosen": 2.475978136062622, "rewards/margins": 3.858987331390381, "rewards/rejected": -1.3830093145370483, "step": 160 }, { "epoch": 0.04, "learning_rate": 5.919117647058824e-06, "logits/chosen": -0.6581497192382812, "logits/rejected": -0.6653083562850952, "logps/chosen": -78.29176330566406, "logps/rejected": -85.44549560546875, "loss": 1.0611, "rewards/accuracies": 0.0, "rewards/chosen": 0.12101135402917862, "rewards/margins": -0.5707198977470398, "rewards/rejected": 0.6917312741279602, "step": 161 }, { "epoch": 0.04, "learning_rate": 5.955882352941177e-06, "logits/chosen": -0.512898862361908, "logits/rejected": -0.47249242663383484, "logps/chosen": -164.06234741210938, "logps/rejected": -129.99276733398438, "loss": 0.1598, "rewards/accuracies": 1.0, "rewards/chosen": 0.9236847162246704, "rewards/margins": 1.8053299188613892, "rewards/rejected": -0.8816452026367188, "step": 162 }, { "epoch": 0.04, "learning_rate": 5.99264705882353e-06, "logits/chosen": -0.3494291305541992, "logits/rejected": -0.3494291305541992, "logps/chosen": -55.75497817993164, "logps/rejected": -55.75497817993164, "loss": 0.5334, "rewards/accuracies": 0.0, "rewards/chosen": -0.21338997781276703, "rewards/margins": 0.0, "rewards/rejected": -0.21338997781276703, "step": 163 }, { "epoch": 0.04, "learning_rate": 6.029411764705883e-06, "logits/chosen": -0.30453115701675415, "logits/rejected": -0.2818511128425598, "logps/chosen": -75.44458770751953, "logps/rejected": -97.2886962890625, "loss": 0.1063, "rewards/accuracies": 1.0, "rewards/chosen": 0.4127647578716278, "rewards/margins": 1.6055359840393066, "rewards/rejected": -1.1927711963653564, "step": 164 }, { "epoch": 0.04, "learning_rate": 6.066176470588236e-06, "logits/chosen": -0.1786222606897354, "logits/rejected": -0.1786222606897354, "logps/chosen": -54.06779861450195, "logps/rejected": -54.06779861450195, "loss": 0.4375, "rewards/accuracies": 0.0, "rewards/chosen": -0.18275336921215057, "rewards/margins": 0.0, "rewards/rejected": -0.18275336921215057, "step": 165 }, { "epoch": 0.04, "learning_rate": 6.102941176470589e-06, "logits/chosen": -0.6430598497390747, "logits/rejected": -0.6430598497390747, "logps/chosen": -48.57538604736328, "logps/rejected": -48.57538604736328, "loss": 0.5914, "rewards/accuracies": 0.0, "rewards/chosen": 0.017103195190429688, "rewards/margins": 0.0, "rewards/rejected": 0.017103195190429688, "step": 166 }, { "epoch": 0.04, "learning_rate": 6.139705882352942e-06, "logits/chosen": -0.4757423996925354, "logits/rejected": -0.5366734266281128, "logps/chosen": -267.5835266113281, "logps/rejected": -144.74365234375, "loss": 0.429, "rewards/accuracies": 1.0, "rewards/chosen": 1.7733306884765625, "rewards/margins": 1.7195556163787842, "rewards/rejected": 0.05377502366900444, "step": 167 }, { "epoch": 0.04, "learning_rate": 6.176470588235295e-06, "logits/chosen": -0.6897697448730469, "logits/rejected": -0.6189948916435242, "logps/chosen": -86.39494323730469, "logps/rejected": -215.42958068847656, "loss": 0.8853, "rewards/accuracies": 0.0, "rewards/chosen": 0.38848191499710083, "rewards/margins": -1.1290977001190186, "rewards/rejected": 1.5175796747207642, "step": 168 }, { "epoch": 0.04, "learning_rate": 6.213235294117647e-06, "logits/chosen": -0.5856705904006958, "logits/rejected": -0.5195869207382202, "logps/chosen": -223.70968627929688, "logps/rejected": -147.66741943359375, "loss": 0.0827, "rewards/accuracies": 1.0, "rewards/chosen": 2.201007127761841, "rewards/margins": 3.577645778656006, "rewards/rejected": -1.3766387701034546, "step": 169 }, { "epoch": 0.04, "learning_rate": 6.25e-06, "logits/chosen": -0.6947145462036133, "logits/rejected": -0.6947145462036133, "logps/chosen": -169.51004028320312, "logps/rejected": -169.51004028320312, "loss": 0.4425, "rewards/accuracies": 0.0, "rewards/chosen": -1.9301728010177612, "rewards/margins": 0.0, "rewards/rejected": -1.9301728010177612, "step": 170 }, { "epoch": 0.04, "learning_rate": 6.286764705882353e-06, "logits/chosen": -0.33186233043670654, "logits/rejected": -0.3016554117202759, "logps/chosen": -183.34811401367188, "logps/rejected": -178.6739044189453, "loss": 0.271, "rewards/accuracies": 1.0, "rewards/chosen": 1.5933197736740112, "rewards/margins": 3.9020156860351562, "rewards/rejected": -2.3086960315704346, "step": 171 }, { "epoch": 0.04, "learning_rate": 6.323529411764706e-06, "logits/chosen": -0.012054610066115856, "logits/rejected": -0.0050764442421495914, "logps/chosen": -65.97845458984375, "logps/rejected": -73.85562133789062, "loss": 0.4978, "rewards/accuracies": 1.0, "rewards/chosen": -0.40101319551467896, "rewards/margins": 0.01244354248046875, "rewards/rejected": -0.4134567379951477, "step": 172 }, { "epoch": 0.04, "learning_rate": 6.360294117647059e-06, "logits/chosen": -0.47361987829208374, "logits/rejected": -0.47361987829208374, "logps/chosen": -19.09791374206543, "logps/rejected": -19.09791374206543, "loss": 1.0695, "rewards/accuracies": 0.0, "rewards/chosen": -0.17888985574245453, "rewards/margins": 0.0, "rewards/rejected": -0.17888985574245453, "step": 173 }, { "epoch": 0.04, "learning_rate": 6.397058823529412e-06, "logits/chosen": -0.47104451060295105, "logits/rejected": -0.44528740644454956, "logps/chosen": -95.62456512451172, "logps/rejected": -66.19378662109375, "loss": 0.233, "rewards/accuracies": 1.0, "rewards/chosen": 0.10347366333007812, "rewards/margins": 0.5385391712188721, "rewards/rejected": -0.43506547808647156, "step": 174 }, { "epoch": 0.04, "learning_rate": 6.433823529411766e-06, "logits/chosen": -0.292463093996048, "logits/rejected": -0.292463093996048, "logps/chosen": -108.6245346069336, "logps/rejected": -108.6245346069336, "loss": 0.7293, "rewards/accuracies": 0.0, "rewards/chosen": -0.15174178779125214, "rewards/margins": 0.0, "rewards/rejected": -0.15174178779125214, "step": 175 }, { "epoch": 0.04, "learning_rate": 6.470588235294119e-06, "logits/chosen": -0.7468643188476562, "logits/rejected": -0.7312136888504028, "logps/chosen": -96.08912658691406, "logps/rejected": -94.50508117675781, "loss": 0.3304, "rewards/accuracies": 1.0, "rewards/chosen": 0.3334236145019531, "rewards/margins": 0.16852416098117828, "rewards/rejected": 0.16489945352077484, "step": 176 }, { "epoch": 0.04, "learning_rate": 6.507352941176472e-06, "logits/chosen": -0.2585594356060028, "logits/rejected": -0.25237858295440674, "logps/chosen": -89.35487365722656, "logps/rejected": -91.96610260009766, "loss": 0.429, "rewards/accuracies": 1.0, "rewards/chosen": -0.12730561196804047, "rewards/margins": 0.3483161926269531, "rewards/rejected": -0.4756217896938324, "step": 177 }, { "epoch": 0.04, "learning_rate": 6.544117647058824e-06, "logits/chosen": -0.6986017823219299, "logits/rejected": -0.7539211511611938, "logps/chosen": -116.70980834960938, "logps/rejected": -62.11834716796875, "loss": 0.341, "rewards/accuracies": 1.0, "rewards/chosen": 0.09188232570886612, "rewards/margins": 0.2476421296596527, "rewards/rejected": -0.1557598114013672, "step": 178 }, { "epoch": 0.04, "learning_rate": 6.580882352941177e-06, "logits/chosen": -0.6803989410400391, "logits/rejected": -0.6866398453712463, "logps/chosen": -61.207069396972656, "logps/rejected": -61.843082427978516, "loss": 0.3101, "rewards/accuracies": 1.0, "rewards/chosen": 0.17596435546875, "rewards/margins": 0.24908828735351562, "rewards/rejected": -0.07312393188476562, "step": 179 }, { "epoch": 0.04, "learning_rate": 6.61764705882353e-06, "logits/chosen": -0.5265435576438904, "logits/rejected": -0.4531315565109253, "logps/chosen": -96.47055053710938, "logps/rejected": -102.76316833496094, "loss": 0.4419, "rewards/accuracies": 0.0, "rewards/chosen": 0.15617676079273224, "rewards/margins": -0.13100279867649078, "rewards/rejected": 0.287179559469223, "step": 180 }, { "epoch": 0.04, "learning_rate": 6.654411764705883e-06, "logits/chosen": -0.45993101596832275, "logits/rejected": -0.45368388295173645, "logps/chosen": -85.15774536132812, "logps/rejected": -95.48772430419922, "loss": 0.1015, "rewards/accuracies": 1.0, "rewards/chosen": 0.6037582755088806, "rewards/margins": 1.7364082336425781, "rewards/rejected": -1.1326500177383423, "step": 181 }, { "epoch": 0.04, "learning_rate": 6.6911764705882356e-06, "logits/chosen": -0.5786221027374268, "logits/rejected": -0.5201244354248047, "logps/chosen": -152.37356567382812, "logps/rejected": -128.564208984375, "loss": 0.2229, "rewards/accuracies": 1.0, "rewards/chosen": 0.6794463992118835, "rewards/margins": 1.2117111682891846, "rewards/rejected": -0.5322647094726562, "step": 182 }, { "epoch": 0.04, "learning_rate": 6.727941176470589e-06, "logits/chosen": -0.5137429237365723, "logits/rejected": -0.49674323201179504, "logps/chosen": -106.73463439941406, "logps/rejected": -122.624755859375, "loss": 0.1773, "rewards/accuracies": 1.0, "rewards/chosen": 0.06525421142578125, "rewards/margins": 0.8582855463027954, "rewards/rejected": -0.7930313348770142, "step": 183 }, { "epoch": 0.04, "learning_rate": 6.764705882352942e-06, "logits/chosen": -0.4081304967403412, "logits/rejected": -0.38837310671806335, "logps/chosen": -156.388427734375, "logps/rejected": -237.8792266845703, "loss": 0.0737, "rewards/accuracies": 1.0, "rewards/chosen": 1.2275818586349487, "rewards/margins": 2.05853271484375, "rewards/rejected": -0.830950915813446, "step": 184 }, { "epoch": 0.04, "learning_rate": 6.801470588235295e-06, "logits/chosen": -0.6778456568717957, "logits/rejected": -0.7063929438591003, "logps/chosen": -172.5523223876953, "logps/rejected": -99.86625671386719, "loss": 0.1081, "rewards/accuracies": 1.0, "rewards/chosen": 1.0472183227539062, "rewards/margins": 1.7332031726837158, "rewards/rejected": -0.6859847903251648, "step": 185 }, { "epoch": 0.04, "learning_rate": 6.838235294117648e-06, "logits/chosen": -0.3692299425601959, "logits/rejected": -0.3692299425601959, "logps/chosen": -78.677490234375, "logps/rejected": -78.677490234375, "loss": 0.5416, "rewards/accuracies": 0.0, "rewards/chosen": -0.478555291891098, "rewards/margins": 0.0, "rewards/rejected": -0.478555291891098, "step": 186 }, { "epoch": 0.04, "learning_rate": 6.875e-06, "logits/chosen": -0.7819388508796692, "logits/rejected": -0.7620863914489746, "logps/chosen": -158.8505859375, "logps/rejected": -197.7701873779297, "loss": 0.3345, "rewards/accuracies": 1.0, "rewards/chosen": 0.9823074340820312, "rewards/margins": 0.5558319091796875, "rewards/rejected": 0.42647552490234375, "step": 187 }, { "epoch": 0.04, "learning_rate": 6.911764705882353e-06, "logits/chosen": -0.4402504563331604, "logits/rejected": -0.4498251974582672, "logps/chosen": -125.31782531738281, "logps/rejected": -65.7630844116211, "loss": 0.2053, "rewards/accuracies": 1.0, "rewards/chosen": 0.32674866914749146, "rewards/margins": 0.990713894367218, "rewards/rejected": -0.6639652252197266, "step": 188 }, { "epoch": 0.04, "learning_rate": 6.948529411764706e-06, "logits/chosen": -0.44656670093536377, "logits/rejected": -0.3599397540092468, "logps/chosen": -175.9863739013672, "logps/rejected": -143.54246520996094, "loss": 0.1195, "rewards/accuracies": 1.0, "rewards/chosen": 1.3147262334823608, "rewards/margins": 1.5776641368865967, "rewards/rejected": -0.2629379332065582, "step": 189 }, { "epoch": 0.04, "learning_rate": 6.985294117647059e-06, "logits/chosen": -0.4249301552772522, "logits/rejected": -0.45235398411750793, "logps/chosen": -68.91390991210938, "logps/rejected": -22.485822677612305, "loss": 0.5233, "rewards/accuracies": 0.0, "rewards/chosen": -0.5675838589668274, "rewards/margins": -0.4699403941631317, "rewards/rejected": -0.09764347225427628, "step": 190 }, { "epoch": 0.04, "learning_rate": 7.022058823529412e-06, "logits/chosen": -0.4725024998188019, "logits/rejected": -0.412349671125412, "logps/chosen": -195.66639709472656, "logps/rejected": -168.96041870117188, "loss": 0.0869, "rewards/accuracies": 1.0, "rewards/chosen": 1.6432968378067017, "rewards/margins": 1.7711166143417358, "rewards/rejected": -0.12781982123851776, "step": 191 }, { "epoch": 0.04, "learning_rate": 7.058823529411766e-06, "logits/chosen": -0.7357568740844727, "logits/rejected": -0.7470667362213135, "logps/chosen": -200.72410583496094, "logps/rejected": -147.7838134765625, "loss": 0.2416, "rewards/accuracies": 1.0, "rewards/chosen": 0.8422134518623352, "rewards/margins": 1.1641952991485596, "rewards/rejected": -0.321981817483902, "step": 192 }, { "epoch": 0.04, "learning_rate": 7.095588235294119e-06, "logits/chosen": -0.45549464225769043, "logits/rejected": -0.040166061371564865, "logps/chosen": -234.58914184570312, "logps/rejected": -244.909423828125, "loss": 0.1326, "rewards/accuracies": 1.0, "rewards/chosen": 1.282934546470642, "rewards/margins": 2.9942901134490967, "rewards/rejected": -1.7113555669784546, "step": 193 }, { "epoch": 0.04, "learning_rate": 7.132352941176472e-06, "logits/chosen": -0.5365193486213684, "logits/rejected": -0.5059455037117004, "logps/chosen": -83.27268981933594, "logps/rejected": -115.60865783691406, "loss": 0.5617, "rewards/accuracies": 1.0, "rewards/chosen": -0.549224853515625, "rewards/margins": 0.07811278104782104, "rewards/rejected": -0.627337634563446, "step": 194 }, { "epoch": 0.04, "learning_rate": 7.169117647058825e-06, "logits/chosen": -0.31402987241744995, "logits/rejected": -0.2746162712574005, "logps/chosen": -154.76754760742188, "logps/rejected": -209.53628540039062, "loss": 0.3139, "rewards/accuracies": 1.0, "rewards/chosen": 0.968890368938446, "rewards/margins": 0.2692840099334717, "rewards/rejected": 0.6996063590049744, "step": 195 }, { "epoch": 0.04, "learning_rate": 7.205882352941177e-06, "logits/chosen": -0.41366374492645264, "logits/rejected": 0.04860313981771469, "logps/chosen": -72.83004760742188, "logps/rejected": -169.1611328125, "loss": 0.2679, "rewards/accuracies": 1.0, "rewards/chosen": -0.8436943292617798, "rewards/margins": 0.7716026306152344, "rewards/rejected": -1.6152969598770142, "step": 196 }, { "epoch": 0.04, "learning_rate": 7.24264705882353e-06, "logits/chosen": -0.7460058927536011, "logits/rejected": -0.7460058927536011, "logps/chosen": -68.43229675292969, "logps/rejected": -68.43229675292969, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -0.5458602905273438, "rewards/margins": 0.0, "rewards/rejected": -0.5458602905273438, "step": 197 }, { "epoch": 0.04, "learning_rate": 7.2794117647058826e-06, "logits/chosen": -0.39927905797958374, "logits/rejected": -0.3461005687713623, "logps/chosen": -128.4522705078125, "logps/rejected": -127.8940200805664, "loss": 0.049, "rewards/accuracies": 1.0, "rewards/chosen": 0.7929626703262329, "rewards/margins": 2.8077454566955566, "rewards/rejected": -2.014782667160034, "step": 198 }, { "epoch": 0.04, "learning_rate": 7.3161764705882355e-06, "logits/chosen": -0.5098463892936707, "logits/rejected": -0.38880655169487, "logps/chosen": -142.7772674560547, "logps/rejected": -226.24078369140625, "loss": 0.0471, "rewards/accuracies": 1.0, "rewards/chosen": 1.5371261835098267, "rewards/margins": 3.126939296722412, "rewards/rejected": -1.589813232421875, "step": 199 }, { "epoch": 0.04, "learning_rate": 7.352941176470589e-06, "logits/chosen": -0.30108097195625305, "logits/rejected": -0.22250612080097198, "logps/chosen": -69.86894226074219, "logps/rejected": -123.28408813476562, "loss": 0.3571, "rewards/accuracies": 1.0, "rewards/chosen": -0.01703796349465847, "rewards/margins": 2.024017572402954, "rewards/rejected": -2.04105544090271, "step": 200 }, { "epoch": 0.04, "learning_rate": 7.389705882352942e-06, "logits/chosen": -0.5776257514953613, "logits/rejected": -0.5852473378181458, "logps/chosen": -90.52545166015625, "logps/rejected": -176.46365356445312, "loss": 0.114, "rewards/accuracies": 1.0, "rewards/chosen": 0.218647763133049, "rewards/margins": 1.4033737182617188, "rewards/rejected": -1.1847259998321533, "step": 201 }, { "epoch": 0.04, "learning_rate": 7.426470588235295e-06, "logits/chosen": -0.35164543986320496, "logits/rejected": -0.24262981116771698, "logps/chosen": -57.74798583984375, "logps/rejected": -150.8014678955078, "loss": 0.4327, "rewards/accuracies": 0.0, "rewards/chosen": 0.03524971008300781, "rewards/margins": -0.27164575457572937, "rewards/rejected": 0.3068954646587372, "step": 202 }, { "epoch": 0.04, "learning_rate": 7.463235294117648e-06, "logits/chosen": -0.6218171715736389, "logits/rejected": -0.6080642938613892, "logps/chosen": -114.82991027832031, "logps/rejected": -88.64637756347656, "loss": 0.1818, "rewards/accuracies": 1.0, "rewards/chosen": -0.0129852294921875, "rewards/margins": 0.8316566348075867, "rewards/rejected": -0.8446418642997742, "step": 203 }, { "epoch": 0.05, "learning_rate": 7.500000000000001e-06, "logits/chosen": -0.487122118473053, "logits/rejected": -0.6139196157455444, "logps/chosen": -166.09292602539062, "logps/rejected": -30.10934829711914, "loss": 0.1674, "rewards/accuracies": 1.0, "rewards/chosen": 0.7616668939590454, "rewards/margins": 1.0458998680114746, "rewards/rejected": -0.2842329144477844, "step": 204 }, { "epoch": 0.05, "learning_rate": 7.536764705882353e-06, "logits/chosen": -0.6391294002532959, "logits/rejected": -0.739324688911438, "logps/chosen": -204.131591796875, "logps/rejected": -163.46878051757812, "loss": 0.117, "rewards/accuracies": 1.0, "rewards/chosen": 0.7652221918106079, "rewards/margins": 1.3631272315979004, "rewards/rejected": -0.5979049801826477, "step": 205 }, { "epoch": 0.05, "learning_rate": 7.573529411764706e-06, "logits/chosen": -0.47593289613723755, "logits/rejected": 0.38099169731140137, "logps/chosen": -79.97743225097656, "logps/rejected": -121.43965148925781, "loss": 0.1716, "rewards/accuracies": 1.0, "rewards/chosen": -0.0763702392578125, "rewards/margins": 2.6892502307891846, "rewards/rejected": -2.765620470046997, "step": 206 }, { "epoch": 0.05, "learning_rate": 7.610294117647059e-06, "logits/chosen": -0.7070267200469971, "logits/rejected": -0.7070267200469971, "logps/chosen": -131.1415557861328, "logps/rejected": -131.1415557861328, "loss": 0.3622, "rewards/accuracies": 0.0, "rewards/chosen": -2.6338632106781006, "rewards/margins": 0.0, "rewards/rejected": -2.6338632106781006, "step": 207 }, { "epoch": 0.05, "learning_rate": 7.647058823529411e-06, "logits/chosen": -0.4645257294178009, "logits/rejected": -0.4335448443889618, "logps/chosen": -42.851776123046875, "logps/rejected": -111.05357360839844, "loss": 0.4152, "rewards/accuracies": 1.0, "rewards/chosen": -0.21033859252929688, "rewards/margins": 0.5533958673477173, "rewards/rejected": -0.7637344598770142, "step": 208 }, { "epoch": 0.05, "learning_rate": 7.683823529411766e-06, "logits/chosen": -0.6741001605987549, "logits/rejected": -0.06597761064767838, "logps/chosen": -76.5670394897461, "logps/rejected": -220.08010864257812, "loss": 0.4535, "rewards/accuracies": 1.0, "rewards/chosen": 0.2014930695295334, "rewards/margins": 6.054030895233154, "rewards/rejected": -5.852537631988525, "step": 209 }, { "epoch": 0.05, "learning_rate": 7.720588235294119e-06, "logits/chosen": -0.6688428521156311, "logits/rejected": -0.6714372038841248, "logps/chosen": -110.97991943359375, "logps/rejected": -131.36868286132812, "loss": 0.2293, "rewards/accuracies": 1.0, "rewards/chosen": 0.08145370334386826, "rewards/margins": 0.7205924987792969, "rewards/rejected": -0.6391388177871704, "step": 210 }, { "epoch": 0.05, "learning_rate": 7.757352941176472e-06, "logits/chosen": -0.20772449672222137, "logits/rejected": -0.21675963699817657, "logps/chosen": -74.84254455566406, "logps/rejected": -83.56388854980469, "loss": 0.1853, "rewards/accuracies": 1.0, "rewards/chosen": 0.02727508544921875, "rewards/margins": 1.31010901927948, "rewards/rejected": -1.2828339338302612, "step": 211 }, { "epoch": 0.05, "learning_rate": 7.794117647058825e-06, "logits/chosen": -0.32549893856048584, "logits/rejected": -0.33433103561401367, "logps/chosen": -86.0120620727539, "logps/rejected": -101.85514068603516, "loss": 0.3014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6189247369766235, "rewards/margins": 1.0587799549102783, "rewards/rejected": -1.6777046918869019, "step": 212 }, { "epoch": 0.05, "learning_rate": 7.830882352941177e-06, "logits/chosen": -0.35189786553382874, "logits/rejected": -0.3066087067127228, "logps/chosen": -174.55740356445312, "logps/rejected": -179.71812438964844, "loss": 0.2091, "rewards/accuracies": 1.0, "rewards/chosen": 0.5666168332099915, "rewards/margins": 3.670724391937256, "rewards/rejected": -3.104107618331909, "step": 213 }, { "epoch": 0.05, "learning_rate": 7.86764705882353e-06, "logits/chosen": -0.872410237789154, "logits/rejected": -0.8551499843597412, "logps/chosen": -108.94509887695312, "logps/rejected": -109.98414611816406, "loss": 0.2889, "rewards/accuracies": 1.0, "rewards/chosen": 0.2218681424856186, "rewards/margins": 0.4403846859931946, "rewards/rejected": -0.218516543507576, "step": 214 }, { "epoch": 0.05, "learning_rate": 7.904411764705883e-06, "logits/chosen": -0.2896658480167389, "logits/rejected": -0.2896658480167389, "logps/chosen": -57.99334716796875, "logps/rejected": -57.99334716796875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -0.1972217559814453, "rewards/margins": 0.0, "rewards/rejected": -0.1972217559814453, "step": 215 }, { "epoch": 0.05, "learning_rate": 7.941176470588236e-06, "logits/chosen": -0.4973568618297577, "logits/rejected": -0.47014936804771423, "logps/chosen": -93.31392669677734, "logps/rejected": -82.34593200683594, "loss": 0.3982, "rewards/accuracies": 1.0, "rewards/chosen": -0.047637939453125, "rewards/margins": 0.16277466714382172, "rewards/rejected": -0.21041260659694672, "step": 216 }, { "epoch": 0.05, "learning_rate": 7.97794117647059e-06, "logits/chosen": -0.5499997735023499, "logits/rejected": -0.37999868392944336, "logps/chosen": -237.59765625, "logps/rejected": -187.65594482421875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 1.6228622198104858, "rewards/margins": 4.323803901672363, "rewards/rejected": -2.700941562652588, "step": 217 }, { "epoch": 0.05, "learning_rate": 8.014705882352942e-06, "logits/chosen": -0.5761139988899231, "logits/rejected": -0.6408694982528687, "logps/chosen": -208.95516967773438, "logps/rejected": -202.5316925048828, "loss": 0.6432, "rewards/accuracies": 0.0, "rewards/chosen": 0.8050033450126648, "rewards/margins": -0.581341564655304, "rewards/rejected": 1.3863449096679688, "step": 218 }, { "epoch": 0.05, "learning_rate": 8.051470588235295e-06, "logits/chosen": -0.5027859807014465, "logits/rejected": -0.4828643798828125, "logps/chosen": -66.21742248535156, "logps/rejected": -68.48150634765625, "loss": 0.2997, "rewards/accuracies": 1.0, "rewards/chosen": 0.07083282619714737, "rewards/margins": 0.2718650996685028, "rewards/rejected": -0.20103226602077484, "step": 219 }, { "epoch": 0.05, "learning_rate": 8.088235294117648e-06, "logits/chosen": -0.7186441421508789, "logits/rejected": -0.7186441421508789, "logps/chosen": -172.18402099609375, "logps/rejected": -172.18402099609375, "loss": 0.3476, "rewards/accuracies": 0.0, "rewards/chosen": -0.9092178344726562, "rewards/margins": 0.0, "rewards/rejected": -0.9092178344726562, "step": 220 }, { "epoch": 0.05, "learning_rate": 8.125000000000001e-06, "logits/chosen": -0.567449688911438, "logits/rejected": -0.5405920743942261, "logps/chosen": -194.51608276367188, "logps/rejected": -260.2820739746094, "loss": 0.3007, "rewards/accuracies": 1.0, "rewards/chosen": 0.8658065795898438, "rewards/margins": 3.665945529937744, "rewards/rejected": -2.8001389503479004, "step": 221 }, { "epoch": 0.05, "learning_rate": 8.161764705882354e-06, "logits/chosen": -0.6417229771614075, "logits/rejected": -0.6559605002403259, "logps/chosen": -95.19965362548828, "logps/rejected": -83.45352172851562, "loss": 0.2034, "rewards/accuracies": 1.0, "rewards/chosen": 0.2379859983921051, "rewards/margins": 1.2837531566619873, "rewards/rejected": -1.0457671880722046, "step": 222 }, { "epoch": 0.05, "learning_rate": 8.198529411764707e-06, "logits/chosen": -0.7578250169754028, "logits/rejected": -0.7259539365768433, "logps/chosen": -203.70901489257812, "logps/rejected": -203.09368896484375, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": 0.854870617389679, "rewards/margins": 3.468280076980591, "rewards/rejected": -2.6134095191955566, "step": 223 }, { "epoch": 0.05, "learning_rate": 8.23529411764706e-06, "logits/chosen": -0.4050217568874359, "logits/rejected": -0.40030986070632935, "logps/chosen": -139.3356170654297, "logps/rejected": -133.17892456054688, "loss": 0.0635, "rewards/accuracies": 1.0, "rewards/chosen": -0.01288452185690403, "rewards/margins": 2.1551406383514404, "rewards/rejected": -2.168025255203247, "step": 224 }, { "epoch": 0.05, "learning_rate": 8.272058823529413e-06, "logits/chosen": -0.6579787731170654, "logits/rejected": -0.6421557068824768, "logps/chosen": -86.15836334228516, "logps/rejected": -96.70236206054688, "loss": 0.3804, "rewards/accuracies": 0.0, "rewards/chosen": -1.0272690057754517, "rewards/margins": -0.07400667667388916, "rewards/rejected": -0.9532623291015625, "step": 225 }, { "epoch": 0.05, "learning_rate": 8.308823529411766e-06, "logits/chosen": -0.7145455479621887, "logits/rejected": -0.7565634846687317, "logps/chosen": -142.51077270507812, "logps/rejected": -149.81053161621094, "loss": 0.3936, "rewards/accuracies": 0.0, "rewards/chosen": -0.8607544302940369, "rewards/margins": -0.10235446691513062, "rewards/rejected": -0.7583999633789062, "step": 226 }, { "epoch": 0.05, "learning_rate": 8.345588235294119e-06, "logits/chosen": -0.7155442833900452, "logits/rejected": -0.7228834629058838, "logps/chosen": -105.62922668457031, "logps/rejected": -27.39461898803711, "loss": 0.4948, "rewards/accuracies": 0.0, "rewards/chosen": -0.8240280151367188, "rewards/margins": -0.5159000158309937, "rewards/rejected": -0.3081279695034027, "step": 227 }, { "epoch": 0.05, "learning_rate": 8.382352941176472e-06, "logits/chosen": -0.6559641361236572, "logits/rejected": -0.6480184197425842, "logps/chosen": -130.3647003173828, "logps/rejected": -117.48057556152344, "loss": 0.2994, "rewards/accuracies": 1.0, "rewards/chosen": 0.17730560898780823, "rewards/margins": 0.5556045770645142, "rewards/rejected": -0.37829896807670593, "step": 228 }, { "epoch": 0.05, "learning_rate": 8.419117647058824e-06, "logits/chosen": -0.5432214140892029, "logits/rejected": -0.5162635445594788, "logps/chosen": -112.87713623046875, "logps/rejected": -144.06968688964844, "loss": 0.2639, "rewards/accuracies": 1.0, "rewards/chosen": -0.10074005275964737, "rewards/margins": 1.2458648681640625, "rewards/rejected": -1.3466049432754517, "step": 229 }, { "epoch": 0.05, "learning_rate": 8.455882352941177e-06, "logits/chosen": -0.30580344796180725, "logits/rejected": -0.29635006189346313, "logps/chosen": -149.7102813720703, "logps/rejected": -176.65359497070312, "loss": 0.5784, "rewards/accuracies": 0.0, "rewards/chosen": 0.6329910159111023, "rewards/margins": -0.7787705063819885, "rewards/rejected": 1.4117615222930908, "step": 230 }, { "epoch": 0.05, "learning_rate": 8.49264705882353e-06, "logits/chosen": -0.6731932759284973, "logits/rejected": -0.6892932057380676, "logps/chosen": -292.66058349609375, "logps/rejected": -242.50038146972656, "loss": 0.4464, "rewards/accuracies": 0.0, "rewards/chosen": 1.5373963117599487, "rewards/margins": -0.14170074462890625, "rewards/rejected": 1.679097056388855, "step": 231 }, { "epoch": 0.05, "learning_rate": 8.529411764705883e-06, "logits/chosen": -0.5235775113105774, "logits/rejected": -0.5216423273086548, "logps/chosen": -219.71673583984375, "logps/rejected": -159.2740478515625, "loss": 0.0698, "rewards/accuracies": 1.0, "rewards/chosen": 0.22896423935890198, "rewards/margins": 1.9349029064178467, "rewards/rejected": -1.705938696861267, "step": 232 }, { "epoch": 0.05, "learning_rate": 8.566176470588236e-06, "logits/chosen": -0.3816875219345093, "logits/rejected": -0.3977317810058594, "logps/chosen": -167.43060302734375, "logps/rejected": -38.723968505859375, "loss": 0.3588, "rewards/accuracies": 1.0, "rewards/chosen": 0.28251343965530396, "rewards/margins": 1.4566211700439453, "rewards/rejected": -1.1741077899932861, "step": 233 }, { "epoch": 0.05, "learning_rate": 8.60294117647059e-06, "logits/chosen": -0.5446451902389526, "logits/rejected": -0.48032277822494507, "logps/chosen": -64.8843994140625, "logps/rejected": -200.89727783203125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": 0.04962158203125, "rewards/margins": 5.351403713226318, "rewards/rejected": -5.301782131195068, "step": 234 }, { "epoch": 0.05, "learning_rate": 8.639705882352942e-06, "logits/chosen": -0.5082846283912659, "logits/rejected": -0.48650962114334106, "logps/chosen": -94.28450775146484, "logps/rejected": -72.36483764648438, "loss": 0.4796, "rewards/accuracies": 0.0, "rewards/chosen": -0.4350570738315582, "rewards/margins": -0.42398834228515625, "rewards/rejected": -0.01106872595846653, "step": 235 }, { "epoch": 0.05, "learning_rate": 8.676470588235295e-06, "logits/chosen": -0.6172541379928589, "logits/rejected": -0.6051919460296631, "logps/chosen": -111.26639556884766, "logps/rejected": -169.59072875976562, "loss": 0.5546, "rewards/accuracies": 0.0, "rewards/chosen": -1.1337471008300781, "rewards/margins": -0.21385419368743896, "rewards/rejected": -0.9198929071426392, "step": 236 }, { "epoch": 0.05, "learning_rate": 8.713235294117648e-06, "logits/chosen": -0.5000206232070923, "logits/rejected": -0.5000206232070923, "logps/chosen": -241.83966064453125, "logps/rejected": -241.83966064453125, "loss": 0.3545, "rewards/accuracies": 0.0, "rewards/chosen": 0.5382308959960938, "rewards/margins": 0.0, "rewards/rejected": 0.5382308959960938, "step": 237 }, { "epoch": 0.05, "learning_rate": 8.750000000000001e-06, "logits/chosen": -0.5653108954429626, "logits/rejected": -0.5723724961280823, "logps/chosen": -104.52326202392578, "logps/rejected": -125.13864135742188, "loss": 0.2571, "rewards/accuracies": 1.0, "rewards/chosen": -0.7298759818077087, "rewards/margins": 0.4005897641181946, "rewards/rejected": -1.1304657459259033, "step": 238 }, { "epoch": 0.05, "learning_rate": 8.786764705882354e-06, "logits/chosen": -0.7178998589515686, "logits/rejected": -0.6726003885269165, "logps/chosen": -85.04025268554688, "logps/rejected": -210.72299194335938, "loss": 0.1212, "rewards/accuracies": 1.0, "rewards/chosen": 0.1044158935546875, "rewards/margins": 3.514849901199341, "rewards/rejected": -3.4104340076446533, "step": 239 }, { "epoch": 0.05, "learning_rate": 8.823529411764707e-06, "logits/chosen": -0.46119874715805054, "logits/rejected": -0.46260666847229004, "logps/chosen": -92.64321899414062, "logps/rejected": -104.4333724975586, "loss": 0.1865, "rewards/accuracies": 1.0, "rewards/chosen": 0.02172241173684597, "rewards/margins": 1.3265938758850098, "rewards/rejected": -1.3048714399337769, "step": 240 }, { "epoch": 0.05, "learning_rate": 8.86029411764706e-06, "logits/chosen": -0.3808166980743408, "logits/rejected": -0.3459915816783905, "logps/chosen": -75.61363220214844, "logps/rejected": -67.75294494628906, "loss": 0.3659, "rewards/accuracies": 1.0, "rewards/chosen": -0.8594101071357727, "rewards/margins": 0.09731829166412354, "rewards/rejected": -0.9567283987998962, "step": 241 }, { "epoch": 0.05, "learning_rate": 8.897058823529413e-06, "logits/chosen": -0.6715401411056519, "logits/rejected": -0.6952460408210754, "logps/chosen": -210.9180908203125, "logps/rejected": -166.4318084716797, "loss": 0.0867, "rewards/accuracies": 1.0, "rewards/chosen": 0.6076553463935852, "rewards/margins": 2.0269715785980225, "rewards/rejected": -1.4193161725997925, "step": 242 }, { "epoch": 0.05, "learning_rate": 8.933823529411766e-06, "logits/chosen": -0.36797913908958435, "logits/rejected": -0.3822478950023651, "logps/chosen": -127.91020202636719, "logps/rejected": -87.92842102050781, "loss": 1.214, "rewards/accuracies": 0.0, "rewards/chosen": -3.3962013721466064, "rewards/margins": -2.3293983936309814, "rewards/rejected": -1.066802978515625, "step": 243 }, { "epoch": 0.05, "learning_rate": 8.970588235294119e-06, "logits/chosen": -0.6311296820640564, "logits/rejected": -0.6119405031204224, "logps/chosen": -126.87101745605469, "logps/rejected": -141.79615783691406, "loss": 0.2409, "rewards/accuracies": 1.0, "rewards/chosen": -1.9787415266036987, "rewards/margins": 0.7905081510543823, "rewards/rejected": -2.769249677658081, "step": 244 }, { "epoch": 0.05, "learning_rate": 9.007352941176471e-06, "logits/chosen": -0.4689180552959442, "logits/rejected": -0.3878960609436035, "logps/chosen": -109.0294189453125, "logps/rejected": -131.91641235351562, "loss": 0.0495, "rewards/accuracies": 1.0, "rewards/chosen": 0.5498390197753906, "rewards/margins": 2.5473761558532715, "rewards/rejected": -1.9975372552871704, "step": 245 }, { "epoch": 0.05, "learning_rate": 9.044117647058824e-06, "logits/chosen": -0.34243613481521606, "logits/rejected": -0.35771629214286804, "logps/chosen": -91.37271118164062, "logps/rejected": -146.46664428710938, "loss": 0.0862, "rewards/accuracies": 1.0, "rewards/chosen": -1.4556373357772827, "rewards/margins": 1.7414268255233765, "rewards/rejected": -3.197064161300659, "step": 246 }, { "epoch": 0.05, "learning_rate": 9.080882352941177e-06, "logits/chosen": -0.43569856882095337, "logits/rejected": -0.4010339677333832, "logps/chosen": -72.57332611083984, "logps/rejected": -99.8325424194336, "loss": 0.263, "rewards/accuracies": 1.0, "rewards/chosen": -1.3802169561386108, "rewards/margins": 1.0622941255569458, "rewards/rejected": -2.4425110816955566, "step": 247 }, { "epoch": 0.05, "learning_rate": 9.11764705882353e-06, "logits/chosen": -0.37343597412109375, "logits/rejected": -0.3701236844062805, "logps/chosen": -102.47180938720703, "logps/rejected": -126.92728424072266, "loss": 0.471, "rewards/accuracies": 0.0, "rewards/chosen": -0.4967964291572571, "rewards/margins": -0.21942979097366333, "rewards/rejected": -0.27736663818359375, "step": 248 }, { "epoch": 0.06, "learning_rate": 9.154411764705883e-06, "logits/chosen": -0.3868759572505951, "logits/rejected": -0.3777203857898712, "logps/chosen": -193.43592834472656, "logps/rejected": -149.75228881835938, "loss": 0.068, "rewards/accuracies": 1.0, "rewards/chosen": 1.6474579572677612, "rewards/margins": 5.311408519744873, "rewards/rejected": -3.6639504432678223, "step": 249 }, { "epoch": 0.06, "learning_rate": 9.191176470588236e-06, "logits/chosen": -0.4793866276741028, "logits/rejected": -0.42941468954086304, "logps/chosen": -65.01927947998047, "logps/rejected": -94.70712280273438, "loss": 0.1963, "rewards/accuracies": 1.0, "rewards/chosen": 0.2510383725166321, "rewards/margins": 2.9450714588165283, "rewards/rejected": -2.694033145904541, "step": 250 }, { "epoch": 0.06, "learning_rate": 9.227941176470589e-06, "logits/chosen": -0.44773998856544495, "logits/rejected": -0.32318365573883057, "logps/chosen": -149.0774688720703, "logps/rejected": -93.37802124023438, "loss": 1.5404, "rewards/accuracies": 0.0, "rewards/chosen": -3.4673867225646973, "rewards/margins": -2.7167611122131348, "rewards/rejected": -0.7506256103515625, "step": 251 }, { "epoch": 0.06, "learning_rate": 9.264705882352942e-06, "logits/chosen": -0.5218012928962708, "logits/rejected": -0.5346832275390625, "logps/chosen": -247.510498046875, "logps/rejected": -194.3363494873047, "loss": 0.451, "rewards/accuracies": 1.0, "rewards/chosen": 2.1523683071136475, "rewards/margins": 6.526854515075684, "rewards/rejected": -4.374485969543457, "step": 252 }, { "epoch": 0.06, "learning_rate": 9.301470588235295e-06, "logits/chosen": -0.4273827373981476, "logits/rejected": -0.4273827373981476, "logps/chosen": -357.34295654296875, "logps/rejected": -357.34295654296875, "loss": 0.3523, "rewards/accuracies": 0.0, "rewards/chosen": -1.794439673423767, "rewards/margins": 0.0, "rewards/rejected": -1.794439673423767, "step": 253 }, { "epoch": 0.06, "learning_rate": 9.338235294117648e-06, "logits/chosen": -0.32126203179359436, "logits/rejected": -0.2955203056335449, "logps/chosen": -40.14509201049805, "logps/rejected": -29.198375701904297, "loss": 0.326, "rewards/accuracies": 1.0, "rewards/chosen": -0.2300773710012436, "rewards/margins": 0.5932292938232422, "rewards/rejected": -0.823306679725647, "step": 254 }, { "epoch": 0.06, "learning_rate": 9.375000000000001e-06, "logits/chosen": -0.5096769332885742, "logits/rejected": -0.5435668230056763, "logps/chosen": -97.67725372314453, "logps/rejected": -162.69775390625, "loss": 0.122, "rewards/accuracies": 1.0, "rewards/chosen": -0.16270065307617188, "rewards/margins": 3.4377479553222656, "rewards/rejected": -3.6004486083984375, "step": 255 }, { "epoch": 0.06, "learning_rate": 9.411764705882354e-06, "logits/chosen": -0.46198034286499023, "logits/rejected": -0.44790688157081604, "logps/chosen": -55.425819396972656, "logps/rejected": -26.109567642211914, "loss": 0.2009, "rewards/accuracies": 1.0, "rewards/chosen": 0.117467500269413, "rewards/margins": 0.731192409992218, "rewards/rejected": -0.6137248873710632, "step": 256 }, { "epoch": 0.06, "learning_rate": 9.448529411764707e-06, "logits/chosen": -0.5005356669425964, "logits/rejected": -0.5181611180305481, "logps/chosen": -166.0418701171875, "logps/rejected": -100.19924926757812, "loss": 0.172, "rewards/accuracies": 1.0, "rewards/chosen": 0.19702301919460297, "rewards/margins": 1.4881234169006348, "rewards/rejected": -1.2911003828048706, "step": 257 }, { "epoch": 0.06, "learning_rate": 9.48529411764706e-06, "logits/chosen": -0.39666783809661865, "logits/rejected": -0.4253653287887573, "logps/chosen": -72.51078796386719, "logps/rejected": -92.74624633789062, "loss": 0.5092, "rewards/accuracies": 0.0, "rewards/chosen": -0.3005508482456207, "rewards/margins": -0.42174074053764343, "rewards/rejected": 0.12118988484144211, "step": 258 }, { "epoch": 0.06, "learning_rate": 9.522058823529413e-06, "logits/chosen": -0.3893037438392639, "logits/rejected": -0.40525418519973755, "logps/chosen": -181.46963500976562, "logps/rejected": -84.41220092773438, "loss": 0.0753, "rewards/accuracies": 1.0, "rewards/chosen": 1.1588318347930908, "rewards/margins": 2.3943376541137695, "rewards/rejected": -1.2355057001113892, "step": 259 }, { "epoch": 0.06, "learning_rate": 9.558823529411766e-06, "logits/chosen": -0.5484069585800171, "logits/rejected": -0.5454832911491394, "logps/chosen": -117.17439270019531, "logps/rejected": -144.0460205078125, "loss": 0.1627, "rewards/accuracies": 1.0, "rewards/chosen": -0.4532425105571747, "rewards/margins": 1.420343041419983, "rewards/rejected": -1.87358558177948, "step": 260 }, { "epoch": 0.06, "learning_rate": 9.595588235294119e-06, "logits/chosen": -0.4284301698207855, "logits/rejected": -0.41451290249824524, "logps/chosen": -108.10997009277344, "logps/rejected": -231.0834197998047, "loss": 0.4944, "rewards/accuracies": 0.0, "rewards/chosen": -0.05602417141199112, "rewards/margins": -0.5196060538291931, "rewards/rejected": 0.4635818600654602, "step": 261 }, { "epoch": 0.06, "learning_rate": 9.632352941176471e-06, "logits/chosen": -0.407376229763031, "logits/rejected": -0.36814647912979126, "logps/chosen": -139.71157836914062, "logps/rejected": -122.22667694091797, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": 0.9926727414131165, "rewards/margins": 3.7803192138671875, "rewards/rejected": -2.787646532058716, "step": 262 }, { "epoch": 0.06, "learning_rate": 9.669117647058824e-06, "logits/chosen": -0.7192574739456177, "logits/rejected": -0.7447265386581421, "logps/chosen": -114.54086303710938, "logps/rejected": -103.61560821533203, "loss": 0.119, "rewards/accuracies": 1.0, "rewards/chosen": 0.835650622844696, "rewards/margins": 2.0359597206115723, "rewards/rejected": -1.2003090381622314, "step": 263 }, { "epoch": 0.06, "learning_rate": 9.705882352941177e-06, "logits/chosen": -0.5588687062263489, "logits/rejected": -0.5973625779151917, "logps/chosen": -76.53242492675781, "logps/rejected": -123.63102722167969, "loss": 0.6381, "rewards/accuracies": 0.0, "rewards/chosen": -0.9436416625976562, "rewards/margins": -0.6955459713935852, "rewards/rejected": -0.24809570610523224, "step": 264 }, { "epoch": 0.06, "learning_rate": 9.74264705882353e-06, "logits/chosen": -0.5804030299186707, "logits/rejected": -0.5481200814247131, "logps/chosen": -126.97421264648438, "logps/rejected": -139.61514282226562, "loss": 0.4938, "rewards/accuracies": 0.0, "rewards/chosen": 0.15825195610523224, "rewards/margins": -0.4247833490371704, "rewards/rejected": 0.5830352902412415, "step": 265 }, { "epoch": 0.06, "learning_rate": 9.779411764705883e-06, "logits/chosen": -0.5056776404380798, "logits/rejected": -0.5094541907310486, "logps/chosen": -85.1507339477539, "logps/rejected": -68.7333984375, "loss": 0.3853, "rewards/accuracies": 0.0, "rewards/chosen": -0.2262062132358551, "rewards/margins": -0.11584167927503586, "rewards/rejected": -0.11036453396081924, "step": 266 }, { "epoch": 0.06, "learning_rate": 9.816176470588236e-06, "logits/chosen": -0.5937280058860779, "logits/rejected": -0.5969550013542175, "logps/chosen": -78.05831909179688, "logps/rejected": -72.86515808105469, "loss": 0.3679, "rewards/accuracies": 1.0, "rewards/chosen": -0.437387079000473, "rewards/margins": 1.359014868736267, "rewards/rejected": -1.7964019775390625, "step": 267 }, { "epoch": 0.06, "learning_rate": 9.852941176470589e-06, "logits/chosen": -0.6417698264122009, "logits/rejected": -0.5646606683731079, "logps/chosen": -124.66981506347656, "logps/rejected": -181.507080078125, "loss": 0.1424, "rewards/accuracies": 1.0, "rewards/chosen": -0.190937802195549, "rewards/margins": 1.13878333568573, "rewards/rejected": -1.3297210931777954, "step": 268 }, { "epoch": 0.06, "learning_rate": 9.889705882352942e-06, "logits/chosen": -0.4660787880420685, "logits/rejected": -0.31257757544517517, "logps/chosen": -251.02821350097656, "logps/rejected": -123.52104187011719, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 2.180375814437866, "rewards/margins": 4.681105136871338, "rewards/rejected": -2.5007293224334717, "step": 269 }, { "epoch": 0.06, "learning_rate": 9.926470588235295e-06, "logits/chosen": -0.25420665740966797, "logits/rejected": -0.220696359872818, "logps/chosen": -104.23686981201172, "logps/rejected": -179.81689453125, "loss": 0.5359, "rewards/accuracies": 0.0, "rewards/chosen": 0.1285759061574936, "rewards/margins": -0.6004493832588196, "rewards/rejected": 0.7290253043174744, "step": 270 }, { "epoch": 0.06, "learning_rate": 9.963235294117648e-06, "logits/chosen": -0.33227652311325073, "logits/rejected": -0.34118321537971497, "logps/chosen": -94.75898742675781, "logps/rejected": -80.06493377685547, "loss": 0.1415, "rewards/accuracies": 1.0, "rewards/chosen": 0.3712265193462372, "rewards/margins": 1.12352454662323, "rewards/rejected": -0.7522979974746704, "step": 271 }, { "epoch": 0.06, "learning_rate": 1e-05, "logits/chosen": -0.7054105401039124, "logits/rejected": -0.6982227563858032, "logps/chosen": -90.06941223144531, "logps/rejected": -99.17505645751953, "loss": 0.1599, "rewards/accuracies": 1.0, "rewards/chosen": 0.4968719482421875, "rewards/margins": 1.0793373584747314, "rewards/rejected": -0.5824653506278992, "step": 272 }, { "epoch": 0.06, "learning_rate": 9.99999967875601e-06, "logits/chosen": -0.6168368458747864, "logits/rejected": -0.5376067757606506, "logps/chosen": -117.88021850585938, "logps/rejected": -191.37228393554688, "loss": 1.6159, "rewards/accuracies": 0.0, "rewards/chosen": -1.83251953125, "rewards/margins": -2.8487930297851562, "rewards/rejected": 1.0162734985351562, "step": 273 }, { "epoch": 0.06, "learning_rate": 9.999998715024082e-06, "logits/chosen": -0.505367636680603, "logits/rejected": -0.49499914050102234, "logps/chosen": -102.3973617553711, "logps/rejected": -146.83395385742188, "loss": 0.201, "rewards/accuracies": 1.0, "rewards/chosen": 0.5335304141044617, "rewards/margins": 2.8234353065490723, "rewards/rejected": -2.289904832839966, "step": 274 }, { "epoch": 0.06, "learning_rate": 9.999997108804337e-06, "logits/chosen": -0.7838420867919922, "logits/rejected": -0.727482795715332, "logps/chosen": -126.68959045410156, "logps/rejected": -203.7467041015625, "loss": 0.0715, "rewards/accuracies": 1.0, "rewards/chosen": -0.20181122422218323, "rewards/margins": 3.0812180042266846, "rewards/rejected": -3.283029317855835, "step": 275 }, { "epoch": 0.06, "learning_rate": 9.999994860096985e-06, "logits/chosen": -0.6151400208473206, "logits/rejected": -0.5474783182144165, "logps/chosen": -115.20477294921875, "logps/rejected": -199.70501708984375, "loss": 1.0178, "rewards/accuracies": 0.0, "rewards/chosen": -0.5035324096679688, "rewards/margins": -1.8905441761016846, "rewards/rejected": 1.3870117664337158, "step": 276 }, { "epoch": 0.06, "learning_rate": 9.99999196890231e-06, "logits/chosen": -0.720587968826294, "logits/rejected": -0.63923579454422, "logps/chosen": -179.93917846679688, "logps/rejected": -215.2684326171875, "loss": 0.051, "rewards/accuracies": 1.0, "rewards/chosen": 1.714558482170105, "rewards/margins": 4.321485996246338, "rewards/rejected": -2.6069276332855225, "step": 277 }, { "epoch": 0.06, "learning_rate": 9.999988435220688e-06, "logits/chosen": -0.8414016366004944, "logits/rejected": -0.9521644115447998, "logps/chosen": -124.33001708984375, "logps/rejected": -19.403629302978516, "loss": 0.6122, "rewards/accuracies": 0.0, "rewards/chosen": -1.0017365217208862, "rewards/margins": -0.8576624393463135, "rewards/rejected": -0.14407406747341156, "step": 278 }, { "epoch": 0.06, "learning_rate": 9.999984259052573e-06, "logits/chosen": -0.8180961012840271, "logits/rejected": -0.8312571048736572, "logps/chosen": -142.525146484375, "logps/rejected": -182.0592041015625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 2.2381134033203125, "rewards/margins": 6.576849460601807, "rewards/rejected": -4.338736057281494, "step": 279 }, { "epoch": 0.06, "learning_rate": 9.9999794403985e-06, "logits/chosen": -0.4078643023967743, "logits/rejected": -0.3554116189479828, "logps/chosen": -101.03709411621094, "logps/rejected": -56.92597961425781, "loss": 0.1491, "rewards/accuracies": 1.0, "rewards/chosen": 0.2268577665090561, "rewards/margins": 1.2604622840881348, "rewards/rejected": -1.0336045026779175, "step": 280 }, { "epoch": 0.06, "learning_rate": 9.999973979259088e-06, "logits/chosen": -0.4840322434902191, "logits/rejected": -0.49566105008125305, "logps/chosen": -141.43035888671875, "logps/rejected": -143.98565673828125, "loss": 0.5347, "rewards/accuracies": 0.0, "rewards/chosen": -2.6298744678497314, "rewards/margins": -0.49120259284973145, "rewards/rejected": -2.138671875, "step": 281 }, { "epoch": 0.06, "learning_rate": 9.99996787563504e-06, "logits/chosen": -0.2978028655052185, "logits/rejected": -0.2719429135322571, "logps/chosen": -171.5517578125, "logps/rejected": -311.79656982421875, "loss": 0.7535, "rewards/accuracies": 0.0, "rewards/chosen": 0.5405929684638977, "rewards/margins": -0.7472580075263977, "rewards/rejected": 1.2878509759902954, "step": 282 }, { "epoch": 0.06, "learning_rate": 9.999961129527139e-06, "logits/chosen": -0.4141436815261841, "logits/rejected": -0.3983819782733917, "logps/chosen": -74.26148986816406, "logps/rejected": -108.08724212646484, "loss": 0.4294, "rewards/accuracies": 1.0, "rewards/chosen": 0.0032379149924963713, "rewards/margins": 0.906847357749939, "rewards/rejected": -0.9036094546318054, "step": 283 }, { "epoch": 0.06, "learning_rate": 9.999953740936252e-06, "logits/chosen": -0.3218068778514862, "logits/rejected": -0.3218068778514862, "logps/chosen": -43.6424674987793, "logps/rejected": -43.6424674987793, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -0.7442790865898132, "rewards/margins": 0.0, "rewards/rejected": -0.7442790865898132, "step": 284 }, { "epoch": 0.06, "learning_rate": 9.99994570986333e-06, "logits/chosen": -0.49614217877388, "logits/rejected": -0.46574416756629944, "logps/chosen": -99.57603454589844, "logps/rejected": -29.975746154785156, "loss": 0.2313, "rewards/accuracies": 1.0, "rewards/chosen": -0.01064376812428236, "rewards/margins": 0.6483722925186157, "rewards/rejected": -0.6590160727500916, "step": 285 }, { "epoch": 0.06, "learning_rate": 9.999937036309402e-06, "logits/chosen": -0.3056703507900238, "logits/rejected": -0.26752984523773193, "logps/chosen": -270.3473815917969, "logps/rejected": -96.29867553710938, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": 1.2091950178146362, "rewards/margins": 2.555755615234375, "rewards/rejected": -1.3465607166290283, "step": 286 }, { "epoch": 0.06, "learning_rate": 9.999927720275586e-06, "logits/chosen": -0.7691265344619751, "logits/rejected": -0.7551110982894897, "logps/chosen": -64.27165222167969, "logps/rejected": -124.12799072265625, "loss": 0.4151, "rewards/accuracies": 1.0, "rewards/chosen": -0.3019993007183075, "rewards/margins": 1.9180431365966797, "rewards/rejected": -2.2200424671173096, "step": 287 }, { "epoch": 0.06, "learning_rate": 9.999917761763076e-06, "logits/chosen": -0.5965301990509033, "logits/rejected": -0.6060196757316589, "logps/chosen": -113.19938659667969, "logps/rejected": -57.04283142089844, "loss": 1.1605, "rewards/accuracies": 0.0, "rewards/chosen": -2.624577283859253, "rewards/margins": -2.1986243724823, "rewards/rejected": -0.4259529113769531, "step": 288 }, { "epoch": 0.06, "learning_rate": 9.999907160773155e-06, "logits/chosen": -0.4867146909236908, "logits/rejected": -0.4867146909236908, "logps/chosen": -125.85655975341797, "logps/rejected": -125.85655975341797, "loss": 0.3595, "rewards/accuracies": 0.0, "rewards/chosen": -3.261767625808716, "rewards/margins": 0.0, "rewards/rejected": -3.261767625808716, "step": 289 }, { "epoch": 0.06, "learning_rate": 9.99989591730718e-06, "logits/chosen": -0.3178165853023529, "logits/rejected": -0.3011423945426941, "logps/chosen": -44.67627716064453, "logps/rejected": -100.61430358886719, "loss": 0.4013, "rewards/accuracies": 0.0, "rewards/chosen": -0.057683564722537994, "rewards/margins": -0.18896561861038208, "rewards/rejected": 0.1312820464372635, "step": 290 }, { "epoch": 0.06, "learning_rate": 9.999884031366603e-06, "logits/chosen": -0.5095275640487671, "logits/rejected": -0.5200386643409729, "logps/chosen": -90.76538848876953, "logps/rejected": -33.11595153808594, "loss": 0.5473, "rewards/accuracies": 1.0, "rewards/chosen": 0.1900383085012436, "rewards/margins": 0.7054426670074463, "rewards/rejected": -0.5154043436050415, "step": 291 }, { "epoch": 0.06, "learning_rate": 9.999871502952944e-06, "logits/chosen": -0.6814134120941162, "logits/rejected": -0.6739498972892761, "logps/chosen": -150.8265380859375, "logps/rejected": -114.15859985351562, "loss": 0.2762, "rewards/accuracies": 1.0, "rewards/chosen": -0.8597168326377869, "rewards/margins": 0.6473022103309631, "rewards/rejected": -1.50701904296875, "step": 292 }, { "epoch": 0.06, "learning_rate": 9.99985833206782e-06, "logits/chosen": -0.7569717168807983, "logits/rejected": -0.7713258266448975, "logps/chosen": -111.15505981445312, "logps/rejected": -205.54840087890625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 1.4159294366836548, "rewards/margins": 7.25335168838501, "rewards/rejected": -5.8374223709106445, "step": 293 }, { "epoch": 0.07, "learning_rate": 9.999844518712917e-06, "logits/chosen": -0.37752681970596313, "logits/rejected": -0.36488527059555054, "logps/chosen": -89.20761108398438, "logps/rejected": -125.10860443115234, "loss": 0.1631, "rewards/accuracies": 1.0, "rewards/chosen": -0.3533035218715668, "rewards/margins": 0.9766685962677002, "rewards/rejected": -1.3299721479415894, "step": 294 }, { "epoch": 0.07, "learning_rate": 9.999830062890012e-06, "logits/chosen": -0.598232626914978, "logits/rejected": -0.5651987791061401, "logps/chosen": -164.62191772460938, "logps/rejected": -173.77349853515625, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": 0.666180431842804, "rewards/margins": 4.5011749267578125, "rewards/rejected": -3.8349945545196533, "step": 295 }, { "epoch": 0.07, "learning_rate": 9.999814964600965e-06, "logits/chosen": -0.34754958748817444, "logits/rejected": -0.3140261471271515, "logps/chosen": -94.11422729492188, "logps/rejected": -92.76731872558594, "loss": 0.2774, "rewards/accuracies": 1.0, "rewards/chosen": -0.640789806842804, "rewards/margins": 0.3200271725654602, "rewards/rejected": -0.9608169794082642, "step": 296 }, { "epoch": 0.07, "learning_rate": 9.999799223847714e-06, "logits/chosen": -0.6227644085884094, "logits/rejected": -0.5969597697257996, "logps/chosen": -90.39479064941406, "logps/rejected": -99.43984985351562, "loss": 0.2592, "rewards/accuracies": 1.0, "rewards/chosen": 0.38278505206108093, "rewards/margins": 0.39348068833351135, "rewards/rejected": -0.01069564837962389, "step": 297 }, { "epoch": 0.07, "learning_rate": 9.999782840632281e-06, "logits/chosen": -0.6556271314620972, "logits/rejected": -0.5647959113121033, "logps/chosen": -224.43040466308594, "logps/rejected": -286.16162109375, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": 1.064473032951355, "rewards/margins": 3.4092302322387695, "rewards/rejected": -2.344757080078125, "step": 298 }, { "epoch": 0.07, "learning_rate": 9.999765814956771e-06, "logits/chosen": -0.47215256094932556, "logits/rejected": -0.47215256094932556, "logps/chosen": -54.64445877075195, "logps/rejected": -54.64445877075195, "loss": 0.4041, "rewards/accuracies": 0.0, "rewards/chosen": -1.0248943567276, "rewards/margins": 0.0, "rewards/rejected": -1.0248943567276, "step": 299 }, { "epoch": 0.07, "learning_rate": 9.999748146823376e-06, "logits/chosen": -0.566555917263031, "logits/rejected": -0.538169801235199, "logps/chosen": -169.98709106445312, "logps/rejected": -142.43592834472656, "loss": 0.0762, "rewards/accuracies": 1.0, "rewards/chosen": -0.7405670285224915, "rewards/margins": 1.884432315826416, "rewards/rejected": -2.6249992847442627, "step": 300 }, { "epoch": 0.07, "learning_rate": 9.999729836234363e-06, "logits/chosen": -0.44190093874931335, "logits/rejected": -0.44190093874931335, "logps/chosen": -100.10836029052734, "logps/rejected": -100.10836029052734, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -1.6352424621582031, "rewards/margins": 0.0, "rewards/rejected": -1.6352424621582031, "step": 301 }, { "epoch": 0.07, "learning_rate": 9.999710883192082e-06, "logits/chosen": -0.3797481954097748, "logits/rejected": -0.3797481954097748, "logps/chosen": -54.98337173461914, "logps/rejected": -54.98337173461914, "loss": 0.6973, "rewards/accuracies": 0.0, "rewards/chosen": -0.5465855002403259, "rewards/margins": 0.0, "rewards/rejected": -0.5465855002403259, "step": 302 }, { "epoch": 0.07, "learning_rate": 9.999691287698975e-06, "logits/chosen": -0.6472834944725037, "logits/rejected": -0.6215773224830627, "logps/chosen": -168.5401611328125, "logps/rejected": -195.267333984375, "loss": 0.2486, "rewards/accuracies": 1.0, "rewards/chosen": 0.9153915643692017, "rewards/margins": 0.5363662838935852, "rewards/rejected": 0.37902528047561646, "step": 303 }, { "epoch": 0.07, "learning_rate": 9.999671049757554e-06, "logits/chosen": -0.5808424353599548, "logits/rejected": -0.6091081500053406, "logps/chosen": -239.83578491210938, "logps/rejected": -217.49131774902344, "loss": 0.0938, "rewards/accuracies": 1.0, "rewards/chosen": -0.558868408203125, "rewards/margins": 1.6029984951019287, "rewards/rejected": -2.1618669033050537, "step": 304 }, { "epoch": 0.07, "learning_rate": 9.999650169370423e-06, "logits/chosen": -0.569691002368927, "logits/rejected": -0.569691002368927, "logps/chosen": -71.40131378173828, "logps/rejected": -71.40131378173828, "loss": 0.5866, "rewards/accuracies": 0.0, "rewards/chosen": -0.7787544131278992, "rewards/margins": 0.0, "rewards/rejected": -0.7787544131278992, "step": 305 }, { "epoch": 0.07, "learning_rate": 9.999628646540262e-06, "logits/chosen": -0.7598720788955688, "logits/rejected": -0.7472785115242004, "logps/chosen": -133.16775512695312, "logps/rejected": -122.23546600341797, "loss": 0.345, "rewards/accuracies": 1.0, "rewards/chosen": -3.7131645679473877, "rewards/margins": 0.057636260986328125, "rewards/rejected": -3.770800828933716, "step": 306 }, { "epoch": 0.07, "learning_rate": 9.999606481269841e-06, "logits/chosen": -0.5293912887573242, "logits/rejected": -0.5030085444450378, "logps/chosen": -111.57818603515625, "logps/rejected": -162.5950927734375, "loss": 0.109, "rewards/accuracies": 1.0, "rewards/chosen": 0.43386536836624146, "rewards/margins": 2.409738302230835, "rewards/rejected": -1.9758728742599487, "step": 307 }, { "epoch": 0.07, "learning_rate": 9.999583673562006e-06, "logits/chosen": -0.175004780292511, "logits/rejected": -0.20587339997291565, "logps/chosen": -36.929840087890625, "logps/rejected": -174.875732421875, "loss": 0.1337, "rewards/accuracies": 1.0, "rewards/chosen": -1.1298836469650269, "rewards/margins": 1.4455984830856323, "rewards/rejected": -2.575482130050659, "step": 308 }, { "epoch": 0.07, "learning_rate": 9.999560223419687e-06, "logits/chosen": -0.38077834248542786, "logits/rejected": -0.38077834248542786, "logps/chosen": -96.01638793945312, "logps/rejected": -96.01638793945312, "loss": 0.4136, "rewards/accuracies": 0.0, "rewards/chosen": -0.27326127886772156, "rewards/margins": 0.0, "rewards/rejected": -0.27326127886772156, "step": 309 }, { "epoch": 0.07, "learning_rate": 9.999536130845897e-06, "logits/chosen": -0.26641157269477844, "logits/rejected": -0.22361169755458832, "logps/chosen": -60.6243896484375, "logps/rejected": -161.52244567871094, "loss": 0.0646, "rewards/accuracies": 1.0, "rewards/chosen": -0.1315891295671463, "rewards/margins": 4.572479724884033, "rewards/rejected": -4.704068660736084, "step": 310 }, { "epoch": 0.07, "learning_rate": 9.999511395843734e-06, "logits/chosen": -0.4860153794288635, "logits/rejected": -0.4684809744358063, "logps/chosen": -110.22604370117188, "logps/rejected": -190.7411651611328, "loss": 0.261, "rewards/accuracies": 1.0, "rewards/chosen": 0.1587478667497635, "rewards/margins": 1.1033782958984375, "rewards/rejected": -0.9446304440498352, "step": 311 }, { "epoch": 0.07, "learning_rate": 9.999486018416375e-06, "logits/chosen": -0.7451525926589966, "logits/rejected": -0.018771272152662277, "logps/chosen": -115.4053955078125, "logps/rejected": -103.98987579345703, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": -0.9001907706260681, "rewards/margins": 3.4365811347961426, "rewards/rejected": -4.3367719650268555, "step": 312 }, { "epoch": 0.07, "learning_rate": 9.99945999856708e-06, "logits/chosen": -0.6028633117675781, "logits/rejected": -0.5865288376808167, "logps/chosen": -222.76629638671875, "logps/rejected": -187.02035522460938, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": 1.4494781494140625, "rewards/margins": 5.420692443847656, "rewards/rejected": -3.9712142944335938, "step": 313 }, { "epoch": 0.07, "learning_rate": 9.999433336299195e-06, "logits/chosen": -0.3050719201564789, "logits/rejected": -0.33245226740837097, "logps/chosen": -90.75306701660156, "logps/rejected": -132.75634765625, "loss": 0.2712, "rewards/accuracies": 1.0, "rewards/chosen": -0.5241638422012329, "rewards/margins": 0.6377776861190796, "rewards/rejected": -1.1619415283203125, "step": 314 }, { "epoch": 0.07, "learning_rate": 9.999406031616143e-06, "logits/chosen": -0.3878789246082306, "logits/rejected": -0.4110984802246094, "logps/chosen": -116.96298217773438, "logps/rejected": -175.44712829589844, "loss": 0.632, "rewards/accuracies": 0.0, "rewards/chosen": -0.39632874727249146, "rewards/margins": -0.7997086048126221, "rewards/rejected": 0.4033798277378082, "step": 315 }, { "epoch": 0.07, "learning_rate": 9.999378084521436e-06, "logits/chosen": -0.6097286939620972, "logits/rejected": -0.5912095308303833, "logps/chosen": -102.54853820800781, "logps/rejected": -110.7279052734375, "loss": 0.2126, "rewards/accuracies": 1.0, "rewards/chosen": -0.44515153765678406, "rewards/margins": 0.9026496410369873, "rewards/rejected": -1.3478012084960938, "step": 316 }, { "epoch": 0.07, "learning_rate": 9.999349495018662e-06, "logits/chosen": -0.7045256495475769, "logits/rejected": -0.6689183115959167, "logps/chosen": -129.3040313720703, "logps/rejected": -285.8292236328125, "loss": 0.5251, "rewards/accuracies": 1.0, "rewards/chosen": -4.93383264541626, "rewards/margins": 0.42349910736083984, "rewards/rejected": -5.3573317527771, "step": 317 }, { "epoch": 0.07, "learning_rate": 9.999320263111495e-06, "logits/chosen": -0.6996862888336182, "logits/rejected": -0.758640706539154, "logps/chosen": -156.0247802734375, "logps/rejected": -135.02700805664062, "loss": 0.2499, "rewards/accuracies": 1.0, "rewards/chosen": -2.7668397426605225, "rewards/margins": 1.5296905040740967, "rewards/rejected": -4.296530246734619, "step": 318 }, { "epoch": 0.07, "learning_rate": 9.999290388803695e-06, "logits/chosen": -0.8890697360038757, "logits/rejected": -0.7408114671707153, "logps/chosen": -148.52896118164062, "logps/rejected": -236.42459106445312, "loss": 1.228, "rewards/accuracies": 0.0, "rewards/chosen": -2.770463705062866, "rewards/margins": -1.797194004058838, "rewards/rejected": -0.9732696413993835, "step": 319 }, { "epoch": 0.07, "learning_rate": 9.999259872099095e-06, "logits/chosen": -0.5480485558509827, "logits/rejected": -0.5229490399360657, "logps/chosen": -215.09066772460938, "logps/rejected": -109.01242065429688, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": 0.8183990716934204, "rewards/margins": 4.694596767425537, "rewards/rejected": -3.8761978149414062, "step": 320 }, { "epoch": 0.07, "learning_rate": 9.999228713001622e-06, "logits/chosen": -0.21175262331962585, "logits/rejected": -0.211053729057312, "logps/chosen": -194.92724609375, "logps/rejected": -141.3343505859375, "loss": 1.0415, "rewards/accuracies": 0.0, "rewards/chosen": -5.3874406814575195, "rewards/margins": -1.502030372619629, "rewards/rejected": -3.8854103088378906, "step": 321 }, { "epoch": 0.07, "learning_rate": 9.999196911515277e-06, "logits/chosen": -0.7753443121910095, "logits/rejected": -0.8041125535964966, "logps/chosen": -185.84762573242188, "logps/rejected": -165.12796020507812, "loss": 0.0953, "rewards/accuracies": 1.0, "rewards/chosen": -0.756512463092804, "rewards/margins": 2.3879213333129883, "rewards/rejected": -3.1444337368011475, "step": 322 }, { "epoch": 0.07, "learning_rate": 9.999164467644146e-06, "logits/chosen": -0.3966159224510193, "logits/rejected": -0.37227392196655273, "logps/chosen": -99.08973693847656, "logps/rejected": -86.46882629394531, "loss": 0.6026, "rewards/accuracies": 1.0, "rewards/chosen": -0.35491180419921875, "rewards/margins": 0.8833588361740112, "rewards/rejected": -1.23827064037323, "step": 323 }, { "epoch": 0.07, "learning_rate": 9.999131381392397e-06, "logits/chosen": -0.4474775791168213, "logits/rejected": -0.3997822403907776, "logps/chosen": -222.5205078125, "logps/rejected": -162.04104614257812, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": 0.6334167718887329, "rewards/margins": 3.292990207672119, "rewards/rejected": -2.6595733165740967, "step": 324 }, { "epoch": 0.07, "learning_rate": 9.999097652764285e-06, "logits/chosen": -0.4585176110267639, "logits/rejected": -0.49210324883461, "logps/chosen": -174.5332489013672, "logps/rejected": -33.887699127197266, "loss": 0.4414, "rewards/accuracies": 1.0, "rewards/chosen": 1.7773650884628296, "rewards/margins": 2.584845542907715, "rewards/rejected": -0.8074804544448853, "step": 325 }, { "epoch": 0.07, "learning_rate": 9.999063281764142e-06, "logits/chosen": -0.5773325562477112, "logits/rejected": -0.5620754957199097, "logps/chosen": -226.9332733154297, "logps/rejected": -256.2749328613281, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.9294723868370056, "rewards/margins": 4.39553689956665, "rewards/rejected": -5.325009346008301, "step": 326 }, { "epoch": 0.07, "learning_rate": 9.999028268396384e-06, "logits/chosen": -0.5200255513191223, "logits/rejected": -0.5364241600036621, "logps/chosen": -121.673828125, "logps/rejected": -222.4392547607422, "loss": 0.0747, "rewards/accuracies": 1.0, "rewards/chosen": -1.1366379261016846, "rewards/margins": 3.6064088344573975, "rewards/rejected": -4.743046760559082, "step": 327 }, { "epoch": 0.07, "learning_rate": 9.99899261266551e-06, "logits/chosen": -0.3275953531265259, "logits/rejected": -0.2910524904727936, "logps/chosen": -68.23646545410156, "logps/rejected": -149.3769989013672, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": 0.16314926743507385, "rewards/margins": 3.0960168838500977, "rewards/rejected": -2.9328675270080566, "step": 328 }, { "epoch": 0.07, "learning_rate": 9.998956314576105e-06, "logits/chosen": -0.38868448138237, "logits/rejected": -0.18493397533893585, "logps/chosen": -242.23971557617188, "logps/rejected": -246.46739196777344, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.39697572588920593, "rewards/margins": 12.634496688842773, "rewards/rejected": -12.237521171569824, "step": 329 }, { "epoch": 0.07, "learning_rate": 9.998919374132829e-06, "logits/chosen": -0.5898688435554504, "logits/rejected": -0.526026725769043, "logps/chosen": -177.70016479492188, "logps/rejected": -59.73912048339844, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": 1.065211534500122, "rewards/margins": 3.0883331298828125, "rewards/rejected": -2.0231215953826904, "step": 330 }, { "epoch": 0.07, "learning_rate": 9.99888179134043e-06, "logits/chosen": -0.6465707421302795, "logits/rejected": -0.6605151891708374, "logps/chosen": -126.05156707763672, "logps/rejected": -135.33206176757812, "loss": 0.1102, "rewards/accuracies": 1.0, "rewards/chosen": 0.12533798813819885, "rewards/margins": 1.5047714710235596, "rewards/rejected": -1.379433512687683, "step": 331 }, { "epoch": 0.07, "learning_rate": 9.99884356620374e-06, "logits/chosen": -0.4386920630931854, "logits/rejected": -0.4127255976200104, "logps/chosen": -211.64297485351562, "logps/rejected": -383.89971923828125, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": 0.3694091737270355, "rewards/margins": 2.8706724643707275, "rewards/rejected": -2.501263380050659, "step": 332 }, { "epoch": 0.07, "learning_rate": 9.998804698727667e-06, "logits/chosen": -0.4006539285182953, "logits/rejected": -0.4006539285182953, "logps/chosen": -143.13699340820312, "logps/rejected": -143.13699340820312, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -0.6594192385673523, "rewards/margins": 0.0, "rewards/rejected": -0.6594192385673523, "step": 333 }, { "epoch": 0.07, "learning_rate": 9.998765188917206e-06, "logits/chosen": -0.45632556080818176, "logits/rejected": -0.4145861566066742, "logps/chosen": -81.96099853515625, "logps/rejected": -157.73313903808594, "loss": 0.2405, "rewards/accuracies": 1.0, "rewards/chosen": -2.3319129943847656, "rewards/margins": 2.6025466918945312, "rewards/rejected": -4.934459686279297, "step": 334 }, { "epoch": 0.07, "learning_rate": 9.998725036777437e-06, "logits/chosen": -0.7654889225959778, "logits/rejected": -0.8674423098564148, "logps/chosen": -225.3609161376953, "logps/rejected": -109.20712280273438, "loss": 0.085, "rewards/accuracies": 1.0, "rewards/chosen": 0.141917422413826, "rewards/margins": 2.4875879287719727, "rewards/rejected": -2.345670461654663, "step": 335 }, { "epoch": 0.07, "learning_rate": 9.998684242313516e-06, "logits/chosen": -0.6031949520111084, "logits/rejected": -0.6146412491798401, "logps/chosen": -159.221923828125, "logps/rejected": -38.387939453125, "loss": 1.0196, "rewards/accuracies": 0.0, "rewards/chosen": -1.78246009349823, "rewards/margins": -1.0446419715881348, "rewards/rejected": -0.73781818151474, "step": 336 }, { "epoch": 0.07, "learning_rate": 9.998642805530687e-06, "logits/chosen": -0.4795409142971039, "logits/rejected": -0.4494435489177704, "logps/chosen": -99.38811492919922, "logps/rejected": -173.57260131835938, "loss": 0.0637, "rewards/accuracies": 1.0, "rewards/chosen": -0.7804145812988281, "rewards/margins": 3.581226348876953, "rewards/rejected": -4.361640930175781, "step": 337 }, { "epoch": 0.07, "learning_rate": 9.998600726434274e-06, "logits/chosen": -0.8742333054542542, "logits/rejected": -0.9126362800598145, "logps/chosen": -167.51104736328125, "logps/rejected": -128.06561279296875, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -0.42639467120170593, "rewards/margins": 3.5930025577545166, "rewards/rejected": -4.019397258758545, "step": 338 }, { "epoch": 0.08, "learning_rate": 9.998558005029685e-06, "logits/chosen": -0.41578859090805054, "logits/rejected": -0.41578859090805054, "logps/chosen": -106.59512329101562, "logps/rejected": -106.59512329101562, "loss": 0.3634, "rewards/accuracies": 0.0, "rewards/chosen": -2.432363986968994, "rewards/margins": 0.0, "rewards/rejected": -2.432363986968994, "step": 339 }, { "epoch": 0.08, "learning_rate": 9.998514641322406e-06, "logits/chosen": -0.6039824485778809, "logits/rejected": -0.5730206370353699, "logps/chosen": -210.958984375, "logps/rejected": -150.97750854492188, "loss": 0.0965, "rewards/accuracies": 1.0, "rewards/chosen": 0.04132995754480362, "rewards/margins": 1.8763245344161987, "rewards/rejected": -1.8349945545196533, "step": 340 }, { "epoch": 0.08, "learning_rate": 9.998470635318015e-06, "logits/chosen": -0.45129165053367615, "logits/rejected": -0.4313910901546478, "logps/chosen": -148.52487182617188, "logps/rejected": -180.27272033691406, "loss": 0.4286, "rewards/accuracies": 0.0, "rewards/chosen": 0.26335906982421875, "rewards/margins": -0.28928226232528687, "rewards/rejected": 0.5526413321495056, "step": 341 }, { "epoch": 0.08, "learning_rate": 9.99842598702216e-06, "logits/chosen": -0.7009820938110352, "logits/rejected": -0.7009820938110352, "logps/chosen": -176.51495361328125, "logps/rejected": -176.51495361328125, "loss": 0.3484, "rewards/accuracies": 0.0, "rewards/chosen": -1.3134063482284546, "rewards/margins": 0.0, "rewards/rejected": -1.3134063482284546, "step": 342 }, { "epoch": 0.08, "learning_rate": 9.998380696440582e-06, "logits/chosen": -0.7465410828590393, "logits/rejected": -0.7267897129058838, "logps/chosen": -92.808349609375, "logps/rejected": -222.759765625, "loss": 0.0615, "rewards/accuracies": 1.0, "rewards/chosen": -0.6052307486534119, "rewards/margins": 4.1631178855896, "rewards/rejected": -4.768348693847656, "step": 343 }, { "epoch": 0.08, "learning_rate": 9.998334763579103e-06, "logits/chosen": -0.5475226640701294, "logits/rejected": -0.5523521900177002, "logps/chosen": -213.14047241210938, "logps/rejected": -255.7945556640625, "loss": 0.1107, "rewards/accuracies": 1.0, "rewards/chosen": 0.8688324093818665, "rewards/margins": 1.827569603919983, "rewards/rejected": -0.9587371945381165, "step": 344 }, { "epoch": 0.08, "learning_rate": 9.998288188443619e-06, "logits/chosen": -0.3969237208366394, "logits/rejected": -0.29452720284461975, "logps/chosen": -176.5487518310547, "logps/rejected": -58.069297790527344, "loss": 0.0473, "rewards/accuracies": 1.0, "rewards/chosen": 1.5946975946426392, "rewards/margins": 2.5449352264404297, "rewards/rejected": -0.9502376914024353, "step": 345 }, { "epoch": 0.08, "learning_rate": 9.99824097104012e-06, "logits/chosen": -0.447057843208313, "logits/rejected": -0.158769428730011, "logps/chosen": -123.01866912841797, "logps/rejected": -192.27896118164062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5156669616699219, "rewards/margins": 8.234126091003418, "rewards/rejected": -8.74979305267334, "step": 346 }, { "epoch": 0.08, "learning_rate": 9.998193111374673e-06, "logits/chosen": -0.7157707214355469, "logits/rejected": -0.05728980898857117, "logps/chosen": -142.35513305664062, "logps/rejected": -137.25201416015625, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": 1.7897125482559204, "rewards/margins": 7.8852219581604, "rewards/rejected": -6.0955095291137695, "step": 347 }, { "epoch": 0.08, "learning_rate": 9.998144609453425e-06, "logits/chosen": -0.6456716060638428, "logits/rejected": -0.6402435898780823, "logps/chosen": -118.73731231689453, "logps/rejected": -145.94741821289062, "loss": 0.0894, "rewards/accuracies": 1.0, "rewards/chosen": 0.2900840938091278, "rewards/margins": 1.7405037879943848, "rewards/rejected": -1.4504196643829346, "step": 348 }, { "epoch": 0.08, "learning_rate": 9.99809546528261e-06, "logits/chosen": -0.6317960619926453, "logits/rejected": -0.6400430798530579, "logps/chosen": -118.08970642089844, "logps/rejected": -110.46991729736328, "loss": 0.087, "rewards/accuracies": 1.0, "rewards/chosen": -0.10506744682788849, "rewards/margins": 1.6652565002441406, "rewards/rejected": -1.7703239917755127, "step": 349 }, { "epoch": 0.08, "learning_rate": 9.998045678868541e-06, "logits/chosen": -0.4067741930484772, "logits/rejected": 0.10670067369937897, "logps/chosen": -100.7750473022461, "logps/rejected": -206.20504760742188, "loss": 0.5507, "rewards/accuracies": 1.0, "rewards/chosen": -0.28837814927101135, "rewards/margins": 7.599761009216309, "rewards/rejected": -7.888139247894287, "step": 350 }, { "epoch": 0.08, "learning_rate": 9.99799525021762e-06, "logits/chosen": -0.45653611421585083, "logits/rejected": -0.45653611421585083, "logps/chosen": -122.7860107421875, "logps/rejected": -122.7860107421875, "loss": 0.3905, "rewards/accuracies": 0.0, "rewards/chosen": -3.688241720199585, "rewards/margins": 0.0, "rewards/rejected": -3.688241720199585, "step": 351 }, { "epoch": 0.08, "learning_rate": 9.997944179336323e-06, "logits/chosen": -0.9570465683937073, "logits/rejected": -0.9204679727554321, "logps/chosen": -101.84388732910156, "logps/rejected": -171.72421264648438, "loss": 0.0397, "rewards/accuracies": 1.0, "rewards/chosen": -0.213481143116951, "rewards/margins": 3.5882248878479004, "rewards/rejected": -3.801706075668335, "step": 352 }, { "epoch": 0.08, "learning_rate": 9.997892466231215e-06, "logits/chosen": -0.4954265058040619, "logits/rejected": -0.5313407182693481, "logps/chosen": -202.51675415039062, "logps/rejected": -209.031982421875, "loss": 0.2768, "rewards/accuracies": 1.0, "rewards/chosen": 1.457482933998108, "rewards/margins": 0.3224914073944092, "rewards/rejected": 1.1349915266036987, "step": 353 }, { "epoch": 0.08, "learning_rate": 9.997840110908938e-06, "logits/chosen": -0.5315839648246765, "logits/rejected": -0.5196253061294556, "logps/chosen": -136.3310546875, "logps/rejected": -119.50221252441406, "loss": 0.5897, "rewards/accuracies": 0.0, "rewards/chosen": -4.213489055633545, "rewards/margins": -0.8070528507232666, "rewards/rejected": -3.4064362049102783, "step": 354 }, { "epoch": 0.08, "learning_rate": 9.997787113376223e-06, "logits/chosen": -0.3309610188007355, "logits/rejected": -0.30366066098213196, "logps/chosen": -92.13694763183594, "logps/rejected": -154.21868896484375, "loss": 0.1536, "rewards/accuracies": 1.0, "rewards/chosen": -0.5164428949356079, "rewards/margins": 1.2009795904159546, "rewards/rejected": -1.7174224853515625, "step": 355 }, { "epoch": 0.08, "learning_rate": 9.997733473639876e-06, "logits/chosen": -0.58486008644104, "logits/rejected": -0.571080207824707, "logps/chosen": -141.71795654296875, "logps/rejected": -135.96395874023438, "loss": 0.2125, "rewards/accuracies": 1.0, "rewards/chosen": -2.5340492725372314, "rewards/margins": 0.7058875560760498, "rewards/rejected": -3.2399368286132812, "step": 356 }, { "epoch": 0.08, "learning_rate": 9.997679191706794e-06, "logits/chosen": -0.7749851942062378, "logits/rejected": -0.7499831914901733, "logps/chosen": -96.30532836914062, "logps/rejected": -150.72323608398438, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.26544952392578125, "rewards/margins": 5.7674560546875, "rewards/rejected": -6.032905578613281, "step": 357 }, { "epoch": 0.08, "learning_rate": 9.99762426758395e-06, "logits/chosen": -1.2090572118759155, "logits/rejected": -1.306236743927002, "logps/chosen": -230.71005249023438, "logps/rejected": -193.15025329589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 4.29937744140625, "rewards/margins": 9.951292037963867, "rewards/rejected": -5.651915073394775, "step": 358 }, { "epoch": 0.08, "learning_rate": 9.997568701278399e-06, "logits/chosen": -0.6027388572692871, "logits/rejected": -0.6265177130699158, "logps/chosen": -142.41555786132812, "logps/rejected": -212.53993225097656, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": 0.9519180655479431, "rewards/margins": 5.285159587860107, "rewards/rejected": -4.3332414627075195, "step": 359 }, { "epoch": 0.08, "learning_rate": 9.997512492797285e-06, "logits/chosen": -0.8285294771194458, "logits/rejected": -0.8554014563560486, "logps/chosen": -196.66595458984375, "logps/rejected": -189.12826538085938, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": -0.22259826958179474, "rewards/margins": 2.587146043777466, "rewards/rejected": -2.809744358062744, "step": 360 }, { "epoch": 0.08, "learning_rate": 9.997455642147831e-06, "logits/chosen": -0.5591023564338684, "logits/rejected": -0.5121173858642578, "logps/chosen": -143.7119903564453, "logps/rejected": -196.23013305664062, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.5920486450195312, "rewards/margins": 5.188520908355713, "rewards/rejected": -4.596472263336182, "step": 361 }, { "epoch": 0.08, "learning_rate": 9.997398149337338e-06, "logits/chosen": -0.6684654951095581, "logits/rejected": -0.6471025347709656, "logps/chosen": -99.88774871826172, "logps/rejected": -112.25448608398438, "loss": 0.1381, "rewards/accuracies": 1.0, "rewards/chosen": 0.09233474731445312, "rewards/margins": 1.1807289123535156, "rewards/rejected": -1.0883941650390625, "step": 362 }, { "epoch": 0.08, "learning_rate": 9.997340014373198e-06, "logits/chosen": -0.31803858280181885, "logits/rejected": -0.26907485723495483, "logps/chosen": -166.26263427734375, "logps/rejected": -184.60537719726562, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": 0.3421874940395355, "rewards/margins": 2.469102382659912, "rewards/rejected": -2.1269149780273438, "step": 363 }, { "epoch": 0.08, "learning_rate": 9.99728123726288e-06, "logits/chosen": -0.45520836114883423, "logits/rejected": -0.37208032608032227, "logps/chosen": -171.09271240234375, "logps/rejected": -148.14486694335938, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 1.6474822759628296, "rewards/margins": 7.255409240722656, "rewards/rejected": -5.607926845550537, "step": 364 }, { "epoch": 0.08, "learning_rate": 9.997221818013933e-06, "logits/chosen": -0.5107866525650024, "logits/rejected": -0.5124202966690063, "logps/chosen": -160.28512573242188, "logps/rejected": -55.94767761230469, "loss": 0.1814, "rewards/accuracies": 1.0, "rewards/chosen": -0.5295761227607727, "rewards/margins": 1.392716646194458, "rewards/rejected": -1.922292709350586, "step": 365 }, { "epoch": 0.08, "learning_rate": 9.997161756633998e-06, "logits/chosen": -0.8416858911514282, "logits/rejected": -0.06205608323216438, "logps/chosen": -171.01246643066406, "logps/rejected": -106.02871704101562, "loss": 0.0726, "rewards/accuracies": 1.0, "rewards/chosen": 0.4489761292934418, "rewards/margins": 1.9765007495880127, "rewards/rejected": -1.5275245904922485, "step": 366 }, { "epoch": 0.08, "learning_rate": 9.99710105313079e-06, "logits/chosen": -0.5536087155342102, "logits/rejected": -0.5932965874671936, "logps/chosen": -193.86814880371094, "logps/rejected": -43.09735870361328, "loss": 0.0434, "rewards/accuracies": 1.0, "rewards/chosen": -0.15008698403835297, "rewards/margins": 2.4016125202178955, "rewards/rejected": -2.55169939994812, "step": 367 }, { "epoch": 0.08, "learning_rate": 9.997039707512109e-06, "logits/chosen": -0.5450173020362854, "logits/rejected": 0.1206023097038269, "logps/chosen": -122.07170867919922, "logps/rejected": -154.15158081054688, "loss": 0.186, "rewards/accuracies": 1.0, "rewards/chosen": -2.0803704261779785, "rewards/margins": 4.3736982345581055, "rewards/rejected": -6.454068660736084, "step": 368 }, { "epoch": 0.08, "learning_rate": 9.996977719785837e-06, "logits/chosen": -0.6580538749694824, "logits/rejected": -0.6580538749694824, "logps/chosen": -198.19943237304688, "logps/rejected": -198.19943237304688, "loss": 0.3509, "rewards/accuracies": 0.0, "rewards/chosen": -5.810403347015381, "rewards/margins": 0.0, "rewards/rejected": -5.810403347015381, "step": 369 }, { "epoch": 0.08, "learning_rate": 9.996915089959942e-06, "logits/chosen": -0.42158636450767517, "logits/rejected": -0.4064846932888031, "logps/chosen": -103.66844940185547, "logps/rejected": -102.21072387695312, "loss": 0.5375, "rewards/accuracies": 0.0, "rewards/chosen": -2.100470781326294, "rewards/margins": -0.6287781000137329, "rewards/rejected": -1.471692681312561, "step": 370 }, { "epoch": 0.08, "learning_rate": 9.99685181804247e-06, "logits/chosen": -0.4898449778556824, "logits/rejected": -0.4543897807598114, "logps/chosen": -85.78934478759766, "logps/rejected": -30.255640029907227, "loss": 0.4771, "rewards/accuracies": 1.0, "rewards/chosen": -0.6594749689102173, "rewards/margins": 0.33349037170410156, "rewards/rejected": -0.9929653406143188, "step": 371 }, { "epoch": 0.08, "learning_rate": 9.996787904041551e-06, "logits/chosen": -0.6106479167938232, "logits/rejected": -0.0763939842581749, "logps/chosen": -126.6964111328125, "logps/rejected": -241.14385986328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.0020020008087158, "rewards/margins": 10.801490783691406, "rewards/rejected": -9.79948902130127, "step": 372 }, { "epoch": 0.08, "learning_rate": 9.996723347965399e-06, "logits/chosen": -0.36788496375083923, "logits/rejected": -0.29902851581573486, "logps/chosen": -77.38174438476562, "logps/rejected": -122.82186889648438, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": -1.5151833295822144, "rewards/margins": 3.197333335876465, "rewards/rejected": -4.712516784667969, "step": 373 }, { "epoch": 0.08, "learning_rate": 9.996658149822307e-06, "logits/chosen": -0.3417374789714813, "logits/rejected": -0.3417374789714813, "logps/chosen": -250.86399841308594, "logps/rejected": -250.86399841308594, "loss": 0.3475, "rewards/accuracies": 0.0, "rewards/chosen": -9.57805347442627, "rewards/margins": 0.0, "rewards/rejected": -9.57805347442627, "step": 374 }, { "epoch": 0.08, "learning_rate": 9.996592309620656e-06, "logits/chosen": -0.8155494332313538, "logits/rejected": -0.8240489959716797, "logps/chosen": -106.68511962890625, "logps/rejected": -59.015933990478516, "loss": 0.0917, "rewards/accuracies": 1.0, "rewards/chosen": -0.11069335788488388, "rewards/margins": 1.6060051918029785, "rewards/rejected": -1.7166985273361206, "step": 375 }, { "epoch": 0.08, "learning_rate": 9.996525827368903e-06, "logits/chosen": -0.3862275183200836, "logits/rejected": -0.36654672026634216, "logps/chosen": -148.84426879882812, "logps/rejected": -159.26861572265625, "loss": 0.1216, "rewards/accuracies": 1.0, "rewards/chosen": -2.245046377182007, "rewards/margins": 1.2924628257751465, "rewards/rejected": -3.5375092029571533, "step": 376 }, { "epoch": 0.08, "learning_rate": 9.996458703075593e-06, "logits/chosen": -0.8338645696640015, "logits/rejected": -0.798644483089447, "logps/chosen": -98.96458435058594, "logps/rejected": -119.89568328857422, "loss": 0.5525, "rewards/accuracies": 1.0, "rewards/chosen": -0.8069961667060852, "rewards/margins": 1.0300674438476562, "rewards/rejected": -1.8370636701583862, "step": 377 }, { "epoch": 0.08, "learning_rate": 9.996390936749351e-06, "logits/chosen": -0.5016870498657227, "logits/rejected": -0.44073396921157837, "logps/chosen": -109.73316955566406, "logps/rejected": -160.19837951660156, "loss": 1.1457, "rewards/accuracies": 1.0, "rewards/chosen": -1.3678306341171265, "rewards/margins": 2.3085670471191406, "rewards/rejected": -3.6763978004455566, "step": 378 }, { "epoch": 0.08, "learning_rate": 9.996322528398886e-06, "logits/chosen": -0.6336040496826172, "logits/rejected": -0.6156743168830872, "logps/chosen": -88.71147918701172, "logps/rejected": -143.72897338867188, "loss": 0.1034, "rewards/accuracies": 1.0, "rewards/chosen": -1.2060470581054688, "rewards/margins": 1.5401122570037842, "rewards/rejected": -2.746159315109253, "step": 379 }, { "epoch": 0.08, "learning_rate": 9.996253478032987e-06, "logits/chosen": -0.8632605671882629, "logits/rejected": -0.8076001405715942, "logps/chosen": -101.36412048339844, "logps/rejected": -147.84393310546875, "loss": 0.4576, "rewards/accuracies": 0.0, "rewards/chosen": -0.9699951410293579, "rewards/margins": -0.40364688634872437, "rewards/rejected": -0.5663482546806335, "step": 380 }, { "epoch": 0.08, "learning_rate": 9.996183785660526e-06, "logits/chosen": -0.6236224174499512, "logits/rejected": -0.6088656783103943, "logps/chosen": -91.95722961425781, "logps/rejected": -85.77050018310547, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": 0.9087783694267273, "rewards/margins": 2.8981971740722656, "rewards/rejected": -1.989418864250183, "step": 381 }, { "epoch": 0.08, "learning_rate": 9.996113451290457e-06, "logits/chosen": -0.8169088959693909, "logits/rejected": -0.8353679180145264, "logps/chosen": -186.44815063476562, "logps/rejected": -194.42172241210938, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": 0.20159760117530823, "rewards/margins": 2.375383138656616, "rewards/rejected": -2.173785448074341, "step": 382 }, { "epoch": 0.08, "learning_rate": 9.996042474931821e-06, "logits/chosen": -0.4949718415737152, "logits/rejected": -0.43649208545684814, "logps/chosen": -215.22914123535156, "logps/rejected": -191.9014892578125, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": 1.4711350202560425, "rewards/margins": 3.6138429641723633, "rewards/rejected": -2.1427078247070312, "step": 383 }, { "epoch": 0.08, "learning_rate": 9.995970856593739e-06, "logits/chosen": -0.44833821058273315, "logits/rejected": -0.46672552824020386, "logps/chosen": -101.35948181152344, "logps/rejected": -120.87263488769531, "loss": 0.3995, "rewards/accuracies": 0.0, "rewards/chosen": -1.3991081714630127, "rewards/margins": -0.19283521175384521, "rewards/rejected": -1.2062729597091675, "step": 384 }, { "epoch": 0.09, "learning_rate": 9.99589859628541e-06, "logits/chosen": -0.6055171489715576, "logits/rejected": -0.20825746655464172, "logps/chosen": -42.867881774902344, "logps/rejected": -245.67849731445312, "loss": 0.551, "rewards/accuracies": 1.0, "rewards/chosen": -1.2989555597305298, "rewards/margins": 4.717441558837891, "rewards/rejected": -6.016396999359131, "step": 385 }, { "epoch": 0.09, "learning_rate": 9.995825694016122e-06, "logits/chosen": -0.8000704646110535, "logits/rejected": -0.909558892250061, "logps/chosen": -161.2856903076172, "logps/rejected": -40.878746032714844, "loss": 0.1578, "rewards/accuracies": 1.0, "rewards/chosen": 0.4076431393623352, "rewards/margins": 1.0882904529571533, "rewards/rejected": -0.6806473135948181, "step": 386 }, { "epoch": 0.09, "learning_rate": 9.995752149795241e-06, "logits/chosen": -0.41133013367652893, "logits/rejected": -0.4167964458465576, "logps/chosen": -65.72003173828125, "logps/rejected": -116.42878723144531, "loss": 0.1967, "rewards/accuracies": 1.0, "rewards/chosen": -1.3626644611358643, "rewards/margins": 0.7753911018371582, "rewards/rejected": -2.1380555629730225, "step": 387 }, { "epoch": 0.09, "learning_rate": 9.99567796363222e-06, "logits/chosen": -0.47080090641975403, "logits/rejected": -0.3774513900279999, "logps/chosen": -249.7522735595703, "logps/rejected": -220.67465209960938, "loss": 0.874, "rewards/accuracies": 0.0, "rewards/chosen": -0.8017135858535767, "rewards/margins": -1.396392822265625, "rewards/rejected": 0.5946792960166931, "step": 388 }, { "epoch": 0.09, "learning_rate": 9.995603135536587e-06, "logits/chosen": -0.40158000588417053, "logits/rejected": -0.38154083490371704, "logps/chosen": -208.19613647460938, "logps/rejected": -131.99794006347656, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": 2.9085357189178467, "rewards/margins": 4.434792995452881, "rewards/rejected": -1.5262573957443237, "step": 389 }, { "epoch": 0.09, "learning_rate": 9.995527665517964e-06, "logits/chosen": -0.31995266675949097, "logits/rejected": -0.3247722089290619, "logps/chosen": -76.89492797851562, "logps/rejected": -99.95726776123047, "loss": 0.7339, "rewards/accuracies": 0.0, "rewards/chosen": -1.609270453453064, "rewards/margins": -0.8301040530204773, "rewards/rejected": -0.7791664004325867, "step": 390 }, { "epoch": 0.09, "learning_rate": 9.995451553586042e-06, "logits/chosen": -0.5849414467811584, "logits/rejected": -0.5303778648376465, "logps/chosen": -202.06021118164062, "logps/rejected": -193.53228759765625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 1.1688416004180908, "rewards/margins": 5.050413608551025, "rewards/rejected": -3.8815720081329346, "step": 391 }, { "epoch": 0.09, "learning_rate": 9.995374799750606e-06, "logits/chosen": -0.6917182803153992, "logits/rejected": -0.7105321884155273, "logps/chosen": -76.07177734375, "logps/rejected": -74.48612976074219, "loss": 0.078, "rewards/accuracies": 1.0, "rewards/chosen": -0.18185119330883026, "rewards/margins": 1.7787182331085205, "rewards/rejected": -1.9605693817138672, "step": 392 }, { "epoch": 0.09, "learning_rate": 9.995297404021515e-06, "logits/chosen": -0.5924174785614014, "logits/rejected": -0.5817927718162537, "logps/chosen": -95.77531433105469, "logps/rejected": -63.9687614440918, "loss": 0.3802, "rewards/accuracies": 0.0, "rewards/chosen": -1.2636085748672485, "rewards/margins": -0.12608075141906738, "rewards/rejected": -1.1375278234481812, "step": 393 }, { "epoch": 0.09, "learning_rate": 9.995219366408717e-06, "logits/chosen": -0.5122961401939392, "logits/rejected": -0.5122961401939392, "logps/chosen": -185.11920166015625, "logps/rejected": -185.11920166015625, "loss": 0.3472, "rewards/accuracies": 0.0, "rewards/chosen": -3.550701856613159, "rewards/margins": 0.0, "rewards/rejected": -3.550701856613159, "step": 394 }, { "epoch": 0.09, "learning_rate": 9.995140686922237e-06, "logits/chosen": -0.5809307098388672, "logits/rejected": -0.5827289819717407, "logps/chosen": -116.32696533203125, "logps/rejected": -92.62715911865234, "loss": 0.358, "rewards/accuracies": 1.0, "rewards/chosen": -1.1231796741485596, "rewards/margins": 0.19846642017364502, "rewards/rejected": -1.3216460943222046, "step": 395 }, { "epoch": 0.09, "learning_rate": 9.995061365572188e-06, "logits/chosen": -0.5499765276908875, "logits/rejected": 0.20580612123012543, "logps/chosen": -169.0367431640625, "logps/rejected": -354.5299987792969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.1187301874160767, "rewards/margins": 17.006755828857422, "rewards/rejected": -15.888026237487793, "step": 396 }, { "epoch": 0.09, "learning_rate": 9.994981402368763e-06, "logits/chosen": -0.7693426012992859, "logits/rejected": -0.7103562355041504, "logps/chosen": -80.93232727050781, "logps/rejected": -113.20079803466797, "loss": 0.0676, "rewards/accuracies": 1.0, "rewards/chosen": 0.189198300242424, "rewards/margins": 2.346045732498169, "rewards/rejected": -2.1568474769592285, "step": 397 }, { "epoch": 0.09, "learning_rate": 9.994900797322233e-06, "logits/chosen": -0.30681702494621277, "logits/rejected": -0.320207804441452, "logps/chosen": -106.72647857666016, "logps/rejected": -166.4994354248047, "loss": 1.5886, "rewards/accuracies": 0.0, "rewards/chosen": -1.134931206703186, "rewards/margins": -0.45320361852645874, "rewards/rejected": -0.6817275881767273, "step": 398 }, { "epoch": 0.09, "learning_rate": 9.994819550442958e-06, "logits/chosen": -0.7498601078987122, "logits/rejected": -0.7419355511665344, "logps/chosen": -141.25592041015625, "logps/rejected": -204.4729461669922, "loss": 0.3037, "rewards/accuracies": 1.0, "rewards/chosen": -2.397563934326172, "rewards/margins": 0.1797630786895752, "rewards/rejected": -2.577327013015747, "step": 399 }, { "epoch": 0.09, "learning_rate": 9.994737661741379e-06, "logits/chosen": -0.7827035188674927, "logits/rejected": -0.7728462815284729, "logps/chosen": -174.95925903320312, "logps/rejected": -220.30203247070312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.02493896521627903, "rewards/margins": 8.134847640991211, "rewards/rejected": -8.109909057617188, "step": 400 }, { "epoch": 0.09, "learning_rate": 9.994655131228017e-06, "logits/chosen": -0.2707982063293457, "logits/rejected": -0.25938647985458374, "logps/chosen": -86.28851318359375, "logps/rejected": -150.8103485107422, "loss": 0.1336, "rewards/accuracies": 1.0, "rewards/chosen": -0.6241989135742188, "rewards/margins": 1.3887085914611816, "rewards/rejected": -2.0129075050354004, "step": 401 }, { "epoch": 0.09, "learning_rate": 9.994571958913477e-06, "logits/chosen": -0.83186936378479, "logits/rejected": -0.8368569016456604, "logps/chosen": -103.80189514160156, "logps/rejected": -114.81651306152344, "loss": 0.2305, "rewards/accuracies": 1.0, "rewards/chosen": -1.8901054859161377, "rewards/margins": 0.5394272804260254, "rewards/rejected": -2.429532766342163, "step": 402 }, { "epoch": 0.09, "learning_rate": 9.994488144808449e-06, "logits/chosen": -0.3340971767902374, "logits/rejected": -0.34480050206184387, "logps/chosen": -71.83480834960938, "logps/rejected": -131.1804656982422, "loss": 0.6456, "rewards/accuracies": 1.0, "rewards/chosen": -0.9058822989463806, "rewards/margins": 1.8019423484802246, "rewards/rejected": -2.70782470703125, "step": 403 }, { "epoch": 0.09, "learning_rate": 9.994403688923699e-06, "logits/chosen": -0.4932517111301422, "logits/rejected": -0.48397642374038696, "logps/chosen": -117.135986328125, "logps/rejected": -157.00906372070312, "loss": 0.0712, "rewards/accuracies": 1.0, "rewards/chosen": -2.0137009620666504, "rewards/margins": 2.397189140319824, "rewards/rejected": -4.410890102386475, "step": 404 }, { "epoch": 0.09, "learning_rate": 9.994318591270081e-06, "logits/chosen": -0.5432431101799011, "logits/rejected": -0.4632590711116791, "logps/chosen": -195.79449462890625, "logps/rejected": -226.2201690673828, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 1.6753662824630737, "rewards/margins": 5.639203071594238, "rewards/rejected": -3.963836669921875, "step": 405 }, { "epoch": 0.09, "learning_rate": 9.99423285185853e-06, "logits/chosen": -0.7670482397079468, "logits/rejected": -0.7255743741989136, "logps/chosen": -191.01025390625, "logps/rejected": -321.55364990234375, "loss": 0.222, "rewards/accuracies": 1.0, "rewards/chosen": -0.6162444949150085, "rewards/margins": 13.137161254882812, "rewards/rejected": -13.753405570983887, "step": 406 }, { "epoch": 0.09, "learning_rate": 9.994146470700065e-06, "logits/chosen": -0.5373204350471497, "logits/rejected": -0.5991174578666687, "logps/chosen": -196.30035400390625, "logps/rejected": -152.89698791503906, "loss": 0.3762, "rewards/accuracies": 1.0, "rewards/chosen": 1.5138763189315796, "rewards/margins": 2.796247959136963, "rewards/rejected": -1.2823715209960938, "step": 407 }, { "epoch": 0.09, "learning_rate": 9.994059447805781e-06, "logits/chosen": -0.49390557408332825, "logits/rejected": -0.47823596000671387, "logps/chosen": -188.82859802246094, "logps/rejected": -33.17985153198242, "loss": 0.5217, "rewards/accuracies": 1.0, "rewards/chosen": -0.09184875339269638, "rewards/margins": 0.24204197525978088, "rewards/rejected": -0.33389073610305786, "step": 408 }, { "epoch": 0.09, "learning_rate": 9.993971783186867e-06, "logits/chosen": -0.653221845626831, "logits/rejected": -0.6593363285064697, "logps/chosen": -84.24299621582031, "logps/rejected": -204.85061645507812, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": 0.28088685870170593, "rewards/margins": 5.196588039398193, "rewards/rejected": -4.915701389312744, "step": 409 }, { "epoch": 0.09, "learning_rate": 9.993883476854582e-06, "logits/chosen": -0.6574355959892273, "logits/rejected": -0.635276734828949, "logps/chosen": -257.5585021972656, "logps/rejected": -226.74853515625, "loss": 0.3483, "rewards/accuracies": 1.0, "rewards/chosen": -0.22543029487133026, "rewards/margins": 1.1292099952697754, "rewards/rejected": -1.354640245437622, "step": 410 }, { "epoch": 0.09, "learning_rate": 9.993794528820275e-06, "logits/chosen": -0.4268138110637665, "logits/rejected": -0.45066919922828674, "logps/chosen": -98.89785766601562, "logps/rejected": -82.88349914550781, "loss": 0.4586, "rewards/accuracies": 0.0, "rewards/chosen": -1.3877991437911987, "rewards/margins": -0.008879899978637695, "rewards/rejected": -1.378919243812561, "step": 411 }, { "epoch": 0.09, "learning_rate": 9.993704939095376e-06, "logits/chosen": -0.6030325889587402, "logits/rejected": -0.5643925666809082, "logps/chosen": -90.8040771484375, "logps/rejected": -146.65921020507812, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.356100469827652, "rewards/margins": 4.283844947814941, "rewards/rejected": -4.6399455070495605, "step": 412 }, { "epoch": 0.09, "learning_rate": 9.9936147076914e-06, "logits/chosen": -0.5641208291053772, "logits/rejected": -0.5450237989425659, "logps/chosen": -142.54295349121094, "logps/rejected": -66.18421936035156, "loss": 0.4015, "rewards/accuracies": 0.0, "rewards/chosen": -1.5116370916366577, "rewards/margins": -0.20180892944335938, "rewards/rejected": -1.3098281621932983, "step": 413 }, { "epoch": 0.09, "learning_rate": 9.993523834619933e-06, "logits/chosen": -0.554821252822876, "logits/rejected": -0.6025373935699463, "logps/chosen": -256.19744873046875, "logps/rejected": -53.28321838378906, "loss": 0.8833, "rewards/accuracies": 0.0, "rewards/chosen": -3.7215576171875, "rewards/margins": -1.4864284992218018, "rewards/rejected": -2.2351291179656982, "step": 414 }, { "epoch": 0.09, "learning_rate": 9.99343231989266e-06, "logits/chosen": -0.7672052383422852, "logits/rejected": -0.7795262336730957, "logps/chosen": -134.2536163330078, "logps/rejected": -130.07763671875, "loss": 0.2861, "rewards/accuracies": 1.0, "rewards/chosen": -1.1121749877929688, "rewards/margins": 0.2598663568496704, "rewards/rejected": -1.3720413446426392, "step": 415 }, { "epoch": 0.09, "learning_rate": 9.99334016352134e-06, "logits/chosen": -0.7885582447052002, "logits/rejected": -0.7495971918106079, "logps/chosen": -128.49942016601562, "logps/rejected": -169.165771484375, "loss": 0.1773, "rewards/accuracies": 1.0, "rewards/chosen": -2.000826358795166, "rewards/margins": 0.863044023513794, "rewards/rejected": -2.86387038230896, "step": 416 }, { "epoch": 0.09, "learning_rate": 9.993247365517808e-06, "logits/chosen": -0.5079734921455383, "logits/rejected": -0.43624210357666016, "logps/chosen": -96.79195404052734, "logps/rejected": -168.10874938964844, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.2577522397041321, "rewards/margins": 5.346271514892578, "rewards/rejected": -5.6040239334106445, "step": 417 }, { "epoch": 0.09, "learning_rate": 9.993153925893997e-06, "logits/chosen": -0.6652522087097168, "logits/rejected": -0.6175897717475891, "logps/chosen": -124.10990905761719, "logps/rejected": -166.85476684570312, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": -0.7148513793945312, "rewards/margins": 3.994637966156006, "rewards/rejected": -4.709489345550537, "step": 418 }, { "epoch": 0.09, "learning_rate": 9.993059844661908e-06, "logits/chosen": -0.6192551255226135, "logits/rejected": -0.6192551255226135, "logps/chosen": -75.01583099365234, "logps/rejected": -75.01583099365234, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -1.7332428693771362, "rewards/margins": 0.0, "rewards/rejected": -1.7332428693771362, "step": 419 }, { "epoch": 0.09, "learning_rate": 9.992965121833631e-06, "logits/chosen": -0.48193514347076416, "logits/rejected": -0.44665512442588806, "logps/chosen": -196.38694763183594, "logps/rejected": -33.89821243286133, "loss": 0.3959, "rewards/accuracies": 1.0, "rewards/chosen": 0.7504211664199829, "rewards/margins": 1.2979665994644165, "rewards/rejected": -0.5475454330444336, "step": 420 }, { "epoch": 0.09, "learning_rate": 9.99286975742134e-06, "logits/chosen": -0.41070282459259033, "logits/rejected": -0.38753896951675415, "logps/chosen": -44.68336868286133, "logps/rejected": -48.81311798095703, "loss": 0.1591, "rewards/accuracies": 1.0, "rewards/chosen": -0.6283649802207947, "rewards/margins": 1.326622486114502, "rewards/rejected": -1.9549875259399414, "step": 421 }, { "epoch": 0.09, "learning_rate": 9.992773751437288e-06, "logits/chosen": -0.7218750715255737, "logits/rejected": -0.7308358550071716, "logps/chosen": -98.51249694824219, "logps/rejected": -195.86776733398438, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": 0.479959100484848, "rewards/margins": 3.435450792312622, "rewards/rejected": -2.955491781234741, "step": 422 }, { "epoch": 0.09, "learning_rate": 9.99267710389381e-06, "logits/chosen": -0.5165485739707947, "logits/rejected": -0.5234714150428772, "logps/chosen": -81.94783020019531, "logps/rejected": -59.013004302978516, "loss": 0.6863, "rewards/accuracies": 0.0, "rewards/chosen": -1.7264808416366577, "rewards/margins": -1.054915189743042, "rewards/rejected": -0.6715656518936157, "step": 423 }, { "epoch": 0.09, "learning_rate": 9.992579814803327e-06, "logits/chosen": -0.7354977130889893, "logits/rejected": -0.6750704050064087, "logps/chosen": -96.51058197021484, "logps/rejected": -130.34927368164062, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": 0.5364899039268494, "rewards/margins": 3.497825860977173, "rewards/rejected": -2.9613358974456787, "step": 424 }, { "epoch": 0.09, "learning_rate": 9.992481884178338e-06, "logits/chosen": -0.46712303161621094, "logits/rejected": -0.39071497321128845, "logps/chosen": -45.6033935546875, "logps/rejected": -55.772361755371094, "loss": 1.4127, "rewards/accuracies": 1.0, "rewards/chosen": -0.6855079531669617, "rewards/margins": 0.4564308524131775, "rewards/rejected": -1.1419388055801392, "step": 425 }, { "epoch": 0.09, "learning_rate": 9.99238331203143e-06, "logits/chosen": -0.6172305941581726, "logits/rejected": -0.6047384738922119, "logps/chosen": -54.35866165161133, "logps/rejected": -126.00277709960938, "loss": 0.2258, "rewards/accuracies": 1.0, "rewards/chosen": 0.3532364070415497, "rewards/margins": 1.4929077625274658, "rewards/rejected": -1.1396713256835938, "step": 426 }, { "epoch": 0.09, "learning_rate": 9.99228409837527e-06, "logits/chosen": -0.5222217440605164, "logits/rejected": -0.5222217440605164, "logps/chosen": -105.97865295410156, "logps/rejected": -105.97865295410156, "loss": 0.3526, "rewards/accuracies": 0.0, "rewards/chosen": -0.7588416934013367, "rewards/margins": 0.0, "rewards/rejected": -0.7588416934013367, "step": 427 }, { "epoch": 0.09, "learning_rate": 9.9921842432226e-06, "logits/chosen": -0.5474478006362915, "logits/rejected": -0.4882815182209015, "logps/chosen": -91.66929626464844, "logps/rejected": -51.266571044921875, "loss": 0.1502, "rewards/accuracies": 1.0, "rewards/chosen": -0.8048210144042969, "rewards/margins": 1.064439058303833, "rewards/rejected": -1.8692600727081299, "step": 428 }, { "epoch": 0.09, "learning_rate": 9.992083746586258e-06, "logits/chosen": -0.7152131795883179, "logits/rejected": -0.5988662242889404, "logps/chosen": -177.9053192138672, "logps/rejected": -240.8032989501953, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 1.560083031654358, "rewards/margins": 7.116523742675781, "rewards/rejected": -5.556440830230713, "step": 429 }, { "epoch": 0.1, "learning_rate": 9.991982608479156e-06, "logits/chosen": -0.5836482644081116, "logits/rejected": -0.6484730243682861, "logps/chosen": -226.00379943847656, "logps/rejected": -177.5400390625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.042140245437622, "rewards/margins": 6.982237815856934, "rewards/rejected": -8.024377822875977, "step": 430 }, { "epoch": 0.1, "learning_rate": 9.991880828914288e-06, "logits/chosen": -0.8403031229972839, "logits/rejected": -0.8746959567070007, "logps/chosen": -194.7852783203125, "logps/rejected": -137.10411071777344, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": 2.482438802719116, "rewards/margins": 4.706904888153076, "rewards/rejected": -2.22446608543396, "step": 431 }, { "epoch": 0.1, "learning_rate": 9.991778407904733e-06, "logits/chosen": -0.18825966119766235, "logits/rejected": -0.18825966119766235, "logps/chosen": -140.44894409179688, "logps/rejected": -140.44894409179688, "loss": 0.4196, "rewards/accuracies": 0.0, "rewards/chosen": -5.149005889892578, "rewards/margins": 0.0, "rewards/rejected": -5.149005889892578, "step": 432 }, { "epoch": 0.1, "learning_rate": 9.991675345463654e-06, "logits/chosen": -0.8241117000579834, "logits/rejected": -0.8004544973373413, "logps/chosen": -88.40975952148438, "logps/rejected": -125.55667114257812, "loss": 0.3736, "rewards/accuracies": 0.0, "rewards/chosen": 0.3505111634731293, "rewards/margins": -0.0957840085029602, "rewards/rejected": 0.4462951719760895, "step": 433 }, { "epoch": 0.1, "learning_rate": 9.991571641604291e-06, "logits/chosen": -0.7389707565307617, "logits/rejected": -0.17788898944854736, "logps/chosen": -92.90804290771484, "logps/rejected": -228.20828247070312, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -2.2600083351135254, "rewards/margins": 6.411789417266846, "rewards/rejected": -8.671797752380371, "step": 434 }, { "epoch": 0.1, "learning_rate": 9.991467296339973e-06, "logits/chosen": -0.8744543790817261, "logits/rejected": -0.9027704000473022, "logps/chosen": -165.159423828125, "logps/rejected": -202.7401885986328, "loss": 0.0675, "rewards/accuracies": 1.0, "rewards/chosen": -0.536663830280304, "rewards/margins": 2.4126038551330566, "rewards/rejected": -2.949267625808716, "step": 435 }, { "epoch": 0.1, "learning_rate": 9.991362309684105e-06, "logits/chosen": -0.6480880975723267, "logits/rejected": -0.6143184900283813, "logps/chosen": -81.83438873291016, "logps/rejected": -95.85984802246094, "loss": 0.2988, "rewards/accuracies": 1.0, "rewards/chosen": -1.035047173500061, "rewards/margins": 1.3053847551345825, "rewards/rejected": -2.3404319286346436, "step": 436 }, { "epoch": 0.1, "learning_rate": 9.991256681650181e-06, "logits/chosen": -0.6161605715751648, "logits/rejected": -0.6571806073188782, "logps/chosen": -253.92562866210938, "logps/rejected": -257.4503479003906, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": 2.9630188941955566, "rewards/margins": 4.615025520324707, "rewards/rejected": -1.6520065069198608, "step": 437 }, { "epoch": 0.1, "learning_rate": 9.99115041225177e-06, "logits/chosen": -0.5531795620918274, "logits/rejected": -0.5531795620918274, "logps/chosen": -130.4197235107422, "logps/rejected": -130.4197235107422, "loss": 0.3624, "rewards/accuracies": 0.0, "rewards/chosen": -1.1555794477462769, "rewards/margins": 0.0, "rewards/rejected": -1.1555794477462769, "step": 438 }, { "epoch": 0.1, "learning_rate": 9.991043501502532e-06, "logits/chosen": -0.6987422108650208, "logits/rejected": -0.6987422108650208, "logps/chosen": -92.20032501220703, "logps/rejected": -92.20032501220703, "loss": 0.3862, "rewards/accuracies": 0.0, "rewards/chosen": -0.07879257202148438, "rewards/margins": 0.0, "rewards/rejected": -0.07879257202148438, "step": 439 }, { "epoch": 0.1, "learning_rate": 9.9909359494162e-06, "logits/chosen": -0.5795769691467285, "logits/rejected": -0.5741731524467468, "logps/chosen": -74.83198547363281, "logps/rejected": -116.97931671142578, "loss": 0.1545, "rewards/accuracies": 1.0, "rewards/chosen": 0.14755859971046448, "rewards/margins": 1.0521217584609985, "rewards/rejected": -0.9045631289482117, "step": 440 }, { "epoch": 0.1, "learning_rate": 9.990827756006599e-06, "logits/chosen": -0.4973534047603607, "logits/rejected": -0.420881986618042, "logps/chosen": -175.40078735351562, "logps/rejected": -186.49774169921875, "loss": 0.0924, "rewards/accuracies": 1.0, "rewards/chosen": -1.637233018875122, "rewards/margins": 1.6003708839416504, "rewards/rejected": -3.2376039028167725, "step": 441 }, { "epoch": 0.1, "learning_rate": 9.990718921287625e-06, "logits/chosen": -0.5787881016731262, "logits/rejected": -0.6945056319236755, "logps/chosen": -201.58489990234375, "logps/rejected": -89.24900817871094, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": 0.906506359577179, "rewards/margins": 3.0776734352111816, "rewards/rejected": -2.1711671352386475, "step": 442 }, { "epoch": 0.1, "learning_rate": 9.99060944527327e-06, "logits/chosen": -0.7106373310089111, "logits/rejected": -0.6919758915901184, "logps/chosen": -189.39431762695312, "logps/rejected": -183.08914184570312, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 1.8875519037246704, "rewards/margins": 8.031298637390137, "rewards/rejected": -6.143746852874756, "step": 443 }, { "epoch": 0.1, "learning_rate": 9.990499327977599e-06, "logits/chosen": -0.41885682940483093, "logits/rejected": -0.2688857316970825, "logps/chosen": -195.0255584716797, "logps/rejected": -133.65548706054688, "loss": 0.0866, "rewards/accuracies": 1.0, "rewards/chosen": 1.3786758184432983, "rewards/margins": 5.802655220031738, "rewards/rejected": -4.42397928237915, "step": 444 }, { "epoch": 0.1, "learning_rate": 9.990388569414759e-06, "logits/chosen": -0.7726318836212158, "logits/rejected": -0.693453848361969, "logps/chosen": -93.73490905761719, "logps/rejected": -228.61721801757812, "loss": 0.1792, "rewards/accuracies": 1.0, "rewards/chosen": 0.0072883605025708675, "rewards/margins": 0.8489982485771179, "rewards/rejected": -0.8417099118232727, "step": 445 }, { "epoch": 0.1, "learning_rate": 9.990277169598985e-06, "logits/chosen": -0.334623783826828, "logits/rejected": -0.2991361916065216, "logps/chosen": -81.03193664550781, "logps/rejected": -106.81752014160156, "loss": 0.507, "rewards/accuracies": 1.0, "rewards/chosen": 0.1605072021484375, "rewards/margins": 1.9743576049804688, "rewards/rejected": -1.8138504028320312, "step": 446 }, { "epoch": 0.1, "learning_rate": 9.99016512854459e-06, "logits/chosen": -1.0847231149673462, "logits/rejected": -1.1140137910842896, "logps/chosen": -78.68034362792969, "logps/rejected": -69.65718078613281, "loss": 0.1859, "rewards/accuracies": 1.0, "rewards/chosen": 0.12260513752698898, "rewards/margins": 0.8028702139854431, "rewards/rejected": -0.6802650690078735, "step": 447 }, { "epoch": 0.1, "learning_rate": 9.990052446265974e-06, "logits/chosen": -0.9503148794174194, "logits/rejected": -0.9160030484199524, "logps/chosen": -137.3331756591797, "logps/rejected": -160.292236328125, "loss": 0.4266, "rewards/accuracies": 1.0, "rewards/chosen": 0.24306488037109375, "rewards/margins": 2.2731399536132812, "rewards/rejected": -2.0300750732421875, "step": 448 }, { "epoch": 0.1, "learning_rate": 9.989939122777614e-06, "logits/chosen": -0.6684783101081848, "logits/rejected": -0.6370528340339661, "logps/chosen": -108.787109375, "logps/rejected": -57.735897064208984, "loss": 0.1302, "rewards/accuracies": 1.0, "rewards/chosen": 2.1247634887695312, "rewards/margins": 3.3677945137023926, "rewards/rejected": -1.2430309057235718, "step": 449 }, { "epoch": 0.1, "learning_rate": 9.98982515809407e-06, "logits/chosen": -0.6753978133201599, "logits/rejected": -0.7925333976745605, "logps/chosen": -230.28445434570312, "logps/rejected": -81.76043701171875, "loss": 0.0542, "rewards/accuracies": 1.0, "rewards/chosen": 0.7927581667900085, "rewards/margins": 2.2138755321502686, "rewards/rejected": -1.4211174249649048, "step": 450 }, { "epoch": 0.1, "learning_rate": 9.989710552229992e-06, "logits/chosen": -0.8840436339378357, "logits/rejected": -0.9447087645530701, "logps/chosen": -164.79757690429688, "logps/rejected": -63.44011688232422, "loss": 0.1311, "rewards/accuracies": 1.0, "rewards/chosen": -0.704174816608429, "rewards/margins": 1.2054054737091064, "rewards/rejected": -1.9095802307128906, "step": 451 }, { "epoch": 0.1, "learning_rate": 9.9895953052001e-06, "logits/chosen": -0.7597707509994507, "logits/rejected": -0.7168670296669006, "logps/chosen": -75.60633850097656, "logps/rejected": -188.92608642578125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.13300934433937073, "rewards/margins": 4.493539333343506, "rewards/rejected": -4.626548767089844, "step": 452 }, { "epoch": 0.1, "learning_rate": 9.989479417019208e-06, "logits/chosen": -0.6566914916038513, "logits/rejected": -0.662895679473877, "logps/chosen": -142.9601593017578, "logps/rejected": -127.87373352050781, "loss": 0.3566, "rewards/accuracies": 1.0, "rewards/chosen": -1.3427612781524658, "rewards/margins": 1.930699110031128, "rewards/rejected": -3.2734603881835938, "step": 453 }, { "epoch": 0.1, "learning_rate": 9.989362887702203e-06, "logits/chosen": -0.6770066022872925, "logits/rejected": -0.6765448451042175, "logps/chosen": -208.6943817138672, "logps/rejected": -100.30207824707031, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": 0.8595504760742188, "rewards/margins": 3.0864975452423096, "rewards/rejected": -2.226947069168091, "step": 454 }, { "epoch": 0.1, "learning_rate": 9.989245717264063e-06, "logits/chosen": -0.6043165326118469, "logits/rejected": -0.515072226524353, "logps/chosen": -70.80613708496094, "logps/rejected": -113.27326202392578, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": 0.14076843857765198, "rewards/margins": 3.747899055480957, "rewards/rejected": -3.607130527496338, "step": 455 }, { "epoch": 0.1, "learning_rate": 9.989127905719841e-06, "logits/chosen": -0.5816397070884705, "logits/rejected": -0.5679159164428711, "logps/chosen": -98.46045684814453, "logps/rejected": -120.84068298339844, "loss": 0.6231, "rewards/accuracies": 0.0, "rewards/chosen": -3.6593997478485107, "rewards/margins": -0.8923571109771729, "rewards/rejected": -2.767042636871338, "step": 456 }, { "epoch": 0.1, "learning_rate": 9.989009453084678e-06, "logits/chosen": -0.481584757566452, "logits/rejected": -0.46364089846611023, "logps/chosen": -208.51487731933594, "logps/rejected": -168.94998168945312, "loss": 0.3766, "rewards/accuracies": 1.0, "rewards/chosen": -0.2716262936592102, "rewards/margins": 3.955108880996704, "rewards/rejected": -4.2267351150512695, "step": 457 }, { "epoch": 0.1, "learning_rate": 9.988890359373794e-06, "logits/chosen": -0.8784392476081848, "logits/rejected": -0.9178779721260071, "logps/chosen": -230.21575927734375, "logps/rejected": -140.7716827392578, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 3.155535936355591, "rewards/margins": 6.77281379699707, "rewards/rejected": -3.6172776222229004, "step": 458 }, { "epoch": 0.1, "learning_rate": 9.988770624602488e-06, "logits/chosen": -0.4645901918411255, "logits/rejected": -0.12775032222270966, "logps/chosen": -82.26856994628906, "logps/rejected": -148.423095703125, "loss": 0.3664, "rewards/accuracies": 1.0, "rewards/chosen": -0.1419319212436676, "rewards/margins": 5.37680196762085, "rewards/rejected": -5.518733978271484, "step": 459 }, { "epoch": 0.1, "learning_rate": 9.988650248786153e-06, "logits/chosen": -0.7020595073699951, "logits/rejected": -0.6424924731254578, "logps/chosen": -104.7337646484375, "logps/rejected": -144.9095001220703, "loss": 0.1213, "rewards/accuracies": 1.0, "rewards/chosen": -0.3918968141078949, "rewards/margins": 2.4389145374298096, "rewards/rejected": -2.8308112621307373, "step": 460 }, { "epoch": 0.1, "learning_rate": 9.988529231940252e-06, "logits/chosen": -0.3710990846157074, "logits/rejected": -0.27950650453567505, "logps/chosen": -134.67044067382812, "logps/rejected": -156.87057495117188, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.4654709100723267, "rewards/margins": 7.5296630859375, "rewards/rejected": -6.064192295074463, "step": 461 }, { "epoch": 0.1, "learning_rate": 9.988407574080337e-06, "logits/chosen": -0.8873169422149658, "logits/rejected": -0.8414136171340942, "logps/chosen": -206.78704833984375, "logps/rejected": -126.32740020751953, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 3.429363965988159, "rewards/margins": 6.815360069274902, "rewards/rejected": -3.3859963417053223, "step": 462 }, { "epoch": 0.1, "learning_rate": 9.988285275222041e-06, "logits/chosen": -0.5971731543540955, "logits/rejected": -0.6353877186775208, "logps/chosen": -140.97918701171875, "logps/rejected": -120.93473815917969, "loss": 0.3917, "rewards/accuracies": 0.0, "rewards/chosen": 0.3923492431640625, "rewards/margins": -0.04581451416015625, "rewards/rejected": 0.43816375732421875, "step": 463 }, { "epoch": 0.1, "learning_rate": 9.988162335381077e-06, "logits/chosen": -1.1065760850906372, "logits/rejected": -1.1065760850906372, "logps/chosen": -149.6951141357422, "logps/rejected": -149.6951141357422, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": 1.672200083732605, "rewards/margins": 0.0, "rewards/rejected": 1.672200083732605, "step": 464 }, { "epoch": 0.1, "learning_rate": 9.988038754573245e-06, "logits/chosen": -0.482843816280365, "logits/rejected": -0.4720526337623596, "logps/chosen": -133.36141967773438, "logps/rejected": -55.349639892578125, "loss": 0.1982, "rewards/accuracies": 1.0, "rewards/chosen": 0.08271179348230362, "rewards/margins": 0.7837837338447571, "rewards/rejected": -0.7010719180107117, "step": 465 }, { "epoch": 0.1, "learning_rate": 9.987914532814425e-06, "logits/chosen": -0.4905114471912384, "logits/rejected": -0.4515135884284973, "logps/chosen": -137.45913696289062, "logps/rejected": -122.61897277832031, "loss": 0.1375, "rewards/accuracies": 1.0, "rewards/chosen": 1.598486304283142, "rewards/margins": 3.758584499359131, "rewards/rejected": -2.1600983142852783, "step": 466 }, { "epoch": 0.1, "learning_rate": 9.987789670120578e-06, "logits/chosen": -0.6108877062797546, "logits/rejected": -0.5643476247787476, "logps/chosen": -63.706626892089844, "logps/rejected": -111.33335876464844, "loss": 0.0948, "rewards/accuracies": 1.0, "rewards/chosen": -0.4346870481967926, "rewards/margins": 3.234168291091919, "rewards/rejected": -3.6688554286956787, "step": 467 }, { "epoch": 0.1, "learning_rate": 9.987664166507749e-06, "logits/chosen": -0.49649810791015625, "logits/rejected": -0.44765356183052063, "logps/chosen": -156.86679077148438, "logps/rejected": -260.2196044921875, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -1.6311066150665283, "rewards/margins": 3.204022169113159, "rewards/rejected": -4.8351287841796875, "step": 468 }, { "epoch": 0.1, "learning_rate": 9.987538021992063e-06, "logits/chosen": -0.6756890416145325, "logits/rejected": -0.6453751921653748, "logps/chosen": -115.5110092163086, "logps/rejected": -135.0018310546875, "loss": 0.4157, "rewards/accuracies": 0.0, "rewards/chosen": -1.0843803882598877, "rewards/margins": -0.20653384923934937, "rewards/rejected": -0.8778465390205383, "step": 469 }, { "epoch": 0.1, "learning_rate": 9.987411236589733e-06, "logits/chosen": -0.7109142541885376, "logits/rejected": -0.7005774974822998, "logps/chosen": -117.22102355957031, "logps/rejected": -171.90478515625, "loss": 0.3984, "rewards/accuracies": 1.0, "rewards/chosen": -2.2001419067382812, "rewards/margins": 2.215219020843506, "rewards/rejected": -4.415360927581787, "step": 470 }, { "epoch": 0.1, "learning_rate": 9.987283810317046e-06, "logits/chosen": -0.47328484058380127, "logits/rejected": -0.40866807103157043, "logps/chosen": -110.45417785644531, "logps/rejected": -74.35539245605469, "loss": 0.1616, "rewards/accuracies": 1.0, "rewards/chosen": -1.44158935546875, "rewards/margins": 1.5872204303741455, "rewards/rejected": -3.0288097858428955, "step": 471 }, { "epoch": 0.1, "learning_rate": 9.987155743190379e-06, "logits/chosen": -0.5467115640640259, "logits/rejected": -0.5531499981880188, "logps/chosen": -258.3065185546875, "logps/rejected": -378.6409912109375, "loss": 0.2188, "rewards/accuracies": 1.0, "rewards/chosen": -0.246470645070076, "rewards/margins": 0.6009597778320312, "rewards/rejected": -0.847430408000946, "step": 472 }, { "epoch": 0.1, "learning_rate": 9.98702703522619e-06, "logits/chosen": -0.3241347372531891, "logits/rejected": -0.36230385303497314, "logps/chosen": -158.97384643554688, "logps/rejected": -124.52635192871094, "loss": 1.4698, "rewards/accuracies": 0.0, "rewards/chosen": -1.9639099836349487, "rewards/margins": -2.1346254348754883, "rewards/rejected": 0.17071533203125, "step": 473 }, { "epoch": 0.1, "learning_rate": 9.986897686441012e-06, "logits/chosen": -0.6073166728019714, "logits/rejected": -0.5469771027565002, "logps/chosen": -151.78492736816406, "logps/rejected": -166.23724365234375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 1.3906692266464233, "rewards/margins": 5.9925994873046875, "rewards/rejected": -4.601930141448975, "step": 474 }, { "epoch": 0.11, "learning_rate": 9.986767696851472e-06, "logits/chosen": -0.6443341374397278, "logits/rejected": -0.5845492482185364, "logps/chosen": -173.42684936523438, "logps/rejected": -275.4814453125, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": 0.49425965547561646, "rewards/margins": 3.8513381481170654, "rewards/rejected": -3.3570785522460938, "step": 475 }, { "epoch": 0.11, "learning_rate": 9.98663706647427e-06, "logits/chosen": -0.8012450933456421, "logits/rejected": -0.8264419436454773, "logps/chosen": -240.8748321533203, "logps/rejected": -148.71728515625, "loss": 0.1203, "rewards/accuracies": 1.0, "rewards/chosen": 1.145588755607605, "rewards/margins": 2.1095657348632812, "rewards/rejected": -0.963977038860321, "step": 476 }, { "epoch": 0.11, "learning_rate": 9.986505795326194e-06, "logits/chosen": -0.9915199279785156, "logits/rejected": -0.9699882864952087, "logps/chosen": -124.26457214355469, "logps/rejected": -174.1499481201172, "loss": 0.3777, "rewards/accuracies": 1.0, "rewards/chosen": -3.247021436691284, "rewards/margins": 3.994859457015991, "rewards/rejected": -7.241880893707275, "step": 477 }, { "epoch": 0.11, "learning_rate": 9.986373883424108e-06, "logits/chosen": -0.48701170086860657, "logits/rejected": -0.49657487869262695, "logps/chosen": -148.27615356445312, "logps/rejected": -193.21401977539062, "loss": 0.3636, "rewards/accuracies": 1.0, "rewards/chosen": 1.4705413579940796, "rewards/margins": 3.361041307449341, "rewards/rejected": -1.8904999494552612, "step": 478 }, { "epoch": 0.11, "learning_rate": 9.986241330784967e-06, "logits/chosen": -0.9626457095146179, "logits/rejected": -0.9555395841598511, "logps/chosen": -116.85376739501953, "logps/rejected": -226.31210327148438, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.4372520446777344, "rewards/margins": 6.239347457885742, "rewards/rejected": -8.676599502563477, "step": 479 }, { "epoch": 0.11, "learning_rate": 9.9861081374258e-06, "logits/chosen": -0.5113767385482788, "logits/rejected": -0.4569534361362457, "logps/chosen": -98.05864715576172, "logps/rejected": -163.6622314453125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.2579078674316406, "rewards/margins": 4.374946117401123, "rewards/rejected": -4.632853984832764, "step": 480 }, { "epoch": 0.11, "learning_rate": 9.985974303363723e-06, "logits/chosen": -0.9141353368759155, "logits/rejected": -0.9168792366981506, "logps/chosen": -77.9969482421875, "logps/rejected": -72.67218017578125, "loss": 0.448, "rewards/accuracies": 0.0, "rewards/chosen": -0.5123909115791321, "rewards/margins": -0.3706115782260895, "rewards/rejected": -0.1417793333530426, "step": 481 }, { "epoch": 0.11, "learning_rate": 9.985839828615937e-06, "logits/chosen": -0.4447694420814514, "logits/rejected": -0.4447694420814514, "logps/chosen": -135.35044860839844, "logps/rejected": -135.35044860839844, "loss": 0.3943, "rewards/accuracies": 0.0, "rewards/chosen": -2.7662293910980225, "rewards/margins": 0.0, "rewards/rejected": -2.7662293910980225, "step": 482 }, { "epoch": 0.11, "learning_rate": 9.985704713199715e-06, "logits/chosen": -0.6694200038909912, "logits/rejected": -0.582830011844635, "logps/chosen": -242.10833740234375, "logps/rejected": -412.21514892578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.91851806640625, "rewards/margins": 11.435757637023926, "rewards/rejected": -10.517239570617676, "step": 483 }, { "epoch": 0.11, "learning_rate": 9.985568957132425e-06, "logits/chosen": -0.6951518654823303, "logits/rejected": -0.6652774810791016, "logps/chosen": -126.09233093261719, "logps/rejected": -174.11077880859375, "loss": 0.2364, "rewards/accuracies": 1.0, "rewards/chosen": 2.1973588466644287, "rewards/margins": 0.5049852132797241, "rewards/rejected": 1.6923736333847046, "step": 484 }, { "epoch": 0.11, "learning_rate": 9.98543256043151e-06, "logits/chosen": -0.6700296401977539, "logits/rejected": -0.6590355634689331, "logps/chosen": -94.02130889892578, "logps/rejected": -93.84684753417969, "loss": 0.4607, "rewards/accuracies": 1.0, "rewards/chosen": -0.06220092996954918, "rewards/margins": 0.8594551682472229, "rewards/rejected": -0.92165607213974, "step": 485 }, { "epoch": 0.11, "learning_rate": 9.985295523114492e-06, "logits/chosen": -0.3562183082103729, "logits/rejected": -0.39252954721450806, "logps/chosen": -94.86741638183594, "logps/rejected": -144.89794921875, "loss": 0.1194, "rewards/accuracies": 1.0, "rewards/chosen": -1.2405685186386108, "rewards/margins": 1.3114837408065796, "rewards/rejected": -2.5520522594451904, "step": 486 }, { "epoch": 0.11, "learning_rate": 9.985157845198987e-06, "logits/chosen": -0.7114580869674683, "logits/rejected": -0.6429816484451294, "logps/chosen": -228.74722290039062, "logps/rejected": -176.81051635742188, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": 1.0863052606582642, "rewards/margins": 7.941153049468994, "rewards/rejected": -6.8548479080200195, "step": 487 }, { "epoch": 0.11, "learning_rate": 9.985019526702682e-06, "logits/chosen": -0.40314555168151855, "logits/rejected": -0.40314555168151855, "logps/chosen": -134.44825744628906, "logps/rejected": -134.44825744628906, "loss": 0.3524, "rewards/accuracies": 0.0, "rewards/chosen": -2.617083787918091, "rewards/margins": 0.0, "rewards/rejected": -2.617083787918091, "step": 488 }, { "epoch": 0.11, "learning_rate": 9.984880567643351e-06, "logits/chosen": -0.5526432394981384, "logits/rejected": -0.5430623292922974, "logps/chosen": -142.57266235351562, "logps/rejected": -148.93319702148438, "loss": 0.1407, "rewards/accuracies": 1.0, "rewards/chosen": -0.4791809022426605, "rewards/margins": 1.125213623046875, "rewards/rejected": -1.604394555091858, "step": 489 }, { "epoch": 0.11, "learning_rate": 9.984740968038852e-06, "logits/chosen": -0.5992652773857117, "logits/rejected": -0.5159466862678528, "logps/chosen": -175.55270385742188, "logps/rejected": -179.67120361328125, "loss": 0.4959, "rewards/accuracies": 1.0, "rewards/chosen": 2.2514588832855225, "rewards/margins": 6.1033830642700195, "rewards/rejected": -3.851924180984497, "step": 490 }, { "epoch": 0.11, "learning_rate": 9.984600727907119e-06, "logits/chosen": -0.5646854639053345, "logits/rejected": -0.5485546588897705, "logps/chosen": -146.6224365234375, "logps/rejected": -123.49617004394531, "loss": 0.4031, "rewards/accuracies": 1.0, "rewards/chosen": -0.7220444083213806, "rewards/margins": 1.2337067127227783, "rewards/rejected": -1.9557510614395142, "step": 491 }, { "epoch": 0.11, "learning_rate": 9.984459847266176e-06, "logits/chosen": -0.6206889152526855, "logits/rejected": -0.570344865322113, "logps/chosen": -74.24215698242188, "logps/rejected": -140.42587280273438, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": 0.006813812535256147, "rewards/margins": 3.868194580078125, "rewards/rejected": -3.8613808155059814, "step": 492 }, { "epoch": 0.11, "learning_rate": 9.984318326134125e-06, "logits/chosen": -0.7695109844207764, "logits/rejected": -0.8023179769515991, "logps/chosen": -213.77584838867188, "logps/rejected": -178.485595703125, "loss": 0.3424, "rewards/accuracies": 1.0, "rewards/chosen": 1.2174164056777954, "rewards/margins": 0.8706954717636108, "rewards/rejected": 0.3467209041118622, "step": 493 }, { "epoch": 0.11, "learning_rate": 9.984176164529151e-06, "logits/chosen": -0.7725856304168701, "logits/rejected": -0.7078924179077148, "logps/chosen": -124.91610717773438, "logps/rejected": -165.9505157470703, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 0.12916718423366547, "rewards/margins": 4.50264835357666, "rewards/rejected": -4.373481273651123, "step": 494 }, { "epoch": 0.11, "learning_rate": 9.984033362469522e-06, "logits/chosen": -0.7908350229263306, "logits/rejected": -0.8117672204971313, "logps/chosen": -85.4332275390625, "logps/rejected": -53.236026763916016, "loss": 0.4954, "rewards/accuracies": 0.0, "rewards/chosen": -0.5481773614883423, "rewards/margins": -0.36845171451568604, "rewards/rejected": -0.17972564697265625, "step": 495 }, { "epoch": 0.11, "learning_rate": 9.983889919973586e-06, "logits/chosen": -0.6276232004165649, "logits/rejected": -0.5545917749404907, "logps/chosen": -134.67710876464844, "logps/rejected": -231.45933532714844, "loss": 0.1085, "rewards/accuracies": 1.0, "rewards/chosen": 0.0061706542037427425, "rewards/margins": 4.923316955566406, "rewards/rejected": -4.9171462059021, "step": 496 }, { "epoch": 0.11, "learning_rate": 9.983745837059777e-06, "logits/chosen": -1.1127679347991943, "logits/rejected": -1.0841789245605469, "logps/chosen": -107.87115478515625, "logps/rejected": -135.68397521972656, "loss": 0.1615, "rewards/accuracies": 1.0, "rewards/chosen": -0.3331863582134247, "rewards/margins": 2.261873722076416, "rewards/rejected": -2.595060110092163, "step": 497 }, { "epoch": 0.11, "learning_rate": 9.98360111374661e-06, "logits/chosen": -0.5253493189811707, "logits/rejected": -0.49609556794166565, "logps/chosen": -40.507205963134766, "logps/rejected": -76.22518157958984, "loss": 0.3662, "rewards/accuracies": 1.0, "rewards/chosen": -1.5264167785644531, "rewards/margins": 0.9144408702850342, "rewards/rejected": -2.4408576488494873, "step": 498 }, { "epoch": 0.11, "learning_rate": 9.983455750052678e-06, "logits/chosen": -0.46163883805274963, "logits/rejected": -0.3927372395992279, "logps/chosen": -141.8114776611328, "logps/rejected": -195.7259521484375, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": -0.5511917471885681, "rewards/margins": 2.5866806507110596, "rewards/rejected": -3.1378724575042725, "step": 499 }, { "epoch": 0.11, "learning_rate": 9.983309745996663e-06, "logits/chosen": -0.6280028820037842, "logits/rejected": -0.7068947553634644, "logps/chosen": -143.55198669433594, "logps/rejected": -244.89291381835938, "loss": 0.109, "rewards/accuracies": 1.0, "rewards/chosen": -0.03560638427734375, "rewards/margins": 1.412867784500122, "rewards/rejected": -1.4484741687774658, "step": 500 }, { "epoch": 0.11, "learning_rate": 9.983163101597325e-06, "logits/chosen": -0.6037439703941345, "logits/rejected": -0.5073035359382629, "logps/chosen": -239.4945068359375, "logps/rejected": -188.94012451171875, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/chosen": 2.2581207752227783, "rewards/margins": 2.4831268787384033, "rewards/rejected": -0.225006103515625, "step": 501 }, { "epoch": 0.11, "learning_rate": 9.983015816873508e-06, "logits/chosen": -0.7051157355308533, "logits/rejected": -0.5864474177360535, "logps/chosen": -232.767822265625, "logps/rejected": -422.0250244140625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 2.355419874191284, "rewards/margins": 5.248700141906738, "rewards/rejected": -2.893280029296875, "step": 502 }, { "epoch": 0.11, "learning_rate": 9.982867891844136e-06, "logits/chosen": -0.7556862831115723, "logits/rejected": -0.7525623440742493, "logps/chosen": -124.04446411132812, "logps/rejected": -106.69267272949219, "loss": 1.4212, "rewards/accuracies": 0.0, "rewards/chosen": -2.0548171997070312, "rewards/margins": -2.780285596847534, "rewards/rejected": 0.7254684567451477, "step": 503 }, { "epoch": 0.11, "learning_rate": 9.98271932652822e-06, "logits/chosen": -0.8966175317764282, "logits/rejected": -0.9051766991615295, "logps/chosen": -100.76205444335938, "logps/rejected": -125.29520416259766, "loss": 0.3268, "rewards/accuracies": 1.0, "rewards/chosen": -0.43840715289115906, "rewards/margins": 0.34327927231788635, "rewards/rejected": -0.7816864252090454, "step": 504 }, { "epoch": 0.11, "learning_rate": 9.982570120944847e-06, "logits/chosen": -0.7121794819831848, "logits/rejected": -0.6593374013900757, "logps/chosen": -79.9317626953125, "logps/rejected": -195.9005584716797, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.441641241312027, "rewards/margins": 4.48391580581665, "rewards/rejected": -4.9255571365356445, "step": 505 }, { "epoch": 0.11, "learning_rate": 9.982420275113194e-06, "logits/chosen": -0.8331749439239502, "logits/rejected": -0.821622371673584, "logps/chosen": -136.80807495117188, "logps/rejected": -204.75631713867188, "loss": 0.0495, "rewards/accuracies": 1.0, "rewards/chosen": 0.6892120242118835, "rewards/margins": 6.5621795654296875, "rewards/rejected": -5.872967720031738, "step": 506 }, { "epoch": 0.11, "learning_rate": 9.98226978905251e-06, "logits/chosen": -0.7578980326652527, "logits/rejected": -0.7326966524124146, "logps/chosen": -185.61984252929688, "logps/rejected": -181.00985717773438, "loss": 0.1328, "rewards/accuracies": 1.0, "rewards/chosen": 0.963153064250946, "rewards/margins": 1.2053756713867188, "rewards/rejected": -0.2422225922346115, "step": 507 }, { "epoch": 0.11, "learning_rate": 9.982118662782136e-06, "logits/chosen": -0.8706294298171997, "logits/rejected": -0.9029723405838013, "logps/chosen": -104.59502410888672, "logps/rejected": -65.66200256347656, "loss": 0.5369, "rewards/accuracies": 1.0, "rewards/chosen": 0.39676591753959656, "rewards/margins": 2.1701743602752686, "rewards/rejected": -1.7734085321426392, "step": 508 }, { "epoch": 0.11, "learning_rate": 9.981966896321492e-06, "logits/chosen": -0.5302107334136963, "logits/rejected": -0.5142199993133545, "logps/chosen": -87.1651611328125, "logps/rejected": -207.78189086914062, "loss": 0.2196, "rewards/accuracies": 1.0, "rewards/chosen": -0.7880668640136719, "rewards/margins": 3.418731212615967, "rewards/rejected": -4.206798076629639, "step": 509 }, { "epoch": 0.11, "learning_rate": 9.981814489690077e-06, "logits/chosen": -0.5063857436180115, "logits/rejected": -0.4480626881122589, "logps/chosen": -65.60636901855469, "logps/rejected": -174.4337158203125, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -1.0173977613449097, "rewards/margins": 2.972151756286621, "rewards/rejected": -3.989549398422241, "step": 510 }, { "epoch": 0.11, "learning_rate": 9.981661442907477e-06, "logits/chosen": -0.4671403169631958, "logits/rejected": -0.43266940116882324, "logps/chosen": -80.3114013671875, "logps/rejected": -207.81015014648438, "loss": 0.2849, "rewards/accuracies": 1.0, "rewards/chosen": 0.4964096248149872, "rewards/margins": 0.2642959952354431, "rewards/rejected": 0.23211364448070526, "step": 511 }, { "epoch": 0.11, "learning_rate": 9.981507755993357e-06, "logits/chosen": -0.8487785458564758, "logits/rejected": -0.8628777861595154, "logps/chosen": -89.84423065185547, "logps/rejected": -145.86077880859375, "loss": 0.1369, "rewards/accuracies": 1.0, "rewards/chosen": 0.0896400436758995, "rewards/margins": 1.159247636795044, "rewards/rejected": -1.0696076154708862, "step": 512 }, { "epoch": 0.11, "learning_rate": 9.981353428967465e-06, "logits/chosen": -0.651858925819397, "logits/rejected": -0.6739110946655273, "logps/chosen": -71.88076782226562, "logps/rejected": -46.41902160644531, "loss": 0.273, "rewards/accuracies": 1.0, "rewards/chosen": 1.0280563831329346, "rewards/margins": 0.5177410244941711, "rewards/rejected": 0.5103153586387634, "step": 513 }, { "epoch": 0.11, "learning_rate": 9.98119846184963e-06, "logits/chosen": -0.6642687916755676, "logits/rejected": -0.6413002610206604, "logps/chosen": -122.88861846923828, "logps/rejected": -70.59894561767578, "loss": 0.6052, "rewards/accuracies": 0.0, "rewards/chosen": -0.6527412533760071, "rewards/margins": -0.40738677978515625, "rewards/rejected": -0.24535445868968964, "step": 514 }, { "epoch": 0.11, "learning_rate": 9.98104285465977e-06, "logits/chosen": -0.606203019618988, "logits/rejected": -0.4912429749965668, "logps/chosen": -423.960693359375, "logps/rejected": -132.62063598632812, "loss": 0.1748, "rewards/accuracies": 1.0, "rewards/chosen": -0.834362804889679, "rewards/margins": 0.8984466195106506, "rewards/rejected": -1.7328094244003296, "step": 515 }, { "epoch": 0.11, "learning_rate": 9.980886607417877e-06, "logits/chosen": -0.6725032329559326, "logits/rejected": -0.6722657680511475, "logps/chosen": -154.0214385986328, "logps/rejected": -202.37953186035156, "loss": 0.0724, "rewards/accuracies": 1.0, "rewards/chosen": 0.7026947140693665, "rewards/margins": 1.8603456020355225, "rewards/rejected": -1.1576508283615112, "step": 516 }, { "epoch": 0.11, "learning_rate": 9.980729720144027e-06, "logits/chosen": -0.39861878752708435, "logits/rejected": -0.3967175781726837, "logps/chosen": -118.74299621582031, "logps/rejected": -113.03506469726562, "loss": 0.1837, "rewards/accuracies": 1.0, "rewards/chosen": 0.5594375729560852, "rewards/margins": 0.8125060796737671, "rewards/rejected": -0.2530685365200043, "step": 517 }, { "epoch": 0.11, "learning_rate": 9.980572192858383e-06, "logits/chosen": -0.6440351605415344, "logits/rejected": -0.6440351605415344, "logps/chosen": -79.51539611816406, "logps/rejected": -79.51539611816406, "loss": 0.3659, "rewards/accuracies": 0.0, "rewards/chosen": -0.3076126277446747, "rewards/margins": 0.0, "rewards/rejected": -0.3076126277446747, "step": 518 }, { "epoch": 0.11, "learning_rate": 9.980414025581185e-06, "logits/chosen": -0.8411838412284851, "logits/rejected": -0.7780060768127441, "logps/chosen": -157.69821166992188, "logps/rejected": -150.81634521484375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": 2.0848357677459717, "rewards/margins": 8.672000885009766, "rewards/rejected": -6.587165355682373, "step": 519 }, { "epoch": 0.12, "learning_rate": 9.980255218332758e-06, "logits/chosen": -0.5041256546974182, "logits/rejected": -0.48447155952453613, "logps/chosen": -58.77599334716797, "logps/rejected": -69.76158142089844, "loss": 0.0665, "rewards/accuracies": 1.0, "rewards/chosen": 0.9167976379394531, "rewards/margins": 2.2634940147399902, "rewards/rejected": -1.3466964960098267, "step": 520 }, { "epoch": 0.12, "learning_rate": 9.980095771133504e-06, "logits/chosen": -0.4637027978897095, "logits/rejected": -0.4637027978897095, "logps/chosen": -126.01541900634766, "logps/rejected": -126.01541900634766, "loss": 0.3534, "rewards/accuracies": 0.0, "rewards/chosen": 0.45941391587257385, "rewards/margins": 0.0, "rewards/rejected": 0.45941391587257385, "step": 521 }, { "epoch": 0.12, "learning_rate": 9.979935684003918e-06, "logits/chosen": -0.7817404866218567, "logits/rejected": -0.787335216999054, "logps/chosen": -116.35714721679688, "logps/rejected": -78.04150390625, "loss": 0.0883, "rewards/accuracies": 1.0, "rewards/chosen": 0.038550566881895065, "rewards/margins": 1.9950169324874878, "rewards/rejected": -1.9564663171768188, "step": 522 }, { "epoch": 0.12, "learning_rate": 9.979774956964569e-06, "logits/chosen": -0.629838228225708, "logits/rejected": -0.6145849823951721, "logps/chosen": -68.97647094726562, "logps/rejected": -95.83221435546875, "loss": 0.0726, "rewards/accuracies": 1.0, "rewards/chosen": -0.4296623170375824, "rewards/margins": 2.0204193592071533, "rewards/rejected": -2.4500815868377686, "step": 523 }, { "epoch": 0.12, "learning_rate": 9.979613590036108e-06, "logits/chosen": -0.23490934073925018, "logits/rejected": -0.2209901213645935, "logps/chosen": -105.11576080322266, "logps/rejected": -184.28970336914062, "loss": 0.034, "rewards/accuracies": 1.0, "rewards/chosen": 0.7948676943778992, "rewards/margins": 2.708472490310669, "rewards/rejected": -1.913604736328125, "step": 524 }, { "epoch": 0.12, "learning_rate": 9.979451583239272e-06, "logits/chosen": -0.5596106052398682, "logits/rejected": -0.5596106052398682, "logps/chosen": -51.8060302734375, "logps/rejected": -51.8060302734375, "loss": 0.3588, "rewards/accuracies": 0.0, "rewards/chosen": -0.09146805107593536, "rewards/margins": 0.0, "rewards/rejected": -0.09146805107593536, "step": 525 }, { "epoch": 0.12, "learning_rate": 9.979288936594877e-06, "logits/chosen": -0.577609658241272, "logits/rejected": -0.564347505569458, "logps/chosen": -102.5333023071289, "logps/rejected": -137.21604919433594, "loss": 0.0719, "rewards/accuracies": 1.0, "rewards/chosen": -1.9290794134140015, "rewards/margins": 2.175811767578125, "rewards/rejected": -4.104891300201416, "step": 526 }, { "epoch": 0.12, "learning_rate": 9.979125650123824e-06, "logits/chosen": -0.8085185289382935, "logits/rejected": -0.7571079730987549, "logps/chosen": -179.62380981445312, "logps/rejected": -202.4844512939453, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": 1.1835540533065796, "rewards/margins": 3.2686262130737305, "rewards/rejected": -2.0850722789764404, "step": 527 }, { "epoch": 0.12, "learning_rate": 9.978961723847093e-06, "logits/chosen": -0.827333927154541, "logits/rejected": -0.7353221774101257, "logps/chosen": -80.0083236694336, "logps/rejected": -188.28244018554688, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.5837600827217102, "rewards/margins": 5.2451276779174805, "rewards/rejected": -5.828887939453125, "step": 528 }, { "epoch": 0.12, "learning_rate": 9.978797157785752e-06, "logits/chosen": -0.7910352945327759, "logits/rejected": -0.6820618510246277, "logps/chosen": -206.80661010742188, "logps/rejected": -176.231201171875, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": 3.4168121814727783, "rewards/margins": 4.454812526702881, "rewards/rejected": -1.038000464439392, "step": 529 }, { "epoch": 0.12, "learning_rate": 9.978631951960942e-06, "logits/chosen": -0.49323928356170654, "logits/rejected": 0.04159851372241974, "logps/chosen": -125.86226654052734, "logps/rejected": -348.8887939453125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.8361320495605469, "rewards/margins": 19.928874969482422, "rewards/rejected": -19.092742919921875, "step": 530 }, { "epoch": 0.12, "learning_rate": 9.978466106393896e-06, "logits/chosen": -0.531879186630249, "logits/rejected": -0.461507111787796, "logps/chosen": -104.98174285888672, "logps/rejected": -122.84970092773438, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": 0.2706436216831207, "rewards/margins": 3.140089511871338, "rewards/rejected": -2.86944580078125, "step": 531 }, { "epoch": 0.12, "learning_rate": 9.978299621105924e-06, "logits/chosen": -0.8577405214309692, "logits/rejected": -0.8595219254493713, "logps/chosen": -127.45433044433594, "logps/rejected": -185.7558135986328, "loss": 0.128, "rewards/accuracies": 1.0, "rewards/chosen": -1.3080971240997314, "rewards/margins": 4.676745414733887, "rewards/rejected": -5.984842777252197, "step": 532 }, { "epoch": 0.12, "learning_rate": 9.978132496118418e-06, "logits/chosen": -0.4634995460510254, "logits/rejected": -0.4071389138698578, "logps/chosen": -236.5218048095703, "logps/rejected": -121.48963165283203, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": 1.4535903930664062, "rewards/margins": 4.972501754760742, "rewards/rejected": -3.518911123275757, "step": 533 }, { "epoch": 0.12, "learning_rate": 9.977964731452852e-06, "logits/chosen": -0.6904398202896118, "logits/rejected": -0.6671662330627441, "logps/chosen": -116.30360412597656, "logps/rejected": -206.23483276367188, "loss": 0.4715, "rewards/accuracies": 0.0, "rewards/chosen": -0.3094840943813324, "rewards/margins": -0.41445082426071167, "rewards/rejected": 0.10496673732995987, "step": 534 }, { "epoch": 0.12, "learning_rate": 9.977796327130786e-06, "logits/chosen": -0.9138378500938416, "logits/rejected": -0.9549282789230347, "logps/chosen": -169.98614501953125, "logps/rejected": -187.9864501953125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 1.683380126953125, "rewards/margins": 7.028225898742676, "rewards/rejected": -5.344845771789551, "step": 535 }, { "epoch": 0.12, "learning_rate": 9.977627283173858e-06, "logits/chosen": -0.3413739800453186, "logits/rejected": -0.3413739800453186, "logps/chosen": -72.09425354003906, "logps/rejected": -72.09425354003906, "loss": 0.4367, "rewards/accuracies": 0.0, "rewards/chosen": -0.6365295648574829, "rewards/margins": 0.0, "rewards/rejected": -0.6365295648574829, "step": 536 }, { "epoch": 0.12, "learning_rate": 9.97745759960379e-06, "logits/chosen": -0.44318023324012756, "logits/rejected": -0.36651164293289185, "logps/chosen": -69.47000885009766, "logps/rejected": -221.4317626953125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 0.0028869628440588713, "rewards/margins": 6.181568622589111, "rewards/rejected": -6.17868185043335, "step": 537 }, { "epoch": 0.12, "learning_rate": 9.977287276442385e-06, "logits/chosen": -0.73805832862854, "logits/rejected": -0.7459003329277039, "logps/chosen": -40.126277923583984, "logps/rejected": -47.02195739746094, "loss": 0.2859, "rewards/accuracies": 1.0, "rewards/chosen": 0.16629448533058167, "rewards/margins": 1.0751250982284546, "rewards/rejected": -0.9088306427001953, "step": 538 }, { "epoch": 0.12, "learning_rate": 9.97711631371153e-06, "logits/chosen": -0.3979322016239166, "logits/rejected": -0.4000396728515625, "logps/chosen": -166.51197814941406, "logps/rejected": -134.95335388183594, "loss": 0.485, "rewards/accuracies": 0.0, "rewards/chosen": -4.999547481536865, "rewards/margins": -0.492612361907959, "rewards/rejected": -4.506935119628906, "step": 539 }, { "epoch": 0.12, "learning_rate": 9.976944711433194e-06, "logits/chosen": -0.5478523969650269, "logits/rejected": -0.5699415802955627, "logps/chosen": -243.94113159179688, "logps/rejected": -79.46397399902344, "loss": 0.4967, "rewards/accuracies": 0.0, "rewards/chosen": -1.2062851190567017, "rewards/margins": -0.3279876708984375, "rewards/rejected": -0.8782974481582642, "step": 540 }, { "epoch": 0.12, "learning_rate": 9.976772469629428e-06, "logits/chosen": -0.7618892788887024, "logits/rejected": -0.7358133792877197, "logps/chosen": -77.45536804199219, "logps/rejected": -108.62104034423828, "loss": 0.8268, "rewards/accuracies": 1.0, "rewards/chosen": -0.691693127155304, "rewards/margins": 1.6314506530761719, "rewards/rejected": -2.323143720626831, "step": 541 }, { "epoch": 0.12, "learning_rate": 9.976599588322362e-06, "logits/chosen": -0.7130159139633179, "logits/rejected": -0.7181368470191956, "logps/chosen": -74.08792114257812, "logps/rejected": -28.423011779785156, "loss": 0.2621, "rewards/accuracies": 1.0, "rewards/chosen": -0.5790039300918579, "rewards/margins": 0.4074276089668274, "rewards/rejected": -0.9864315390586853, "step": 542 }, { "epoch": 0.12, "learning_rate": 9.976426067534212e-06, "logits/chosen": -0.5789256691932678, "logits/rejected": -0.5550122857093811, "logps/chosen": -97.8905258178711, "logps/rejected": -270.85986328125, "loss": 0.109, "rewards/accuracies": 1.0, "rewards/chosen": -0.20698928833007812, "rewards/margins": 1.6405738592147827, "rewards/rejected": -1.8475631475448608, "step": 543 }, { "epoch": 0.12, "learning_rate": 9.976251907287277e-06, "logits/chosen": -0.45319947600364685, "logits/rejected": -0.39891526103019714, "logps/chosen": -75.39026641845703, "logps/rejected": -214.0352783203125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.3030349910259247, "rewards/margins": 7.193073749542236, "rewards/rejected": -6.890038967132568, "step": 544 }, { "epoch": 0.12, "learning_rate": 9.976077107603933e-06, "logits/chosen": -0.5079336166381836, "logits/rejected": -0.4948216676712036, "logps/chosen": -82.81365203857422, "logps/rejected": -147.82562255859375, "loss": 0.0638, "rewards/accuracies": 1.0, "rewards/chosen": 0.8915069699287415, "rewards/margins": 2.0981781482696533, "rewards/rejected": -1.206671118736267, "step": 545 }, { "epoch": 0.12, "learning_rate": 9.975901668506644e-06, "logits/chosen": -0.6202579736709595, "logits/rejected": -0.6161818504333496, "logps/chosen": -177.92193603515625, "logps/rejected": -118.86180114746094, "loss": 0.0532, "rewards/accuracies": 1.0, "rewards/chosen": 1.1934112310409546, "rewards/margins": 2.1872718334198, "rewards/rejected": -0.99386066198349, "step": 546 }, { "epoch": 0.12, "learning_rate": 9.97572559001795e-06, "logits/chosen": -0.41812241077423096, "logits/rejected": -0.41812241077423096, "logps/chosen": -53.80045700073242, "logps/rejected": -53.80045700073242, "loss": 0.3469, "rewards/accuracies": 0.0, "rewards/chosen": -2.4240682125091553, "rewards/margins": 0.0, "rewards/rejected": -2.4240682125091553, "step": 547 }, { "epoch": 0.12, "learning_rate": 9.975548872160482e-06, "logits/chosen": -0.7435089945793152, "logits/rejected": -0.6909858584403992, "logps/chosen": -117.75532531738281, "logps/rejected": -173.24493408203125, "loss": 0.086, "rewards/accuracies": 1.0, "rewards/chosen": -1.4238418340682983, "rewards/margins": 1.6756669282913208, "rewards/rejected": -3.099508762359619, "step": 548 }, { "epoch": 0.12, "learning_rate": 9.975371514956945e-06, "logits/chosen": -0.9876294732093811, "logits/rejected": -0.985566258430481, "logps/chosen": -25.886981964111328, "logps/rejected": -65.52371978759766, "loss": 0.6489, "rewards/accuracies": 0.0, "rewards/chosen": -0.7765993475914001, "rewards/margins": -0.9682894349098206, "rewards/rejected": 0.19169007241725922, "step": 549 }, { "epoch": 0.12, "learning_rate": 9.975193518430127e-06, "logits/chosen": -0.7729825973510742, "logits/rejected": -0.718802273273468, "logps/chosen": -71.57733154296875, "logps/rejected": -61.884700775146484, "loss": 0.0932, "rewards/accuracies": 1.0, "rewards/chosen": 0.8439132571220398, "rewards/margins": 2.5030314922332764, "rewards/rejected": -1.6591182947158813, "step": 550 }, { "epoch": 0.12, "learning_rate": 9.9750148826029e-06, "logits/chosen": -0.4407617449760437, "logits/rejected": -0.4402139484882355, "logps/chosen": -114.768798828125, "logps/rejected": -46.32079315185547, "loss": 0.7053, "rewards/accuracies": 0.0, "rewards/chosen": -3.4199721813201904, "rewards/margins": -1.1304433345794678, "rewards/rejected": -2.2895288467407227, "step": 551 }, { "epoch": 0.12, "learning_rate": 9.974835607498224e-06, "logits/chosen": -0.8362148404121399, "logits/rejected": -0.8174648284912109, "logps/chosen": -70.15849304199219, "logps/rejected": -96.94276428222656, "loss": 0.6877, "rewards/accuracies": 1.0, "rewards/chosen": -0.20818786323070526, "rewards/margins": 0.9029091000556946, "rewards/rejected": -1.111096978187561, "step": 552 }, { "epoch": 0.12, "learning_rate": 9.97465569313913e-06, "logits/chosen": -0.5588389039039612, "logits/rejected": -0.47667086124420166, "logps/chosen": -139.12832641601562, "logps/rejected": -230.46002197265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.25431519746780396, "rewards/margins": 9.146820068359375, "rewards/rejected": -9.401135444641113, "step": 553 }, { "epoch": 0.12, "learning_rate": 9.974475139548738e-06, "logits/chosen": -0.7992130517959595, "logits/rejected": -0.6882429122924805, "logps/chosen": -159.13204956054688, "logps/rejected": -291.118896484375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 2.1241912841796875, "rewards/margins": 8.304208755493164, "rewards/rejected": -6.180016994476318, "step": 554 }, { "epoch": 0.12, "learning_rate": 9.97429394675025e-06, "logits/chosen": -0.6631167531013489, "logits/rejected": -0.5627027750015259, "logps/chosen": -71.28030395507812, "logps/rejected": -47.27032470703125, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/chosen": 0.8418548703193665, "rewards/margins": 2.3984432220458984, "rewards/rejected": -1.5565884113311768, "step": 555 }, { "epoch": 0.12, "learning_rate": 9.974112114766945e-06, "logits/chosen": -0.6055999398231506, "logits/rejected": -0.5530303716659546, "logps/chosen": -248.49246215820312, "logps/rejected": -44.321075439453125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": 2.8500335216522217, "rewards/margins": 5.182438850402832, "rewards/rejected": -2.3324053287506104, "step": 556 }, { "epoch": 0.12, "learning_rate": 9.973929643622194e-06, "logits/chosen": -0.7193117141723633, "logits/rejected": -0.6858044862747192, "logps/chosen": -153.65847778320312, "logps/rejected": -147.08209228515625, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": -0.661224365234375, "rewards/margins": 4.106536865234375, "rewards/rejected": -4.76776123046875, "step": 557 }, { "epoch": 0.12, "learning_rate": 9.973746533339438e-06, "logits/chosen": -0.3178046643733978, "logits/rejected": -0.30865150690078735, "logps/chosen": -79.37213134765625, "logps/rejected": -85.29435729980469, "loss": 0.4367, "rewards/accuracies": 1.0, "rewards/chosen": 0.245198056101799, "rewards/margins": 0.8147934079170227, "rewards/rejected": -0.5695953369140625, "step": 558 }, { "epoch": 0.12, "learning_rate": 9.97356278394221e-06, "logits/chosen": -0.4249363839626312, "logits/rejected": -0.3800058662891388, "logps/chosen": -76.58409118652344, "logps/rejected": -111.66057586669922, "loss": 0.1668, "rewards/accuracies": 1.0, "rewards/chosen": -1.1234298944473267, "rewards/margins": 1.098168969154358, "rewards/rejected": -2.2215988636016846, "step": 559 }, { "epoch": 0.12, "learning_rate": 9.973378395454121e-06, "logits/chosen": -0.5918751955032349, "logits/rejected": -0.5611302852630615, "logps/chosen": -123.83504486083984, "logps/rejected": -153.9122314453125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 1.8581489324569702, "rewards/margins": 5.990818023681641, "rewards/rejected": -4.132668972015381, "step": 560 }, { "epoch": 0.12, "learning_rate": 9.973193367898863e-06, "logits/chosen": -0.891991376876831, "logits/rejected": -0.8860205411911011, "logps/chosen": -79.28285217285156, "logps/rejected": -63.97467041015625, "loss": 0.1274, "rewards/accuracies": 1.0, "rewards/chosen": -0.2966606318950653, "rewards/margins": 1.237431287765503, "rewards/rejected": -1.5340919494628906, "step": 561 }, { "epoch": 0.12, "learning_rate": 9.973007701300214e-06, "logits/chosen": -0.9521768689155579, "logits/rejected": -0.9655455946922302, "logps/chosen": -124.34764862060547, "logps/rejected": -138.92958068847656, "loss": 0.1913, "rewards/accuracies": 1.0, "rewards/chosen": -4.671075344085693, "rewards/margins": 0.8446817398071289, "rewards/rejected": -5.515757083892822, "step": 562 }, { "epoch": 0.12, "learning_rate": 9.972821395682029e-06, "logits/chosen": -0.8203745484352112, "logits/rejected": -0.7752024531364441, "logps/chosen": -121.18783569335938, "logps/rejected": -127.43165588378906, "loss": 0.8952, "rewards/accuracies": 0.0, "rewards/chosen": -0.6115165948867798, "rewards/margins": -1.5965735912322998, "rewards/rejected": 0.9850570559501648, "step": 563 }, { "epoch": 0.12, "learning_rate": 9.972634451068248e-06, "logits/chosen": -1.0970451831817627, "logits/rejected": -1.1981042623519897, "logps/chosen": -146.37960815429688, "logps/rejected": -48.42169189453125, "loss": 0.2152, "rewards/accuracies": 1.0, "rewards/chosen": -1.8125686645507812, "rewards/margins": 0.6265742778778076, "rewards/rejected": -2.439142942428589, "step": 564 }, { "epoch": 0.13, "learning_rate": 9.972446867482896e-06, "logits/chosen": -0.8064634799957275, "logits/rejected": -0.747029185295105, "logps/chosen": -171.074462890625, "logps/rejected": -136.121337890625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": 0.6565521359443665, "rewards/margins": 4.072272777557373, "rewards/rejected": -3.4157204627990723, "step": 565 }, { "epoch": 0.13, "learning_rate": 9.972258644950074e-06, "logits/chosen": -0.6996528506278992, "logits/rejected": -0.7034784555435181, "logps/chosen": -78.9278564453125, "logps/rejected": -124.830078125, "loss": 0.0949, "rewards/accuracies": 1.0, "rewards/chosen": 1.0320602655410767, "rewards/margins": 1.662909746170044, "rewards/rejected": -0.6308494806289673, "step": 566 }, { "epoch": 0.13, "learning_rate": 9.97206978349397e-06, "logits/chosen": -0.5791073441505432, "logits/rejected": -0.5471688508987427, "logps/chosen": -48.198272705078125, "logps/rejected": -33.777122497558594, "loss": 0.1032, "rewards/accuracies": 1.0, "rewards/chosen": 0.4256324768066406, "rewards/margins": 1.5558452606201172, "rewards/rejected": -1.1302127838134766, "step": 567 }, { "epoch": 0.13, "learning_rate": 9.971880283138849e-06, "logits/chosen": -0.7549297213554382, "logits/rejected": -0.7208660840988159, "logps/chosen": -134.46104431152344, "logps/rejected": -301.7008972167969, "loss": 0.0757, "rewards/accuracies": 1.0, "rewards/chosen": 1.8488022089004517, "rewards/margins": 16.14240264892578, "rewards/rejected": -14.293601036071777, "step": 568 }, { "epoch": 0.13, "learning_rate": 9.971690143909066e-06, "logits/chosen": -0.30785125494003296, "logits/rejected": -0.2923506200313568, "logps/chosen": -176.79736328125, "logps/rejected": -132.23814392089844, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": 1.2541321516036987, "rewards/margins": 3.5949831008911133, "rewards/rejected": -2.340850830078125, "step": 569 }, { "epoch": 0.13, "learning_rate": 9.971499365829049e-06, "logits/chosen": -0.5149192214012146, "logits/rejected": -0.5671703815460205, "logps/chosen": -201.27182006835938, "logps/rejected": -175.69473266601562, "loss": 0.2982, "rewards/accuracies": 1.0, "rewards/chosen": -5.150219917297363, "rewards/margins": 0.7662911415100098, "rewards/rejected": -5.916511058807373, "step": 570 }, { "epoch": 0.13, "learning_rate": 9.971307948923316e-06, "logits/chosen": -0.6733738780021667, "logits/rejected": -0.7022985816001892, "logps/chosen": -173.27914428710938, "logps/rejected": -214.56692504882812, "loss": 0.1928, "rewards/accuracies": 1.0, "rewards/chosen": -2.5359559059143066, "rewards/margins": 2.937234401702881, "rewards/rejected": -5.4731903076171875, "step": 571 }, { "epoch": 0.13, "learning_rate": 9.971115893216463e-06, "logits/chosen": -0.71466064453125, "logits/rejected": -0.6448569893836975, "logps/chosen": -75.61909484863281, "logps/rejected": -129.0181884765625, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": 0.87628173828125, "rewards/margins": 4.059700012207031, "rewards/rejected": -3.1834182739257812, "step": 572 }, { "epoch": 0.13, "learning_rate": 9.970923198733167e-06, "logits/chosen": -0.45341527462005615, "logits/rejected": -0.430344820022583, "logps/chosen": -95.2536849975586, "logps/rejected": -97.26507568359375, "loss": 0.1337, "rewards/accuracies": 1.0, "rewards/chosen": -0.6784157156944275, "rewards/margins": 1.1830604076385498, "rewards/rejected": -1.861476182937622, "step": 573 }, { "epoch": 0.13, "learning_rate": 9.97072986549819e-06, "logits/chosen": -0.6132628917694092, "logits/rejected": -0.580146849155426, "logps/chosen": -155.31466674804688, "logps/rejected": -159.360107421875, "loss": 0.126, "rewards/accuracies": 1.0, "rewards/chosen": 1.2270797491073608, "rewards/margins": 1.250102162361145, "rewards/rejected": -0.02302246168255806, "step": 574 }, { "epoch": 0.13, "learning_rate": 9.970535893536375e-06, "logits/chosen": -0.83331298828125, "logits/rejected": -0.7841315269470215, "logps/chosen": -90.71029663085938, "logps/rejected": -118.62601470947266, "loss": 0.1313, "rewards/accuracies": 1.0, "rewards/chosen": -2.4273879528045654, "rewards/margins": 2.3112094402313232, "rewards/rejected": -4.738597393035889, "step": 575 }, { "epoch": 0.13, "learning_rate": 9.970341282872645e-06, "logits/chosen": -0.891575813293457, "logits/rejected": -0.891575813293457, "logps/chosen": -96.0318603515625, "logps/rejected": -96.0318603515625, "loss": 0.3532, "rewards/accuracies": 0.0, "rewards/chosen": -0.8034637570381165, "rewards/margins": 0.0, "rewards/rejected": -0.8034637570381165, "step": 576 }, { "epoch": 0.13, "learning_rate": 9.97014603353201e-06, "logits/chosen": -0.5157510042190552, "logits/rejected": -0.5026519894599915, "logps/chosen": -193.66690063476562, "logps/rejected": -62.29485321044922, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": 2.0345535278320312, "rewards/margins": 3.8331351280212402, "rewards/rejected": -1.7985817193984985, "step": 577 }, { "epoch": 0.13, "learning_rate": 9.969950145539557e-06, "logits/chosen": -0.6382860541343689, "logits/rejected": -0.6558476686477661, "logps/chosen": -123.03097534179688, "logps/rejected": -161.93637084960938, "loss": 0.6797, "rewards/accuracies": 0.0, "rewards/chosen": -0.07429733127355576, "rewards/margins": -1.0622673034667969, "rewards/rejected": 0.9879699945449829, "step": 578 }, { "epoch": 0.13, "learning_rate": 9.969753618920456e-06, "logits/chosen": -0.821486234664917, "logits/rejected": -0.850084125995636, "logps/chosen": -176.1470489501953, "logps/rejected": -138.07981872558594, "loss": 0.2485, "rewards/accuracies": 1.0, "rewards/chosen": 1.0211304426193237, "rewards/margins": 4.928936958312988, "rewards/rejected": -3.907806396484375, "step": 579 }, { "epoch": 0.13, "learning_rate": 9.969556453699966e-06, "logits/chosen": -0.6829347014427185, "logits/rejected": -0.5990414023399353, "logps/chosen": -91.35401916503906, "logps/rejected": -167.1944122314453, "loss": 0.3469, "rewards/accuracies": 1.0, "rewards/chosen": 1.688635230064392, "rewards/margins": 7.250429153442383, "rewards/rejected": -5.561793804168701, "step": 580 }, { "epoch": 0.13, "learning_rate": 9.969358649903415e-06, "logits/chosen": -0.8428787589073181, "logits/rejected": -0.8182762265205383, "logps/chosen": -81.02518463134766, "logps/rejected": -136.5063018798828, "loss": 0.4488, "rewards/accuracies": 0.0, "rewards/chosen": -0.89779132604599, "rewards/margins": -0.3718864917755127, "rewards/rejected": -0.5259048342704773, "step": 581 }, { "epoch": 0.13, "learning_rate": 9.969160207556225e-06, "logits/chosen": -0.6053124666213989, "logits/rejected": -0.5971749424934387, "logps/chosen": -86.13220977783203, "logps/rejected": -31.40471076965332, "loss": 0.4686, "rewards/accuracies": 1.0, "rewards/chosen": 0.5739464163780212, "rewards/margins": 1.4083929061889648, "rewards/rejected": -0.8344465494155884, "step": 582 }, { "epoch": 0.13, "learning_rate": 9.968961126683893e-06, "logits/chosen": -1.0700912475585938, "logits/rejected": -0.9714859127998352, "logps/chosen": -84.17849731445312, "logps/rejected": -159.07615661621094, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": 0.06590576469898224, "rewards/margins": 2.727879524230957, "rewards/rejected": -2.661973714828491, "step": 583 }, { "epoch": 0.13, "learning_rate": 9.968761407312002e-06, "logits/chosen": -0.7196212410926819, "logits/rejected": -0.5476186871528625, "logps/chosen": -185.07594299316406, "logps/rejected": -276.6474609375, "loss": 0.347, "rewards/accuracies": 1.0, "rewards/chosen": 2.209930419921875, "rewards/margins": 7.100625514984131, "rewards/rejected": -4.890695095062256, "step": 584 }, { "epoch": 0.13, "learning_rate": 9.968561049466214e-06, "logits/chosen": -0.6591249704360962, "logits/rejected": -0.6086928844451904, "logps/chosen": -105.86321258544922, "logps/rejected": -154.07594299316406, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.4078781306743622, "rewards/margins": 5.776369571685791, "rewards/rejected": -6.1842474937438965, "step": 585 }, { "epoch": 0.13, "learning_rate": 9.968360053172275e-06, "logits/chosen": -0.6047197580337524, "logits/rejected": -0.6077901124954224, "logps/chosen": -124.09578704833984, "logps/rejected": -80.74024200439453, "loss": 0.3237, "rewards/accuracies": 1.0, "rewards/chosen": 0.4289131164550781, "rewards/margins": 0.16519621014595032, "rewards/rejected": 0.2637169063091278, "step": 586 }, { "epoch": 0.13, "learning_rate": 9.968158418456013e-06, "logits/chosen": -0.7068358063697815, "logits/rejected": -0.7068358063697815, "logps/chosen": -131.02684020996094, "logps/rejected": -131.02684020996094, "loss": 1.1533, "rewards/accuracies": 0.0, "rewards/chosen": -3.6153838634490967, "rewards/margins": 0.0, "rewards/rejected": -3.6153838634490967, "step": 587 }, { "epoch": 0.13, "learning_rate": 9.967956145343339e-06, "logits/chosen": -0.35646456480026245, "logits/rejected": -0.3726999759674072, "logps/chosen": -101.71627044677734, "logps/rejected": -91.26908874511719, "loss": 1.3646, "rewards/accuracies": 0.0, "rewards/chosen": -3.9134910106658936, "rewards/margins": -1.8960762023925781, "rewards/rejected": -2.0174148082733154, "step": 588 }, { "epoch": 0.13, "learning_rate": 9.96775323386024e-06, "logits/chosen": -0.47102540731430054, "logits/rejected": -0.42540591955184937, "logps/chosen": -137.65554809570312, "logps/rejected": -132.70632934570312, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -0.080902099609375, "rewards/margins": 3.318291425704956, "rewards/rejected": -3.399193525314331, "step": 589 }, { "epoch": 0.13, "learning_rate": 9.967549684032796e-06, "logits/chosen": -0.9883081912994385, "logits/rejected": -1.0122313499450684, "logps/chosen": -158.02853393554688, "logps/rejected": -163.85488891601562, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": 1.438970923423767, "rewards/margins": 7.227560520172119, "rewards/rejected": -5.7885894775390625, "step": 590 }, { "epoch": 0.13, "learning_rate": 9.967345495887157e-06, "logits/chosen": -0.7786279320716858, "logits/rejected": -0.7794106006622314, "logps/chosen": -83.4147720336914, "logps/rejected": -230.97027587890625, "loss": 0.5643, "rewards/accuracies": 1.0, "rewards/chosen": 0.5056602358818054, "rewards/margins": 3.469641923904419, "rewards/rejected": -2.9639816284179688, "step": 591 }, { "epoch": 0.13, "learning_rate": 9.967140669449562e-06, "logits/chosen": -0.7271188497543335, "logits/rejected": -0.00932527519762516, "logps/chosen": -85.29789733886719, "logps/rejected": -242.99974060058594, "loss": 0.2289, "rewards/accuracies": 1.0, "rewards/chosen": 0.17845001816749573, "rewards/margins": 12.667667388916016, "rewards/rejected": -12.489217758178711, "step": 592 }, { "epoch": 0.13, "learning_rate": 9.966935204746332e-06, "logits/chosen": -0.6368160843849182, "logits/rejected": -0.5619107484817505, "logps/chosen": -129.48065185546875, "logps/rejected": -181.05474853515625, "loss": 0.0762, "rewards/accuracies": 1.0, "rewards/chosen": 0.744519054889679, "rewards/margins": 1.812265157699585, "rewards/rejected": -1.0677460432052612, "step": 593 }, { "epoch": 0.13, "learning_rate": 9.966729101803872e-06, "logits/chosen": -0.9739222526550293, "logits/rejected": -1.0178691148757935, "logps/chosen": -221.97891235351562, "logps/rejected": -80.76995849609375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 4.944827556610107, "rewards/margins": 8.975384712219238, "rewards/rejected": -4.030557155609131, "step": 594 }, { "epoch": 0.13, "learning_rate": 9.966522360648659e-06, "logits/chosen": -0.7281407713890076, "logits/rejected": -0.6657847762107849, "logps/chosen": -106.98793029785156, "logps/rejected": -169.53001403808594, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": 0.9245972037315369, "rewards/margins": 4.403246879577637, "rewards/rejected": -3.478649854660034, "step": 595 }, { "epoch": 0.13, "learning_rate": 9.966314981307261e-06, "logits/chosen": -0.45519453287124634, "logits/rejected": -0.3518575429916382, "logps/chosen": -85.07466888427734, "logps/rejected": -215.32699584960938, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.7757469415664673, "rewards/margins": 7.516022205352783, "rewards/rejected": -9.291769027709961, "step": 596 }, { "epoch": 0.13, "learning_rate": 9.96610696380633e-06, "logits/chosen": -0.8111838102340698, "logits/rejected": -0.8084216713905334, "logps/chosen": -82.87492370605469, "logps/rejected": -91.91735076904297, "loss": 0.3185, "rewards/accuracies": 1.0, "rewards/chosen": -0.6210388541221619, "rewards/margins": 0.24225538969039917, "rewards/rejected": -0.863294243812561, "step": 597 }, { "epoch": 0.13, "learning_rate": 9.965898308172589e-06, "logits/chosen": -0.9147120714187622, "logits/rejected": -0.8875775933265686, "logps/chosen": -143.4915771484375, "logps/rejected": -152.7710418701172, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": 1.8754853010177612, "rewards/margins": 5.886160373687744, "rewards/rejected": -4.010674953460693, "step": 598 }, { "epoch": 0.13, "learning_rate": 9.965689014432854e-06, "logits/chosen": -0.7752406597137451, "logits/rejected": -0.8170570731163025, "logps/chosen": -97.87582397460938, "logps/rejected": -79.538330078125, "loss": 0.3886, "rewards/accuracies": 1.0, "rewards/chosen": 0.6153358817100525, "rewards/margins": 0.12633439898490906, "rewards/rejected": 0.48900148272514343, "step": 599 }, { "epoch": 0.13, "learning_rate": 9.965479082614019e-06, "logits/chosen": -0.6926440596580505, "logits/rejected": -0.6447159051895142, "logps/chosen": -58.96878433227539, "logps/rejected": -75.6257095336914, "loss": 0.1119, "rewards/accuracies": 1.0, "rewards/chosen": -0.24102745950222015, "rewards/margins": 1.3855540752410889, "rewards/rejected": -1.6265815496444702, "step": 600 }, { "epoch": 0.13, "learning_rate": 9.965268512743058e-06, "logits/chosen": -0.9870830178260803, "logits/rejected": -0.9300782084465027, "logps/chosen": -88.17535400390625, "logps/rejected": -166.2042236328125, "loss": 0.0985, "rewards/accuracies": 1.0, "rewards/chosen": -0.23421326279640198, "rewards/margins": 1.898046851158142, "rewards/rejected": -2.1322600841522217, "step": 601 }, { "epoch": 0.13, "learning_rate": 9.965057304847029e-06, "logits/chosen": -0.5035108923912048, "logits/rejected": -0.512421190738678, "logps/chosen": -94.15364074707031, "logps/rejected": -156.52597045898438, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -1.2802146673202515, "rewards/margins": 3.292449951171875, "rewards/rejected": -4.572664737701416, "step": 602 }, { "epoch": 0.13, "learning_rate": 9.964845458953072e-06, "logits/chosen": -0.873185396194458, "logits/rejected": -0.8841907978057861, "logps/chosen": -162.98997497558594, "logps/rejected": -139.27410888671875, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": 3.6241455078125, "rewards/margins": 3.0469069480895996, "rewards/rejected": 0.5772385001182556, "step": 603 }, { "epoch": 0.13, "learning_rate": 9.964632975088408e-06, "logits/chosen": -0.8460515141487122, "logits/rejected": -0.808098554611206, "logps/chosen": -272.9090881347656, "logps/rejected": -244.1512451171875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 3.3642303943634033, "rewards/margins": 7.840888977050781, "rewards/rejected": -4.476658821105957, "step": 604 }, { "epoch": 0.13, "learning_rate": 9.964419853280343e-06, "logits/chosen": -0.5754169225692749, "logits/rejected": -0.5404506921768188, "logps/chosen": -73.49124145507812, "logps/rejected": -159.44070434570312, "loss": 0.0513, "rewards/accuracies": 1.0, "rewards/chosen": 0.5129684805870056, "rewards/margins": 4.927146911621094, "rewards/rejected": -4.414178371429443, "step": 605 }, { "epoch": 0.13, "learning_rate": 9.96420609355626e-06, "logits/chosen": -0.6368532776832581, "logits/rejected": -0.6755903959274292, "logps/chosen": -141.70352172851562, "logps/rejected": -95.57899475097656, "loss": 1.4982, "rewards/accuracies": 0.0, "rewards/chosen": -0.5554870963096619, "rewards/margins": -1.0762726068496704, "rewards/rejected": 0.5207855105400085, "step": 606 }, { "epoch": 0.13, "learning_rate": 9.963991695943627e-06, "logits/chosen": -0.7463091015815735, "logits/rejected": -0.7054617404937744, "logps/chosen": -98.26402282714844, "logps/rejected": -113.39689636230469, "loss": 0.0491, "rewards/accuracies": 1.0, "rewards/chosen": 0.6963836550712585, "rewards/margins": 2.4197487831115723, "rewards/rejected": -1.723365068435669, "step": 607 }, { "epoch": 0.13, "learning_rate": 9.963776660469996e-06, "logits/chosen": -0.8955424427986145, "logits/rejected": -0.9066426157951355, "logps/chosen": -105.18397521972656, "logps/rejected": -82.69798278808594, "loss": 0.0507, "rewards/accuracies": 1.0, "rewards/chosen": 0.6083335876464844, "rewards/margins": 2.715892791748047, "rewards/rejected": -2.1075592041015625, "step": 608 }, { "epoch": 0.13, "learning_rate": 9.963560987162994e-06, "logits/chosen": -0.6671463847160339, "logits/rejected": -0.6615098714828491, "logps/chosen": -65.37678527832031, "logps/rejected": -55.297454833984375, "loss": 0.1778, "rewards/accuracies": 1.0, "rewards/chosen": -0.43266984820365906, "rewards/margins": 0.9591426849365234, "rewards/rejected": -1.3918125629425049, "step": 609 }, { "epoch": 0.14, "learning_rate": 9.96334467605034e-06, "logits/chosen": -0.6519451141357422, "logits/rejected": -0.6481984257698059, "logps/chosen": -70.329345703125, "logps/rejected": -132.64166259765625, "loss": 0.2131, "rewards/accuracies": 1.0, "rewards/chosen": -0.5716087222099304, "rewards/margins": 0.6350060105323792, "rewards/rejected": -1.2066147327423096, "step": 610 }, { "epoch": 0.14, "learning_rate": 9.963127727159825e-06, "logits/chosen": -0.3152466118335724, "logits/rejected": -0.3485489785671234, "logps/chosen": -117.91090393066406, "logps/rejected": -137.02696228027344, "loss": 0.399, "rewards/accuracies": 0.0, "rewards/chosen": -2.252977132797241, "rewards/margins": -0.199249267578125, "rewards/rejected": -2.053727865219116, "step": 611 }, { "epoch": 0.14, "learning_rate": 9.962910140519328e-06, "logits/chosen": -0.24608831107616425, "logits/rejected": -0.2611187696456909, "logps/chosen": -142.0095672607422, "logps/rejected": -133.00604248046875, "loss": 0.8983, "rewards/accuracies": 0.0, "rewards/chosen": -0.6293121576309204, "rewards/margins": -1.6151397228240967, "rewards/rejected": 0.985827624797821, "step": 612 }, { "epoch": 0.14, "learning_rate": 9.96269191615681e-06, "logits/chosen": -1.0177814960479736, "logits/rejected": -0.9871042966842651, "logps/chosen": -81.62661743164062, "logps/rejected": -149.9569549560547, "loss": 0.0523, "rewards/accuracies": 1.0, "rewards/chosen": -0.3577438294887543, "rewards/margins": 2.610102891921997, "rewards/rejected": -2.967846632003784, "step": 613 }, { "epoch": 0.14, "learning_rate": 9.96247305410031e-06, "logits/chosen": -0.7646725177764893, "logits/rejected": -0.6615705490112305, "logps/chosen": -203.72589111328125, "logps/rejected": -172.86512756347656, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 2.5729997158050537, "rewards/margins": 9.954874992370605, "rewards/rejected": -7.381875038146973, "step": 614 }, { "epoch": 0.14, "learning_rate": 9.962253554377952e-06, "logits/chosen": -0.8798484802246094, "logits/rejected": -0.7812602519989014, "logps/chosen": -95.9911117553711, "logps/rejected": -205.89468383789062, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 1.1186516284942627, "rewards/margins": 7.777514457702637, "rewards/rejected": -6.658862590789795, "step": 615 }, { "epoch": 0.14, "learning_rate": 9.96203341701794e-06, "logits/chosen": -0.5546799302101135, "logits/rejected": -0.5511565208435059, "logps/chosen": -109.42475891113281, "logps/rejected": -128.7490234375, "loss": 0.1031, "rewards/accuracies": 1.0, "rewards/chosen": -4.111735820770264, "rewards/margins": 1.4813156127929688, "rewards/rejected": -5.593051433563232, "step": 616 }, { "epoch": 0.14, "learning_rate": 9.961812642048563e-06, "logits/chosen": -0.5370333194732666, "logits/rejected": -0.5527738332748413, "logps/chosen": -215.5774383544922, "logps/rejected": -172.37127685546875, "loss": 0.5807, "rewards/accuracies": 0.0, "rewards/chosen": 0.7426925897598267, "rewards/margins": -0.7804549932479858, "rewards/rejected": 1.5231475830078125, "step": 617 }, { "epoch": 0.14, "learning_rate": 9.961591229498192e-06, "logits/chosen": -0.6865578889846802, "logits/rejected": -0.7105041146278381, "logps/chosen": -165.4117431640625, "logps/rejected": -130.18508911132812, "loss": 0.1674, "rewards/accuracies": 1.0, "rewards/chosen": 0.331106573343277, "rewards/margins": 1.0503616333007812, "rewards/rejected": -0.7192550897598267, "step": 618 }, { "epoch": 0.14, "learning_rate": 9.96136917939527e-06, "logits/chosen": -0.42688193917274475, "logits/rejected": -0.36801934242248535, "logps/chosen": -83.81946563720703, "logps/rejected": -90.34321594238281, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": 1.3748406171798706, "rewards/margins": 3.1757469177246094, "rewards/rejected": -1.8009064197540283, "step": 619 }, { "epoch": 0.14, "learning_rate": 9.961146491768338e-06, "logits/chosen": -0.6929846405982971, "logits/rejected": -0.6563168168067932, "logps/chosen": -96.06368255615234, "logps/rejected": -136.172119140625, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": 0.3548935055732727, "rewards/margins": 3.552806854248047, "rewards/rejected": -3.197913408279419, "step": 620 }, { "epoch": 0.14, "learning_rate": 9.96092316664601e-06, "logits/chosen": -0.8494648337364197, "logits/rejected": -0.8792082071304321, "logps/chosen": -103.62037658691406, "logps/rejected": -91.69540405273438, "loss": 0.2318, "rewards/accuracies": 1.0, "rewards/chosen": 1.3461945056915283, "rewards/margins": 1.4024971723556519, "rewards/rejected": -0.056302644312381744, "step": 621 }, { "epoch": 0.14, "learning_rate": 9.960699204056978e-06, "logits/chosen": -0.3580070734024048, "logits/rejected": -0.39344826340675354, "logps/chosen": -135.40475463867188, "logps/rejected": -174.55299377441406, "loss": 1.9209, "rewards/accuracies": 0.0, "rewards/chosen": -2.4666390419006348, "rewards/margins": -3.7889442443847656, "rewards/rejected": 1.3223053216934204, "step": 622 }, { "epoch": 0.14, "learning_rate": 9.960474604030026e-06, "logits/chosen": -0.5716351270675659, "logits/rejected": -0.5821781158447266, "logps/chosen": -34.02873992919922, "logps/rejected": -102.00434875488281, "loss": 0.3439, "rewards/accuracies": 1.0, "rewards/chosen": -0.3663295805454254, "rewards/margins": 0.010792911052703857, "rewards/rejected": -0.3771224915981293, "step": 623 }, { "epoch": 0.14, "learning_rate": 9.96024936659401e-06, "logits/chosen": -0.557041347026825, "logits/rejected": -0.5632598400115967, "logps/chosen": -123.61965942382812, "logps/rejected": -196.29290771484375, "loss": 0.5135, "rewards/accuracies": 0.0, "rewards/chosen": 0.4163146913051605, "rewards/margins": -0.4573913514614105, "rewards/rejected": 0.873706042766571, "step": 624 }, { "epoch": 0.14, "learning_rate": 9.960023491777875e-06, "logits/chosen": -0.9474216103553772, "logits/rejected": -0.9652628898620605, "logps/chosen": -71.46347045898438, "logps/rejected": -78.65605163574219, "loss": 0.2221, "rewards/accuracies": 1.0, "rewards/chosen": 0.46787795424461365, "rewards/margins": 1.5314819812774658, "rewards/rejected": -1.0636039972305298, "step": 625 }, { "epoch": 0.14, "learning_rate": 9.959796979610646e-06, "logits/chosen": -0.8531383275985718, "logits/rejected": -0.7907811403274536, "logps/chosen": -115.48835754394531, "logps/rejected": -165.11561584472656, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": 1.4716873168945312, "rewards/margins": 6.396158695220947, "rewards/rejected": -4.924471378326416, "step": 626 }, { "epoch": 0.14, "learning_rate": 9.959569830121427e-06, "logits/chosen": -0.8520106673240662, "logits/rejected": -0.8090077638626099, "logps/chosen": -75.63311767578125, "logps/rejected": -184.03929138183594, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": 1.222737193107605, "rewards/margins": 2.64438796043396, "rewards/rejected": -1.421650767326355, "step": 627 }, { "epoch": 0.14, "learning_rate": 9.959342043339406e-06, "logits/chosen": -0.5250169038772583, "logits/rejected": -0.4391081929206848, "logps/chosen": -197.60353088378906, "logps/rejected": -174.0544891357422, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 2.321856737136841, "rewards/margins": 7.524084091186523, "rewards/rejected": -5.2022271156311035, "step": 628 }, { "epoch": 0.14, "learning_rate": 9.959113619293857e-06, "logits/chosen": -0.47685158252716064, "logits/rejected": -0.47498419880867004, "logps/chosen": -82.82752990722656, "logps/rejected": -106.568603515625, "loss": 0.6995, "rewards/accuracies": 0.0, "rewards/chosen": 0.269052118062973, "rewards/margins": -0.1784713864326477, "rewards/rejected": 0.4475235044956207, "step": 629 }, { "epoch": 0.14, "learning_rate": 9.958884558014128e-06, "logits/chosen": -0.8518475294113159, "logits/rejected": -0.6883416175842285, "logps/chosen": -168.39434814453125, "logps/rejected": -303.7294921875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 1.3516632318496704, "rewards/margins": 4.719076633453369, "rewards/rejected": -3.367413282394409, "step": 630 }, { "epoch": 0.14, "learning_rate": 9.958654859529654e-06, "logits/chosen": -0.4316720962524414, "logits/rejected": 0.22066468000411987, "logps/chosen": -155.46400451660156, "logps/rejected": -397.21624755859375, "loss": 0.594, "rewards/accuracies": 1.0, "rewards/chosen": 2.4318466186523438, "rewards/margins": 15.991291999816895, "rewards/rejected": -13.55944538116455, "step": 631 }, { "epoch": 0.14, "learning_rate": 9.958424523869952e-06, "logits/chosen": -0.6188042163848877, "logits/rejected": -0.6390265226364136, "logps/chosen": -157.62403869628906, "logps/rejected": -114.53213500976562, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": 0.3056655824184418, "rewards/margins": 2.369610548019409, "rewards/rejected": -2.0639450550079346, "step": 632 }, { "epoch": 0.14, "learning_rate": 9.958193551064617e-06, "logits/chosen": -0.7054235935211182, "logits/rejected": -0.7054235935211182, "logps/chosen": -152.74903869628906, "logps/rejected": -152.74903869628906, "loss": 0.3482, "rewards/accuracies": 0.0, "rewards/chosen": -4.936402320861816, "rewards/margins": 0.0, "rewards/rejected": -4.936402320861816, "step": 633 }, { "epoch": 0.14, "learning_rate": 9.95796194114333e-06, "logits/chosen": -0.8127259016036987, "logits/rejected": -0.7636592984199524, "logps/chosen": -161.42515563964844, "logps/rejected": -272.95867919921875, "loss": 0.3936, "rewards/accuracies": 1.0, "rewards/chosen": 1.0113509893417358, "rewards/margins": 0.8037704229354858, "rewards/rejected": 0.20758056640625, "step": 634 }, { "epoch": 0.14, "learning_rate": 9.957729694135852e-06, "logits/chosen": -0.7505323886871338, "logits/rejected": -0.7254956960678101, "logps/chosen": -144.78350830078125, "logps/rejected": -195.0177001953125, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": -2.3099381923675537, "rewards/margins": 3.776641607284546, "rewards/rejected": -6.0865797996521, "step": 635 }, { "epoch": 0.14, "learning_rate": 9.957496810072027e-06, "logits/chosen": -0.7901780009269714, "logits/rejected": -0.8287870287895203, "logps/chosen": -108.02935791015625, "logps/rejected": -79.87278747558594, "loss": 0.3021, "rewards/accuracies": 1.0, "rewards/chosen": 0.27112123370170593, "rewards/margins": 0.3406219780445099, "rewards/rejected": -0.06950073689222336, "step": 636 }, { "epoch": 0.14, "learning_rate": 9.957263288981779e-06, "logits/chosen": -0.8157278299331665, "logits/rejected": -0.8184711337089539, "logps/chosen": -110.44183349609375, "logps/rejected": -76.0968017578125, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": 0.7601837515830994, "rewards/margins": 2.536390781402588, "rewards/rejected": -1.7762069702148438, "step": 637 }, { "epoch": 0.14, "learning_rate": 9.957029130895116e-06, "logits/chosen": -0.808765172958374, "logits/rejected": -0.8207703828811646, "logps/chosen": -130.59986877441406, "logps/rejected": -206.53729248046875, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.4739181697368622, "rewards/margins": 4.564921855926514, "rewards/rejected": -5.038839817047119, "step": 638 }, { "epoch": 0.14, "learning_rate": 9.956794335842126e-06, "logits/chosen": -0.40051180124282837, "logits/rejected": -0.4016727805137634, "logps/chosen": -101.22238159179688, "logps/rejected": -110.5899429321289, "loss": 0.3425, "rewards/accuracies": 1.0, "rewards/chosen": -2.1522903442382812, "rewards/margins": 0.016735076904296875, "rewards/rejected": -2.169025421142578, "step": 639 }, { "epoch": 0.14, "learning_rate": 9.956558903852978e-06, "logits/chosen": -0.8110934495925903, "logits/rejected": -0.7747716307640076, "logps/chosen": -147.45870971679688, "logps/rejected": -111.85618591308594, "loss": 0.3301, "rewards/accuracies": 1.0, "rewards/chosen": 0.3672897517681122, "rewards/margins": 1.4195542335510254, "rewards/rejected": -1.0522644519805908, "step": 640 }, { "epoch": 0.14, "learning_rate": 9.956322834957929e-06, "logits/chosen": -0.736142635345459, "logits/rejected": -0.7042021751403809, "logps/chosen": -73.74962615966797, "logps/rejected": -168.73916625976562, "loss": 0.1341, "rewards/accuracies": 1.0, "rewards/chosen": -0.4885459840297699, "rewards/margins": 1.3715095520019531, "rewards/rejected": -1.8600555658340454, "step": 641 }, { "epoch": 0.14, "learning_rate": 9.956086129187308e-06, "logits/chosen": -0.41061869263648987, "logits/rejected": -0.32717493176460266, "logps/chosen": -109.4404525756836, "logps/rejected": -45.913089752197266, "loss": 0.0994, "rewards/accuracies": 1.0, "rewards/chosen": -0.17252731323242188, "rewards/margins": 1.5189167261123657, "rewards/rejected": -1.6914440393447876, "step": 642 }, { "epoch": 0.14, "learning_rate": 9.955848786571534e-06, "logits/chosen": -1.1444005966186523, "logits/rejected": -1.1211397647857666, "logps/chosen": -124.3677978515625, "logps/rejected": -229.46121215820312, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 0.5894744992256165, "rewards/margins": 9.613530158996582, "rewards/rejected": -9.024055480957031, "step": 643 }, { "epoch": 0.14, "learning_rate": 9.955610807141105e-06, "logits/chosen": -0.5207417011260986, "logits/rejected": -0.4211256802082062, "logps/chosen": -70.65790557861328, "logps/rejected": -181.12225341796875, "loss": 0.1059, "rewards/accuracies": 1.0, "rewards/chosen": 0.11567306518554688, "rewards/margins": 6.861295223236084, "rewards/rejected": -6.745622158050537, "step": 644 }, { "epoch": 0.14, "learning_rate": 9.9553721909266e-06, "logits/chosen": -0.5532981157302856, "logits/rejected": -0.5652771592140198, "logps/chosen": -184.96530151367188, "logps/rejected": -145.96871948242188, "loss": 0.2098, "rewards/accuracies": 1.0, "rewards/chosen": -0.397796630859375, "rewards/margins": 0.6516128778457642, "rewards/rejected": -1.0494095087051392, "step": 645 }, { "epoch": 0.14, "learning_rate": 9.95513293795868e-06, "logits/chosen": -0.661278247833252, "logits/rejected": -0.6165628433227539, "logps/chosen": -153.76773071289062, "logps/rejected": -250.9885711669922, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -4.208237648010254, "rewards/margins": 5.266298294067383, "rewards/rejected": -9.474535942077637, "step": 646 }, { "epoch": 0.14, "learning_rate": 9.95489304826809e-06, "logits/chosen": -0.8295455574989319, "logits/rejected": -0.806641697883606, "logps/chosen": -182.72528076171875, "logps/rejected": -117.6801528930664, "loss": 0.068, "rewards/accuracies": 1.0, "rewards/chosen": -2.9516937732696533, "rewards/margins": 1.929569959640503, "rewards/rejected": -4.881263732910156, "step": 647 }, { "epoch": 0.14, "learning_rate": 9.954652521885656e-06, "logits/chosen": -0.6887733936309814, "logits/rejected": -0.571419358253479, "logps/chosen": -212.8786163330078, "logps/rejected": -329.47601318359375, "loss": 0.5249, "rewards/accuracies": 1.0, "rewards/chosen": 3.226149082183838, "rewards/margins": 11.217113494873047, "rewards/rejected": -7.990963935852051, "step": 648 }, { "epoch": 0.14, "learning_rate": 9.954411358842282e-06, "logits/chosen": -0.6880359053611755, "logits/rejected": -0.6862469911575317, "logps/chosen": -27.352399826049805, "logps/rejected": -71.95834350585938, "loss": 0.3252, "rewards/accuracies": 1.0, "rewards/chosen": -0.11281318962574005, "rewards/margins": 0.6074680685997009, "rewards/rejected": -0.7202812433242798, "step": 649 }, { "epoch": 0.14, "learning_rate": 9.954169559168958e-06, "logits/chosen": -0.5454768538475037, "logits/rejected": -0.4069911539554596, "logps/chosen": -231.58456420898438, "logps/rejected": -390.28057861328125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 0.6260238885879517, "rewards/margins": 5.297538757324219, "rewards/rejected": -4.671514987945557, "step": 650 }, { "epoch": 0.14, "learning_rate": 9.953927122896756e-06, "logits/chosen": -0.631294846534729, "logits/rejected": -0.6351748108863831, "logps/chosen": -248.4317626953125, "logps/rejected": -187.83795166015625, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 2.30094313621521, "rewards/margins": 4.136941909790039, "rewards/rejected": -1.83599853515625, "step": 651 }, { "epoch": 0.14, "learning_rate": 9.953684050056827e-06, "logits/chosen": -0.6009013652801514, "logits/rejected": -0.5466099381446838, "logps/chosen": -93.16041564941406, "logps/rejected": -134.86436462402344, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 1.0270065069198608, "rewards/margins": 4.1030378341674805, "rewards/rejected": -3.076031446456909, "step": 652 }, { "epoch": 0.14, "learning_rate": 9.953440340680407e-06, "logits/chosen": -0.7014251351356506, "logits/rejected": -0.677329421043396, "logps/chosen": -200.4783172607422, "logps/rejected": -258.0850830078125, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": 0.08802337944507599, "rewards/margins": 2.6209611892700195, "rewards/rejected": -2.53293776512146, "step": 653 }, { "epoch": 0.14, "learning_rate": 9.95319599479881e-06, "logits/chosen": -0.7350561618804932, "logits/rejected": -0.774684727191925, "logps/chosen": -99.35539245605469, "logps/rejected": -32.438987731933594, "loss": 0.4919, "rewards/accuracies": 1.0, "rewards/chosen": -0.4621421992778778, "rewards/margins": 0.19512787461280823, "rewards/rejected": -0.657270073890686, "step": 654 }, { "epoch": 0.14, "learning_rate": 9.952951012443434e-06, "logits/chosen": -0.6225666403770447, "logits/rejected": -0.6507207751274109, "logps/chosen": -200.30221557617188, "logps/rejected": -84.8976058959961, "loss": 0.1905, "rewards/accuracies": 1.0, "rewards/chosen": -0.3911590576171875, "rewards/margins": 0.769080400466919, "rewards/rejected": -1.1602394580841064, "step": 655 }, { "epoch": 0.15, "learning_rate": 9.952705393645761e-06, "logits/chosen": -0.48452886939048767, "logits/rejected": -0.4516555070877075, "logps/chosen": -160.86337280273438, "logps/rejected": -148.865478515625, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": -0.782122790813446, "rewards/margins": 5.406025409698486, "rewards/rejected": -6.188148021697998, "step": 656 }, { "epoch": 0.15, "learning_rate": 9.952459138437352e-06, "logits/chosen": -0.705045223236084, "logits/rejected": -0.7139202952384949, "logps/chosen": -158.16390991210938, "logps/rejected": -82.21491241455078, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": 0.14356385171413422, "rewards/margins": 4.569860458374023, "rewards/rejected": -4.426296710968018, "step": 657 }, { "epoch": 0.15, "learning_rate": 9.952212246849847e-06, "logits/chosen": -1.2157083749771118, "logits/rejected": -1.2451750040054321, "logps/chosen": -112.58889770507812, "logps/rejected": -48.68156814575195, "loss": 0.1844, "rewards/accuracies": 1.0, "rewards/chosen": 1.3784812688827515, "rewards/margins": 2.792652130126953, "rewards/rejected": -1.4141708612442017, "step": 658 }, { "epoch": 0.15, "learning_rate": 9.951964718914972e-06, "logits/chosen": -0.8817850351333618, "logits/rejected": -0.9053859710693359, "logps/chosen": -235.354736328125, "logps/rejected": -105.78520202636719, "loss": 0.1077, "rewards/accuracies": 1.0, "rewards/chosen": 0.8023681640625, "rewards/margins": 1.4475083351135254, "rewards/rejected": -0.6451401114463806, "step": 659 }, { "epoch": 0.15, "learning_rate": 9.951716554664537e-06, "logits/chosen": -1.05990731716156, "logits/rejected": -1.0964329242706299, "logps/chosen": -108.69595336914062, "logps/rejected": -49.33992004394531, "loss": 0.3744, "rewards/accuracies": 0.0, "rewards/chosen": -2.494713544845581, "rewards/margins": -0.10822701454162598, "rewards/rejected": -2.386486530303955, "step": 660 }, { "epoch": 0.15, "learning_rate": 9.951467754130429e-06, "logits/chosen": -0.9573118686676025, "logits/rejected": -0.9480525255203247, "logps/chosen": -76.84033203125, "logps/rejected": -140.52996826171875, "loss": 0.0535, "rewards/accuracies": 1.0, "rewards/chosen": 0.14913177490234375, "rewards/margins": 2.1810340881347656, "rewards/rejected": -2.031902313232422, "step": 661 }, { "epoch": 0.15, "learning_rate": 9.951218317344615e-06, "logits/chosen": -0.7926303148269653, "logits/rejected": -0.8079673051834106, "logps/chosen": -183.3144989013672, "logps/rejected": -154.42288208007812, "loss": 0.1116, "rewards/accuracies": 1.0, "rewards/chosen": 0.8369705080986023, "rewards/margins": 2.0404725074768066, "rewards/rejected": -1.2035019397735596, "step": 662 }, { "epoch": 0.15, "learning_rate": 9.950968244339152e-06, "logits/chosen": -0.6197423338890076, "logits/rejected": -0.6348015666007996, "logps/chosen": -111.22779846191406, "logps/rejected": -139.66293334960938, "loss": 0.675, "rewards/accuracies": 1.0, "rewards/chosen": 0.6275665163993835, "rewards/margins": 1.1951217651367188, "rewards/rejected": -0.5675552487373352, "step": 663 }, { "epoch": 0.15, "learning_rate": 9.95071753514617e-06, "logits/chosen": -0.7569184899330139, "logits/rejected": -0.7324503064155579, "logps/chosen": -292.16796875, "logps/rejected": -207.58099365234375, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": -1.063079833984375, "rewards/margins": 3.0777788162231445, "rewards/rejected": -4.1408586502075195, "step": 664 }, { "epoch": 0.15, "learning_rate": 9.950466189797885e-06, "logits/chosen": -0.7630921006202698, "logits/rejected": -0.6989598870277405, "logps/chosen": -107.73334503173828, "logps/rejected": -191.51370239257812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.603711724281311, "rewards/margins": 7.771876335144043, "rewards/rejected": -7.1681647300720215, "step": 665 }, { "epoch": 0.15, "learning_rate": 9.950214208326598e-06, "logits/chosen": -0.6747878193855286, "logits/rejected": -0.6238076686859131, "logps/chosen": -74.88680267333984, "logps/rejected": -110.17585754394531, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 0.5682548880577087, "rewards/margins": 4.633569717407227, "rewards/rejected": -4.065314769744873, "step": 666 }, { "epoch": 0.15, "learning_rate": 9.949961590764682e-06, "logits/chosen": -0.5866501331329346, "logits/rejected": -0.5863772034645081, "logps/chosen": -86.1622314453125, "logps/rejected": -91.3672103881836, "loss": 0.0713, "rewards/accuracies": 1.0, "rewards/chosen": 0.42109987139701843, "rewards/margins": 1.8841698169708252, "rewards/rejected": -1.4630699157714844, "step": 667 }, { "epoch": 0.15, "learning_rate": 9.949708337144603e-06, "logits/chosen": -0.7119236588478088, "logits/rejected": -0.6874783039093018, "logps/chosen": -105.914306640625, "logps/rejected": -43.072261810302734, "loss": 0.1767, "rewards/accuracies": 1.0, "rewards/chosen": -1.0016746520996094, "rewards/margins": 1.131951093673706, "rewards/rejected": -2.1336257457733154, "step": 668 }, { "epoch": 0.15, "learning_rate": 9.949454447498901e-06, "logits/chosen": -0.7100079655647278, "logits/rejected": -0.7010912299156189, "logps/chosen": -71.69039154052734, "logps/rejected": -132.0387420654297, "loss": 0.0905, "rewards/accuracies": 1.0, "rewards/chosen": -0.09669723361730576, "rewards/margins": 1.6192626953125, "rewards/rejected": -1.715959906578064, "step": 669 }, { "epoch": 0.15, "learning_rate": 9.949199921860202e-06, "logits/chosen": -0.5049525499343872, "logits/rejected": -0.49229156970977783, "logps/chosen": -262.9556884765625, "logps/rejected": -186.789794921875, "loss": 0.1147, "rewards/accuracies": 1.0, "rewards/chosen": -1.9456939697265625, "rewards/margins": 1.9380309581756592, "rewards/rejected": -3.8837249279022217, "step": 670 }, { "epoch": 0.15, "learning_rate": 9.94894476026121e-06, "logits/chosen": -0.8471182584762573, "logits/rejected": -0.7713744044303894, "logps/chosen": -127.20188903808594, "logps/rejected": -130.48268127441406, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 3.2335662841796875, "rewards/margins": 5.576037406921387, "rewards/rejected": -2.3424713611602783, "step": 671 }, { "epoch": 0.15, "learning_rate": 9.948688962734711e-06, "logits/chosen": -0.5824925303459167, "logits/rejected": -0.5991524457931519, "logps/chosen": -69.9437484741211, "logps/rejected": -103.18350219726562, "loss": 0.3203, "rewards/accuracies": 1.0, "rewards/chosen": -1.0898770093917847, "rewards/margins": 0.14735984802246094, "rewards/rejected": -1.2372368574142456, "step": 672 }, { "epoch": 0.15, "learning_rate": 9.94843252931358e-06, "logits/chosen": -0.7371792793273926, "logits/rejected": -0.7371792793273926, "logps/chosen": -156.50186157226562, "logps/rejected": -156.50186157226562, "loss": 0.3488, "rewards/accuracies": 0.0, "rewards/chosen": -4.554572582244873, "rewards/margins": 0.0, "rewards/rejected": -4.554572582244873, "step": 673 }, { "epoch": 0.15, "learning_rate": 9.948175460030762e-06, "logits/chosen": -0.7560292482376099, "logits/rejected": -0.7596989274024963, "logps/chosen": -193.1011962890625, "logps/rejected": -110.3720932006836, "loss": 0.0696, "rewards/accuracies": 1.0, "rewards/chosen": -0.28364259004592896, "rewards/margins": 2.002432346343994, "rewards/rejected": -2.2860748767852783, "step": 674 }, { "epoch": 0.15, "learning_rate": 9.947917754919293e-06, "logits/chosen": -0.47713109850883484, "logits/rejected": -0.47168394923210144, "logps/chosen": -61.598182678222656, "logps/rejected": -82.84627532958984, "loss": 0.0428, "rewards/accuracies": 1.0, "rewards/chosen": 0.6289100646972656, "rewards/margins": 2.7858598232269287, "rewards/rejected": -2.156949758529663, "step": 675 }, { "epoch": 0.15, "learning_rate": 9.947659414012287e-06, "logits/chosen": -0.7475425601005554, "logits/rejected": -0.697942316532135, "logps/chosen": -142.5628204345703, "logps/rejected": -256.3187255859375, "loss": 0.1551, "rewards/accuracies": 1.0, "rewards/chosen": -0.5539321899414062, "rewards/margins": 5.646917819976807, "rewards/rejected": -6.200850009918213, "step": 676 }, { "epoch": 0.15, "learning_rate": 9.94740043734294e-06, "logits/chosen": -0.6317248940467834, "logits/rejected": -0.5669683814048767, "logps/chosen": -70.36227416992188, "logps/rejected": -177.04376220703125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.1182163953781128, "rewards/margins": 4.909827709197998, "rewards/rejected": -6.0280442237854, "step": 677 }, { "epoch": 0.15, "learning_rate": 9.947140824944533e-06, "logits/chosen": -0.7795125842094421, "logits/rejected": -0.684629499912262, "logps/chosen": -223.9787139892578, "logps/rejected": -186.84967041015625, "loss": 0.4479, "rewards/accuracies": 1.0, "rewards/chosen": 1.5785690546035767, "rewards/margins": 5.428048610687256, "rewards/rejected": -3.8494796752929688, "step": 678 }, { "epoch": 0.15, "learning_rate": 9.946880576850418e-06, "logits/chosen": -0.8738836646080017, "logits/rejected": -0.8899170160293579, "logps/chosen": -267.5547790527344, "logps/rejected": -184.13528442382812, "loss": 0.255, "rewards/accuracies": 1.0, "rewards/chosen": 3.2258026599884033, "rewards/margins": 5.162966728210449, "rewards/rejected": -1.937164306640625, "step": 679 }, { "epoch": 0.15, "learning_rate": 9.946619693094044e-06, "logits/chosen": -0.9529820084571838, "logits/rejected": -0.5029073357582092, "logps/chosen": -47.194618225097656, "logps/rejected": -362.706298828125, "loss": 0.3328, "rewards/accuracies": 1.0, "rewards/chosen": 0.43850404024124146, "rewards/margins": 14.174240112304688, "rewards/rejected": -13.735735893249512, "step": 680 }, { "epoch": 0.15, "learning_rate": 9.94635817370893e-06, "logits/chosen": -0.6653857231140137, "logits/rejected": -0.6650403141975403, "logps/chosen": -31.788909912109375, "logps/rejected": -23.829988479614258, "loss": 0.473, "rewards/accuracies": 0.0, "rewards/chosen": -0.8550153970718384, "rewards/margins": -0.4545236825942993, "rewards/rejected": -0.40049171447753906, "step": 681 }, { "epoch": 0.15, "learning_rate": 9.94609601872868e-06, "logits/chosen": -1.102862000465393, "logits/rejected": -1.08744478225708, "logps/chosen": -207.015625, "logps/rejected": -210.43597412109375, "loss": 0.0428, "rewards/accuracies": 1.0, "rewards/chosen": 3.5409302711486816, "rewards/margins": 2.4200286865234375, "rewards/rejected": 1.1209014654159546, "step": 682 }, { "epoch": 0.15, "learning_rate": 9.945833228186984e-06, "logits/chosen": -0.6852118968963623, "logits/rejected": -0.6852118968963623, "logps/chosen": -37.519962310791016, "logps/rejected": -37.519962310791016, "loss": 0.3493, "rewards/accuracies": 0.0, "rewards/chosen": -1.8697054386138916, "rewards/margins": 0.0, "rewards/rejected": -1.8697054386138916, "step": 683 }, { "epoch": 0.15, "learning_rate": 9.945569802117604e-06, "logits/chosen": -0.6119074821472168, "logits/rejected": -0.6119074821472168, "logps/chosen": -118.25222778320312, "logps/rejected": -118.25222778320312, "loss": 0.3841, "rewards/accuracies": 0.0, "rewards/chosen": -4.6590576171875, "rewards/margins": 0.0, "rewards/rejected": -4.6590576171875, "step": 684 }, { "epoch": 0.15, "learning_rate": 9.945305740554397e-06, "logits/chosen": -0.7887765765190125, "logits/rejected": -0.7106807231903076, "logps/chosen": -73.99156188964844, "logps/rejected": -60.53763198852539, "loss": 1.5078, "rewards/accuracies": 1.0, "rewards/chosen": 1.1520065069198608, "rewards/margins": 2.686114549636841, "rewards/rejected": -1.53410804271698, "step": 685 }, { "epoch": 0.15, "learning_rate": 9.945041043531289e-06, "logits/chosen": -0.40564975142478943, "logits/rejected": -0.40564975142478943, "logps/chosen": -146.08074951171875, "logps/rejected": -146.08074951171875, "loss": 0.3529, "rewards/accuracies": 0.0, "rewards/chosen": -2.541252851486206, "rewards/margins": 0.0, "rewards/rejected": -2.541252851486206, "step": 686 }, { "epoch": 0.15, "learning_rate": 9.944775711082296e-06, "logits/chosen": -0.5945679545402527, "logits/rejected": -0.5879628658294678, "logps/chosen": -85.75811004638672, "logps/rejected": -147.7642059326172, "loss": 0.1673, "rewards/accuracies": 1.0, "rewards/chosen": -0.9834983944892883, "rewards/margins": 1.194117784500122, "rewards/rejected": -2.1776161193847656, "step": 687 }, { "epoch": 0.15, "learning_rate": 9.944509743241508e-06, "logits/chosen": -0.8243941068649292, "logits/rejected": -0.769722044467926, "logps/chosen": -133.72157287597656, "logps/rejected": -160.84915161132812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.2724960446357727, "rewards/margins": 5.6663312911987305, "rewards/rejected": -5.9388275146484375, "step": 688 }, { "epoch": 0.15, "learning_rate": 9.944243140043106e-06, "logits/chosen": -0.824384868144989, "logits/rejected": -0.5567418932914734, "logps/chosen": -243.26829528808594, "logps/rejected": -334.0284423828125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 1.9689925909042358, "rewards/margins": 6.3998703956604, "rewards/rejected": -4.430877685546875, "step": 689 }, { "epoch": 0.15, "learning_rate": 9.943975901521347e-06, "logits/chosen": -0.5937403440475464, "logits/rejected": -0.5652663111686707, "logps/chosen": -138.82666015625, "logps/rejected": -75.162353515625, "loss": 0.462, "rewards/accuracies": 0.0, "rewards/chosen": -3.1121628284454346, "rewards/margins": -0.4181373119354248, "rewards/rejected": -2.6940255165100098, "step": 690 }, { "epoch": 0.15, "learning_rate": 9.943708027710567e-06, "logits/chosen": -0.5601671934127808, "logits/rejected": -0.5601671934127808, "logps/chosen": -84.20751953125, "logps/rejected": -84.20751953125, "loss": 0.3471, "rewards/accuracies": 0.0, "rewards/chosen": -2.8935494422912598, "rewards/margins": 0.0, "rewards/rejected": -2.8935494422912598, "step": 691 }, { "epoch": 0.15, "learning_rate": 9.943439518645193e-06, "logits/chosen": -0.5888182520866394, "logits/rejected": -0.594252347946167, "logps/chosen": -77.99995422363281, "logps/rejected": -129.92144775390625, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": -1.4309062957763672, "rewards/margins": 2.3436176776885986, "rewards/rejected": -3.774523973464966, "step": 692 }, { "epoch": 0.15, "learning_rate": 9.943170374359722e-06, "logits/chosen": -0.5674927830696106, "logits/rejected": -0.514163076877594, "logps/chosen": -207.0606231689453, "logps/rejected": -160.27076721191406, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": 3.5547409057617188, "rewards/margins": 2.9946351051330566, "rewards/rejected": 0.5601059198379517, "step": 693 }, { "epoch": 0.15, "learning_rate": 9.942900594888743e-06, "logits/chosen": -0.68790203332901, "logits/rejected": -0.6656088829040527, "logps/chosen": -188.59963989257812, "logps/rejected": -131.61337280273438, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": 1.0591049194335938, "rewards/margins": 3.999371290206909, "rewards/rejected": -2.9402663707733154, "step": 694 }, { "epoch": 0.15, "learning_rate": 9.94263018026692e-06, "logits/chosen": -0.8582826256752014, "logits/rejected": -0.8168206810951233, "logps/chosen": -79.5688705444336, "logps/rejected": -170.8435821533203, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.28962936997413635, "rewards/margins": 3.9175987243652344, "rewards/rejected": -4.207228183746338, "step": 695 }, { "epoch": 0.15, "learning_rate": 9.942359130528998e-06, "logits/chosen": -0.9656427502632141, "logits/rejected": -1.0140671730041504, "logps/chosen": -184.79586791992188, "logps/rejected": -169.88546752929688, "loss": 1.2114, "rewards/accuracies": 0.0, "rewards/chosen": -3.492990255355835, "rewards/margins": -2.3286166191101074, "rewards/rejected": -1.164373755455017, "step": 696 }, { "epoch": 0.15, "learning_rate": 9.942087445709811e-06, "logits/chosen": -0.6120213866233826, "logits/rejected": -0.5807644128799438, "logps/chosen": -110.76747131347656, "logps/rejected": -187.9392852783203, "loss": 1.0532, "rewards/accuracies": 0.0, "rewards/chosen": -2.7356858253479004, "rewards/margins": -1.9762482643127441, "rewards/rejected": -0.7594375610351562, "step": 697 }, { "epoch": 0.15, "learning_rate": 9.941815125844267e-06, "logits/chosen": -0.7175211310386658, "logits/rejected": -0.6976735591888428, "logps/chosen": -100.49666595458984, "logps/rejected": -169.32894897460938, "loss": 0.0435, "rewards/accuracies": 1.0, "rewards/chosen": -2.4502625465393066, "rewards/margins": 3.2658944129943848, "rewards/rejected": -5.716156959533691, "step": 698 }, { "epoch": 0.15, "learning_rate": 9.94154217096736e-06, "logits/chosen": -0.8225955367088318, "logits/rejected": -0.8236141204833984, "logps/chosen": -194.97312927246094, "logps/rejected": -75.89744567871094, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": 3.393550157546997, "rewards/margins": 5.403137683868408, "rewards/rejected": -2.009587526321411, "step": 699 }, { "epoch": 0.15, "learning_rate": 9.941268581114162e-06, "logits/chosen": -0.72819983959198, "logits/rejected": -0.6907239556312561, "logps/chosen": -138.08888244628906, "logps/rejected": -230.49169921875, "loss": 1.1167, "rewards/accuracies": 0.0, "rewards/chosen": -0.33586883544921875, "rewards/margins": -2.1197738647460938, "rewards/rejected": 1.783905029296875, "step": 700 }, { "epoch": 0.16, "learning_rate": 9.94099435631983e-06, "logits/chosen": -1.0372153520584106, "logits/rejected": -1.0678279399871826, "logps/chosen": -179.0763702392578, "logps/rejected": -347.30816650390625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 1.9260635375976562, "rewards/margins": 5.960884094238281, "rewards/rejected": -4.034820556640625, "step": 701 }, { "epoch": 0.16, "learning_rate": 9.940719496619601e-06, "logits/chosen": -1.0372480154037476, "logits/rejected": -0.9880037903785706, "logps/chosen": -102.565673828125, "logps/rejected": -159.0198211669922, "loss": 0.3547, "rewards/accuracies": 1.0, "rewards/chosen": -0.37372589111328125, "rewards/margins": 4.1088409423828125, "rewards/rejected": -4.482566833496094, "step": 702 }, { "epoch": 0.16, "learning_rate": 9.940444002048794e-06, "logits/chosen": -0.6925361752510071, "logits/rejected": -0.728569746017456, "logps/chosen": -172.52523803710938, "logps/rejected": -111.59303283691406, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": 1.1894043684005737, "rewards/margins": 3.5465545654296875, "rewards/rejected": -2.3571503162384033, "step": 703 }, { "epoch": 0.16, "learning_rate": 9.94016787264281e-06, "logits/chosen": -0.8159576654434204, "logits/rejected": -0.763891339302063, "logps/chosen": -189.18563842773438, "logps/rejected": -301.8697509765625, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 2.128222703933716, "rewards/margins": 4.875317573547363, "rewards/rejected": -2.7470948696136475, "step": 704 }, { "epoch": 0.16, "learning_rate": 9.939891108437129e-06, "logits/chosen": -0.8491391539573669, "logits/rejected": -0.8046664595603943, "logps/chosen": -89.34552001953125, "logps/rejected": -74.68063354492188, "loss": 0.195, "rewards/accuracies": 1.0, "rewards/chosen": -1.0989593267440796, "rewards/margins": 0.745238184928894, "rewards/rejected": -1.8441975116729736, "step": 705 }, { "epoch": 0.16, "learning_rate": 9.939613709467317e-06, "logits/chosen": -0.33887070417404175, "logits/rejected": -0.28875207901000977, "logps/chosen": -156.88111877441406, "logps/rejected": -91.83222198486328, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": 1.8939651250839233, "rewards/margins": 3.800544023513794, "rewards/rejected": -1.9065788984298706, "step": 706 }, { "epoch": 0.16, "learning_rate": 9.939335675769017e-06, "logits/chosen": -0.8331590890884399, "logits/rejected": -0.8125602006912231, "logps/chosen": -129.02508544921875, "logps/rejected": -124.8375473022461, "loss": 0.5125, "rewards/accuracies": 0.0, "rewards/chosen": -0.4853553771972656, "rewards/margins": -0.5795501470565796, "rewards/rejected": 0.09419479221105576, "step": 707 }, { "epoch": 0.16, "learning_rate": 9.939057007377955e-06, "logits/chosen": -0.4603235423564911, "logits/rejected": -0.39187201857566833, "logps/chosen": -234.12570190429688, "logps/rejected": -204.8282928466797, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 1.5465744733810425, "rewards/margins": 7.935863018035889, "rewards/rejected": -6.389288425445557, "step": 708 }, { "epoch": 0.16, "learning_rate": 9.938777704329943e-06, "logits/chosen": -1.0706580877304077, "logits/rejected": -1.07732355594635, "logps/chosen": -71.46368408203125, "logps/rejected": -147.19287109375, "loss": 0.0691, "rewards/accuracies": 1.0, "rewards/chosen": -0.3593559265136719, "rewards/margins": 2.4961020946502686, "rewards/rejected": -2.8554580211639404, "step": 709 }, { "epoch": 0.16, "learning_rate": 9.938497766660869e-06, "logits/chosen": -0.48842597007751465, "logits/rejected": -0.48842597007751465, "logps/chosen": -215.75830078125, "logps/rejected": -215.75830078125, "loss": 0.3483, "rewards/accuracies": 0.0, "rewards/chosen": -3.4892303943634033, "rewards/margins": 0.0, "rewards/rejected": -3.4892303943634033, "step": 710 }, { "epoch": 0.16, "learning_rate": 9.938217194406701e-06, "logits/chosen": -0.8485292792320251, "logits/rejected": -0.8565192222595215, "logps/chosen": -188.89126586914062, "logps/rejected": -134.7960968017578, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": 1.2967499494552612, "rewards/margins": 6.227966785430908, "rewards/rejected": -4.931216716766357, "step": 711 }, { "epoch": 0.16, "learning_rate": 9.937935987603497e-06, "logits/chosen": -1.0103977918624878, "logits/rejected": -0.9093354940414429, "logps/chosen": -115.89471435546875, "logps/rejected": -213.36239624023438, "loss": 0.4143, "rewards/accuracies": 0.0, "rewards/chosen": -2.631977081298828, "rewards/margins": -0.2536964416503906, "rewards/rejected": -2.3782806396484375, "step": 712 }, { "epoch": 0.16, "learning_rate": 9.937654146287388e-06, "logits/chosen": -0.8398828506469727, "logits/rejected": -0.8319463729858398, "logps/chosen": -111.21804809570312, "logps/rejected": -51.11450958251953, "loss": 0.2996, "rewards/accuracies": 1.0, "rewards/chosen": -1.170050024986267, "rewards/margins": 1.62946355342865, "rewards/rejected": -2.799513578414917, "step": 713 }, { "epoch": 0.16, "learning_rate": 9.937371670494591e-06, "logits/chosen": -0.7954955101013184, "logits/rejected": -0.753108561038971, "logps/chosen": -171.5211181640625, "logps/rejected": -130.86688232421875, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -0.02846374548971653, "rewards/margins": 3.5406432151794434, "rewards/rejected": -3.5691070556640625, "step": 714 }, { "epoch": 0.16, "learning_rate": 9.937088560261404e-06, "logits/chosen": -1.0198144912719727, "logits/rejected": -1.0995315313339233, "logps/chosen": -95.95242309570312, "logps/rejected": -37.37078857421875, "loss": 0.219, "rewards/accuracies": 1.0, "rewards/chosen": 0.16833268105983734, "rewards/margins": 0.6048507690429688, "rewards/rejected": -0.4365181028842926, "step": 715 }, { "epoch": 0.16, "learning_rate": 9.936804815624205e-06, "logits/chosen": -0.8958867192268372, "logits/rejected": -0.2650817036628723, "logps/chosen": -97.92961120605469, "logps/rejected": -165.8607940673828, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6225425601005554, "rewards/margins": 7.867439270019531, "rewards/rejected": -8.489981651306152, "step": 716 }, { "epoch": 0.16, "learning_rate": 9.936520436619455e-06, "logits/chosen": -1.0741039514541626, "logits/rejected": -1.0316405296325684, "logps/chosen": -226.80564880371094, "logps/rejected": -288.2285461425781, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": 0.9758774042129517, "rewards/margins": 2.7646989822387695, "rewards/rejected": -1.7888214588165283, "step": 717 }, { "epoch": 0.16, "learning_rate": 9.936235423283696e-06, "logits/chosen": -0.8768467307090759, "logits/rejected": -0.8346125483512878, "logps/chosen": -82.52241516113281, "logps/rejected": -149.3692626953125, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.6749901175498962, "rewards/margins": 4.016912937164307, "rewards/rejected": -4.691903114318848, "step": 718 }, { "epoch": 0.16, "learning_rate": 9.935949775653554e-06, "logits/chosen": -0.5443806052207947, "logits/rejected": -0.5443806052207947, "logps/chosen": -84.9468765258789, "logps/rejected": -84.9468765258789, "loss": 0.3482, "rewards/accuracies": 0.0, "rewards/chosen": -2.4859542846679688, "rewards/margins": 0.0, "rewards/rejected": -2.4859542846679688, "step": 719 }, { "epoch": 0.16, "learning_rate": 9.935663493765726e-06, "logits/chosen": -0.7484025955200195, "logits/rejected": -0.7437800168991089, "logps/chosen": -196.92848205566406, "logps/rejected": -161.38525390625, "loss": 0.0807, "rewards/accuracies": 1.0, "rewards/chosen": 1.8612747192382812, "rewards/margins": 6.136734962463379, "rewards/rejected": -4.275460243225098, "step": 720 }, { "epoch": 0.16, "learning_rate": 9.935376577657008e-06, "logits/chosen": -0.7806230783462524, "logits/rejected": -0.842029333114624, "logps/chosen": -322.3739929199219, "logps/rejected": -237.76654052734375, "loss": 0.3875, "rewards/accuracies": 0.0, "rewards/chosen": -4.592688083648682, "rewards/margins": -0.13657093048095703, "rewards/rejected": -4.456117153167725, "step": 721 }, { "epoch": 0.16, "learning_rate": 9.935089027364264e-06, "logits/chosen": -0.743400514125824, "logits/rejected": -0.743400514125824, "logps/chosen": -69.65494537353516, "logps/rejected": -69.65494537353516, "loss": 1.8529, "rewards/accuracies": 0.0, "rewards/chosen": -2.1069114208221436, "rewards/margins": 0.0, "rewards/rejected": -2.1069114208221436, "step": 722 }, { "epoch": 0.16, "learning_rate": 9.934800842924443e-06, "logits/chosen": -0.9370388984680176, "logits/rejected": -0.9723039269447327, "logps/chosen": -191.71478271484375, "logps/rejected": -223.6702117919922, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 1.6013702154159546, "rewards/margins": 9.928284645080566, "rewards/rejected": -8.32691478729248, "step": 723 }, { "epoch": 0.16, "learning_rate": 9.934512024374577e-06, "logits/chosen": -0.7829892635345459, "logits/rejected": -0.7746087908744812, "logps/chosen": -127.51251220703125, "logps/rejected": -151.69436645507812, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": -0.6332748532295227, "rewards/margins": 3.6567542552948, "rewards/rejected": -4.290029048919678, "step": 724 }, { "epoch": 0.16, "learning_rate": 9.934222571751777e-06, "logits/chosen": -0.7587982416152954, "logits/rejected": -0.7606093287467957, "logps/chosen": -103.31404876708984, "logps/rejected": -101.9323959350586, "loss": 0.0476, "rewards/accuracies": 1.0, "rewards/chosen": 0.9239814877510071, "rewards/margins": 2.311007022857666, "rewards/rejected": -1.3870254755020142, "step": 725 }, { "epoch": 0.16, "learning_rate": 9.933932485093239e-06, "logits/chosen": -1.0634450912475586, "logits/rejected": -1.0494390726089478, "logps/chosen": -104.45906829833984, "logps/rejected": -185.23110961914062, "loss": 0.188, "rewards/accuracies": 1.0, "rewards/chosen": 0.6673622131347656, "rewards/margins": 0.8824882507324219, "rewards/rejected": -0.21512603759765625, "step": 726 }, { "epoch": 0.16, "learning_rate": 9.933641764436237e-06, "logits/chosen": -0.8104733228683472, "logits/rejected": -0.8104733228683472, "logps/chosen": -104.86167907714844, "logps/rejected": -104.86167907714844, "loss": 0.7533, "rewards/accuracies": 0.0, "rewards/chosen": -1.604287028312683, "rewards/margins": 0.0, "rewards/rejected": -1.604287028312683, "step": 727 }, { "epoch": 0.16, "learning_rate": 9.933350409818128e-06, "logits/chosen": -0.8045123219490051, "logits/rejected": -0.8348814249038696, "logps/chosen": -84.56846618652344, "logps/rejected": -30.33486557006836, "loss": 0.0777, "rewards/accuracies": 1.0, "rewards/chosen": 0.8191856741905212, "rewards/margins": 1.7921408414840698, "rewards/rejected": -0.9729551672935486, "step": 728 }, { "epoch": 0.16, "learning_rate": 9.933058421276351e-06, "logits/chosen": -0.6379746794700623, "logits/rejected": -0.43041470646858215, "logps/chosen": -179.13198852539062, "logps/rejected": -220.30215454101562, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.9352874755859375, "rewards/margins": 10.010517120361328, "rewards/rejected": -7.075229167938232, "step": 729 }, { "epoch": 0.16, "learning_rate": 9.932765798848428e-06, "logits/chosen": -0.8880457878112793, "logits/rejected": -0.8722716569900513, "logps/chosen": -120.27890014648438, "logps/rejected": -126.99885559082031, "loss": 0.0746, "rewards/accuracies": 1.0, "rewards/chosen": 0.4407714903354645, "rewards/margins": 1.8405296802520752, "rewards/rejected": -1.399758219718933, "step": 730 }, { "epoch": 0.16, "learning_rate": 9.932472542571954e-06, "logits/chosen": -1.262825846672058, "logits/rejected": -1.3756098747253418, "logps/chosen": -118.96282958984375, "logps/rejected": -90.9540786743164, "loss": 0.0989, "rewards/accuracies": 1.0, "rewards/chosen": 0.9081940054893494, "rewards/margins": 3.1731042861938477, "rewards/rejected": -2.2649102210998535, "step": 731 }, { "epoch": 0.16, "learning_rate": 9.932178652484617e-06, "logits/chosen": -0.38548028469085693, "logits/rejected": -0.38548028469085693, "logps/chosen": -71.68321228027344, "logps/rejected": -71.68321228027344, "loss": 0.3644, "rewards/accuracies": 0.0, "rewards/chosen": 0.042984772473573685, "rewards/margins": 0.0, "rewards/rejected": 0.042984772473573685, "step": 732 }, { "epoch": 0.16, "learning_rate": 9.931884128624181e-06, "logits/chosen": -0.6925452351570129, "logits/rejected": -0.6468424797058105, "logps/chosen": -120.72386932373047, "logps/rejected": -131.86666870117188, "loss": 0.152, "rewards/accuracies": 1.0, "rewards/chosen": -0.684992253780365, "rewards/margins": 2.895688533782959, "rewards/rejected": -3.5806808471679688, "step": 733 }, { "epoch": 0.16, "learning_rate": 9.93158897102849e-06, "logits/chosen": -0.7389129996299744, "logits/rejected": -0.6981230974197388, "logps/chosen": -225.50088500976562, "logps/rejected": -247.74734497070312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.9425903558731079, "rewards/margins": 10.682924270629883, "rewards/rejected": -9.740333557128906, "step": 734 }, { "epoch": 0.16, "learning_rate": 9.93129317973547e-06, "logits/chosen": -0.8459729552268982, "logits/rejected": -0.8530845642089844, "logps/chosen": -171.976806640625, "logps/rejected": -167.788330078125, "loss": 0.0914, "rewards/accuracies": 1.0, "rewards/chosen": 2.422886610031128, "rewards/margins": 6.93350887298584, "rewards/rejected": -4.510622501373291, "step": 735 }, { "epoch": 0.16, "learning_rate": 9.930996754783134e-06, "logits/chosen": -1.0477579832077026, "logits/rejected": -1.0446068048477173, "logps/chosen": -51.552284240722656, "logps/rejected": -53.37398910522461, "loss": 0.3989, "rewards/accuracies": 1.0, "rewards/chosen": -1.0018341541290283, "rewards/margins": 0.17122268676757812, "rewards/rejected": -1.1730568408966064, "step": 736 }, { "epoch": 0.16, "learning_rate": 9.930699696209566e-06, "logits/chosen": -0.8310670852661133, "logits/rejected": -0.8083633184432983, "logps/chosen": -92.4161376953125, "logps/rejected": -160.9918212890625, "loss": 0.6382, "rewards/accuracies": 0.0, "rewards/chosen": -1.450042724609375, "rewards/margins": -0.7176772952079773, "rewards/rejected": -0.7323654294013977, "step": 737 }, { "epoch": 0.16, "learning_rate": 9.93040200405294e-06, "logits/chosen": -0.7906588315963745, "logits/rejected": -0.7467375993728638, "logps/chosen": -117.73405456542969, "logps/rejected": -236.56321716308594, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": 1.9151077270507812, "rewards/margins": 4.862512588500977, "rewards/rejected": -2.947404623031616, "step": 738 }, { "epoch": 0.16, "learning_rate": 9.930103678351511e-06, "logits/chosen": -0.7019058465957642, "logits/rejected": -0.6875248551368713, "logps/chosen": -81.09029388427734, "logps/rejected": -139.010498046875, "loss": 1.2348, "rewards/accuracies": 1.0, "rewards/chosen": -0.3896034359931946, "rewards/margins": 3.118727207183838, "rewards/rejected": -3.5083305835723877, "step": 739 }, { "epoch": 0.16, "learning_rate": 9.92980471914361e-06, "logits/chosen": -0.8561097979545593, "logits/rejected": -0.8344160318374634, "logps/chosen": -155.21365356445312, "logps/rejected": -236.01248168945312, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 0.7726272940635681, "rewards/margins": 5.096687316894531, "rewards/rejected": -4.324059963226318, "step": 740 }, { "epoch": 0.16, "learning_rate": 9.929505126467653e-06, "logits/chosen": -0.7361299395561218, "logits/rejected": -0.7386963367462158, "logps/chosen": -123.8780517578125, "logps/rejected": -101.44356536865234, "loss": 1.6943, "rewards/accuracies": 0.0, "rewards/chosen": -2.8221497535705566, "rewards/margins": -1.8291130065917969, "rewards/rejected": -0.993036687374115, "step": 741 }, { "epoch": 0.16, "learning_rate": 9.929204900362137e-06, "logits/chosen": -0.7516152858734131, "logits/rejected": -0.7336148619651794, "logps/chosen": -91.6033706665039, "logps/rejected": -110.40469360351562, "loss": 0.115, "rewards/accuracies": 1.0, "rewards/chosen": 1.6112167835235596, "rewards/margins": 1.9224457740783691, "rewards/rejected": -0.3112289607524872, "step": 742 }, { "epoch": 0.16, "learning_rate": 9.928904040865642e-06, "logits/chosen": -0.6219491362571716, "logits/rejected": -0.5749164819717407, "logps/chosen": -96.03279113769531, "logps/rejected": -105.35240936279297, "loss": 0.8452, "rewards/accuracies": 1.0, "rewards/chosen": 1.201557993888855, "rewards/margins": 1.4731240272521973, "rewards/rejected": -0.2715660035610199, "step": 743 }, { "epoch": 0.16, "learning_rate": 9.928602548016826e-06, "logits/chosen": -0.7917911410331726, "logits/rejected": -0.7733204364776611, "logps/chosen": -183.37451171875, "logps/rejected": -328.83740234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 3.3108232021331787, "rewards/margins": 7.450469017028809, "rewards/rejected": -4.139645576477051, "step": 744 }, { "epoch": 0.16, "learning_rate": 9.92830042185443e-06, "logits/chosen": -0.5222523212432861, "logits/rejected": -0.5238097906112671, "logps/chosen": -79.32131958007812, "logps/rejected": -135.71966552734375, "loss": 2.4083, "rewards/accuracies": 1.0, "rewards/chosen": 0.39366912841796875, "rewards/margins": 1.4457489252090454, "rewards/rejected": -1.0520797967910767, "step": 745 }, { "epoch": 0.17, "learning_rate": 9.927997662417277e-06, "logits/chosen": -0.31036391854286194, "logits/rejected": -0.27141961455345154, "logps/chosen": -69.55039978027344, "logps/rejected": -169.91111755371094, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.2891174256801605, "rewards/margins": 7.021215915679932, "rewards/rejected": -7.310333251953125, "step": 746 }, { "epoch": 0.17, "learning_rate": 9.927694269744273e-06, "logits/chosen": -0.7898727655410767, "logits/rejected": -0.7692645192146301, "logps/chosen": -86.37176513671875, "logps/rejected": -137.12246704101562, "loss": 0.117, "rewards/accuracies": 1.0, "rewards/chosen": -0.04261932522058487, "rewards/margins": 1.369775414466858, "rewards/rejected": -1.4123947620391846, "step": 747 }, { "epoch": 0.17, "learning_rate": 9.9273902438744e-06, "logits/chosen": -0.847508430480957, "logits/rejected": -0.8041467070579529, "logps/chosen": -90.51393127441406, "logps/rejected": -240.643310546875, "loss": 0.1247, "rewards/accuracies": 1.0, "rewards/chosen": -0.5737442374229431, "rewards/margins": 1.3672165870666504, "rewards/rejected": -1.9409607648849487, "step": 748 }, { "epoch": 0.17, "learning_rate": 9.927085584846725e-06, "logits/chosen": -0.6660175919532776, "logits/rejected": -0.6376937627792358, "logps/chosen": -127.3783187866211, "logps/rejected": -81.01646423339844, "loss": 0.0923, "rewards/accuracies": 1.0, "rewards/chosen": 0.3524482846260071, "rewards/margins": 1.6065576076507568, "rewards/rejected": -1.254109263420105, "step": 749 }, { "epoch": 0.17, "learning_rate": 9.926780292700397e-06, "logits/chosen": -0.6595354676246643, "logits/rejected": -0.6290857195854187, "logps/chosen": -235.76390075683594, "logps/rejected": -289.78515625, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": 2.385145664215088, "rewards/margins": 4.268107891082764, "rewards/rejected": -1.8829621076583862, "step": 750 }, { "epoch": 0.17, "learning_rate": 9.926474367474646e-06, "logits/chosen": -0.3096904754638672, "logits/rejected": -0.2936011254787445, "logps/chosen": -63.1975212097168, "logps/rejected": -98.85755920410156, "loss": 0.226, "rewards/accuracies": 1.0, "rewards/chosen": 0.021284867078065872, "rewards/margins": 0.5701534748077393, "rewards/rejected": -0.5488685965538025, "step": 751 }, { "epoch": 0.17, "learning_rate": 9.92616780920878e-06, "logits/chosen": -0.8825235366821289, "logits/rejected": -0.8393343687057495, "logps/chosen": -119.89569091796875, "logps/rejected": -138.37017822265625, "loss": 0.6087, "rewards/accuracies": 1.0, "rewards/chosen": -0.3040351867675781, "rewards/margins": 0.3722984194755554, "rewards/rejected": -0.6763336062431335, "step": 752 }, { "epoch": 0.17, "learning_rate": 9.925860617942195e-06, "logits/chosen": -0.9375701546669006, "logits/rejected": -0.8924724459648132, "logps/chosen": -216.18878173828125, "logps/rejected": -215.47402954101562, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": 2.928790330886841, "rewards/margins": 2.8317627906799316, "rewards/rejected": 0.09702759236097336, "step": 753 }, { "epoch": 0.17, "learning_rate": 9.92555279371436e-06, "logits/chosen": -0.7884229421615601, "logits/rejected": -0.7737162113189697, "logps/chosen": -96.48738098144531, "logps/rejected": -125.03390502929688, "loss": 0.1128, "rewards/accuracies": 1.0, "rewards/chosen": 0.3273269832134247, "rewards/margins": 1.596174716949463, "rewards/rejected": -1.2688477039337158, "step": 754 }, { "epoch": 0.17, "learning_rate": 9.925244336564831e-06, "logits/chosen": -0.6651174426078796, "logits/rejected": -0.6106011271476746, "logps/chosen": -94.38585662841797, "logps/rejected": -107.59243774414062, "loss": 0.0429, "rewards/accuracies": 1.0, "rewards/chosen": 0.4701576232910156, "rewards/margins": 3.0711212158203125, "rewards/rejected": -2.600963592529297, "step": 755 }, { "epoch": 0.17, "learning_rate": 9.924935246533249e-06, "logits/chosen": -0.8253223299980164, "logits/rejected": -0.8903409242630005, "logps/chosen": -210.25843811035156, "logps/rejected": -161.1576690673828, "loss": 0.0978, "rewards/accuracies": 1.0, "rewards/chosen": 2.3518905639648438, "rewards/margins": 4.104424953460693, "rewards/rejected": -1.7525345087051392, "step": 756 }, { "epoch": 0.17, "learning_rate": 9.924625523659324e-06, "logits/chosen": -0.41569986939430237, "logits/rejected": -0.361934095621109, "logps/chosen": -79.2894515991211, "logps/rejected": -32.653236389160156, "loss": 0.5205, "rewards/accuracies": 1.0, "rewards/chosen": -0.15449142456054688, "rewards/margins": 0.8767445087432861, "rewards/rejected": -1.031235933303833, "step": 757 }, { "epoch": 0.17, "learning_rate": 9.924315167982858e-06, "logits/chosen": -1.1561022996902466, "logits/rejected": -1.2001292705535889, "logps/chosen": -113.58086395263672, "logps/rejected": -88.57240295410156, "loss": 0.0666, "rewards/accuracies": 1.0, "rewards/chosen": 0.9606949090957642, "rewards/margins": 2.614737033843994, "rewards/rejected": -1.65404212474823, "step": 758 }, { "epoch": 0.17, "learning_rate": 9.924004179543728e-06, "logits/chosen": -0.5870336294174194, "logits/rejected": -0.6031515002250671, "logps/chosen": -120.25141906738281, "logps/rejected": -114.64883422851562, "loss": 1.0304, "rewards/accuracies": 0.0, "rewards/chosen": -1.479112982749939, "rewards/margins": -0.5277419686317444, "rewards/rejected": -0.9513710141181946, "step": 759 }, { "epoch": 0.17, "learning_rate": 9.923692558381902e-06, "logits/chosen": -0.5974103212356567, "logits/rejected": -0.5851932168006897, "logps/chosen": -111.56425476074219, "logps/rejected": -180.70407104492188, "loss": 0.1769, "rewards/accuracies": 1.0, "rewards/chosen": 0.09380798786878586, "rewards/margins": 1.7235686779022217, "rewards/rejected": -1.6297607421875, "step": 760 }, { "epoch": 0.17, "learning_rate": 9.923380304537417e-06, "logits/chosen": -0.6771329641342163, "logits/rejected": -0.5953450202941895, "logps/chosen": -81.07705688476562, "logps/rejected": -106.2466049194336, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": 0.6409561038017273, "rewards/margins": 5.194447994232178, "rewards/rejected": -4.553492069244385, "step": 761 }, { "epoch": 0.17, "learning_rate": 9.923067418050399e-06, "logits/chosen": -0.729275643825531, "logits/rejected": -0.746550977230072, "logps/chosen": -101.34928894042969, "logps/rejected": -58.19541931152344, "loss": 0.1846, "rewards/accuracies": 1.0, "rewards/chosen": 0.020705414935946465, "rewards/margins": 1.684552788734436, "rewards/rejected": -1.6638473272323608, "step": 762 }, { "epoch": 0.17, "learning_rate": 9.922753898961052e-06, "logits/chosen": -0.4757852256298065, "logits/rejected": -0.4490152597427368, "logps/chosen": -94.93070220947266, "logps/rejected": -254.97711181640625, "loss": 0.089, "rewards/accuracies": 1.0, "rewards/chosen": 0.18464051187038422, "rewards/margins": 1.8594483137130737, "rewards/rejected": -1.6748077869415283, "step": 763 }, { "epoch": 0.17, "learning_rate": 9.922439747309663e-06, "logits/chosen": -0.4375793933868408, "logits/rejected": -0.380846232175827, "logps/chosen": -101.58287048339844, "logps/rejected": -162.92625427246094, "loss": 0.1008, "rewards/accuracies": 1.0, "rewards/chosen": 0.3869888484477997, "rewards/margins": 2.541830539703369, "rewards/rejected": -2.154841661453247, "step": 764 }, { "epoch": 0.17, "learning_rate": 9.922124963136599e-06, "logits/chosen": -0.4930756390094757, "logits/rejected": -0.48590049147605896, "logps/chosen": -204.77059936523438, "logps/rejected": -130.19192504882812, "loss": 0.891, "rewards/accuracies": 1.0, "rewards/chosen": 2.879647970199585, "rewards/margins": 4.759222507476807, "rewards/rejected": -1.8795746564865112, "step": 765 }, { "epoch": 0.17, "learning_rate": 9.92180954648231e-06, "logits/chosen": -0.7910720705986023, "logits/rejected": -0.7945396900177002, "logps/chosen": -89.16356658935547, "logps/rejected": -136.44381713867188, "loss": 0.725, "rewards/accuracies": 0.0, "rewards/chosen": -1.1507141590118408, "rewards/margins": -0.4770904779434204, "rewards/rejected": -0.6736236810684204, "step": 766 }, { "epoch": 0.17, "learning_rate": 9.921493497387327e-06, "logits/chosen": -0.6654009819030762, "logits/rejected": -0.6271416544914246, "logps/chosen": -181.83958435058594, "logps/rejected": -258.5426330566406, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": 1.599940538406372, "rewards/margins": 2.8326568603515625, "rewards/rejected": -1.23271644115448, "step": 767 }, { "epoch": 0.17, "learning_rate": 9.921176815892259e-06, "logits/chosen": -0.6071914434432983, "logits/rejected": -0.5680031776428223, "logps/chosen": -106.75492858886719, "logps/rejected": -121.9189682006836, "loss": 0.1183, "rewards/accuracies": 1.0, "rewards/chosen": -0.8303481936454773, "rewards/margins": 1.369694471359253, "rewards/rejected": -2.200042724609375, "step": 768 }, { "epoch": 0.17, "learning_rate": 9.920859502037801e-06, "logits/chosen": -0.49698659777641296, "logits/rejected": -0.4506547749042511, "logps/chosen": -186.73590087890625, "logps/rejected": -316.102294921875, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": 0.980419933795929, "rewards/margins": 7.019101142883301, "rewards/rejected": -6.0386810302734375, "step": 769 }, { "epoch": 0.17, "learning_rate": 9.920541555864726e-06, "logits/chosen": -0.4078371524810791, "logits/rejected": -0.3909524381160736, "logps/chosen": -60.67915725708008, "logps/rejected": -97.39451599121094, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": -0.41566202044487, "rewards/margins": 2.634744644165039, "rewards/rejected": -3.0504066944122314, "step": 770 }, { "epoch": 0.17, "learning_rate": 9.920222977413892e-06, "logits/chosen": -0.6018269658088684, "logits/rejected": -0.6018269658088684, "logps/chosen": -122.07630920410156, "logps/rejected": -122.07630920410156, "loss": 0.4756, "rewards/accuracies": 0.0, "rewards/chosen": -1.4218361377716064, "rewards/margins": 0.0, "rewards/rejected": -1.4218361377716064, "step": 771 }, { "epoch": 0.17, "learning_rate": 9.919903766726229e-06, "logits/chosen": -0.45015662908554077, "logits/rejected": -0.4729973077774048, "logps/chosen": -24.40992546081543, "logps/rejected": -8.923946380615234, "loss": 0.7445, "rewards/accuracies": 0.0, "rewards/chosen": -0.8685933947563171, "rewards/margins": -0.79520183801651, "rewards/rejected": -0.07339153438806534, "step": 772 }, { "epoch": 0.17, "learning_rate": 9.919583923842763e-06, "logits/chosen": -0.846175491809845, "logits/rejected": -0.9753189086914062, "logps/chosen": -265.35113525390625, "logps/rejected": -131.4144287109375, "loss": 0.1011, "rewards/accuracies": 1.0, "rewards/chosen": -0.3573852479457855, "rewards/margins": 1.5497124195098877, "rewards/rejected": -1.9070976972579956, "step": 773 }, { "epoch": 0.17, "learning_rate": 9.919263448804589e-06, "logits/chosen": -0.4347354471683502, "logits/rejected": -0.3864808678627014, "logps/chosen": -271.572021484375, "logps/rejected": -143.7139129638672, "loss": 1.5037, "rewards/accuracies": 0.0, "rewards/chosen": -2.8338944911956787, "rewards/margins": -0.5569779872894287, "rewards/rejected": -2.27691650390625, "step": 774 }, { "epoch": 0.17, "learning_rate": 9.918942341652885e-06, "logits/chosen": -0.853856086730957, "logits/rejected": -0.8033826351165771, "logps/chosen": -135.560546875, "logps/rejected": -87.01351928710938, "loss": 0.5306, "rewards/accuracies": 0.0, "rewards/chosen": 0.15606994926929474, "rewards/margins": -0.5239143371582031, "rewards/rejected": 0.6799842715263367, "step": 775 }, { "epoch": 0.17, "learning_rate": 9.918620602428916e-06, "logits/chosen": -0.35386398434638977, "logits/rejected": -0.37398505210876465, "logps/chosen": -157.79002380371094, "logps/rejected": -171.40179443359375, "loss": 0.1933, "rewards/accuracies": 1.0, "rewards/chosen": 1.7617324590682983, "rewards/margins": 0.7718810439109802, "rewards/rejected": 0.9898514151573181, "step": 776 }, { "epoch": 0.17, "learning_rate": 9.918298231174023e-06, "logits/chosen": -0.927229106426239, "logits/rejected": -0.8939879536628723, "logps/chosen": -77.46170043945312, "logps/rejected": -125.50382995605469, "loss": 0.5825, "rewards/accuracies": 1.0, "rewards/chosen": 1.0058250427246094, "rewards/margins": 1.496392846107483, "rewards/rejected": -0.49056777358055115, "step": 777 }, { "epoch": 0.17, "learning_rate": 9.917975227929631e-06, "logits/chosen": -0.879119873046875, "logits/rejected": -0.9156673550605774, "logps/chosen": -113.24848937988281, "logps/rejected": -117.3898696899414, "loss": 0.079, "rewards/accuracies": 1.0, "rewards/chosen": 0.791761040687561, "rewards/margins": 1.858095645904541, "rewards/rejected": -1.06633460521698, "step": 778 }, { "epoch": 0.17, "learning_rate": 9.917651592737245e-06, "logits/chosen": -0.7454540133476257, "logits/rejected": -0.674094557762146, "logps/chosen": -98.12360382080078, "logps/rejected": -205.86688232421875, "loss": 0.4187, "rewards/accuracies": 0.0, "rewards/chosen": -0.9060417413711548, "rewards/margins": -0.23896104097366333, "rewards/rejected": -0.6670807003974915, "step": 779 }, { "epoch": 0.17, "learning_rate": 9.91732732563845e-06, "logits/chosen": -1.084801435470581, "logits/rejected": -1.0377320051193237, "logps/chosen": -143.91244506835938, "logps/rejected": -154.9592742919922, "loss": 0.0361, "rewards/accuracies": 1.0, "rewards/chosen": 1.2691208124160767, "rewards/margins": 3.3631529808044434, "rewards/rejected": -2.0940322875976562, "step": 780 }, { "epoch": 0.17, "learning_rate": 9.917002426674916e-06, "logits/chosen": -0.45951029658317566, "logits/rejected": -0.44950446486473083, "logps/chosen": -275.33636474609375, "logps/rejected": -225.71456909179688, "loss": 0.54, "rewards/accuracies": 1.0, "rewards/chosen": 3.36297607421875, "rewards/margins": 0.7500243186950684, "rewards/rejected": 2.6129517555236816, "step": 781 }, { "epoch": 0.17, "learning_rate": 9.91667689588839e-06, "logits/chosen": -0.7201838493347168, "logits/rejected": -0.5496435165405273, "logps/chosen": -208.69944763183594, "logps/rejected": -142.0047607421875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 3.800096273422241, "rewards/margins": 4.848384380340576, "rewards/rejected": -1.0482879877090454, "step": 782 }, { "epoch": 0.17, "learning_rate": 9.916350733320704e-06, "logits/chosen": -0.39587104320526123, "logits/rejected": -0.39587104320526123, "logps/chosen": -89.99835205078125, "logps/rejected": -89.99835205078125, "loss": 0.6152, "rewards/accuracies": 0.0, "rewards/chosen": -1.1855201721191406, "rewards/margins": 0.0, "rewards/rejected": -1.1855201721191406, "step": 783 }, { "epoch": 0.17, "learning_rate": 9.916023939013764e-06, "logits/chosen": -0.30424922704696655, "logits/rejected": -0.2661372721195221, "logps/chosen": -163.10919189453125, "logps/rejected": -111.53285217285156, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": 2.0698869228363037, "rewards/margins": 4.141025066375732, "rewards/rejected": -2.0711381435394287, "step": 784 }, { "epoch": 0.17, "learning_rate": 9.915696513009567e-06, "logits/chosen": -0.5087134838104248, "logits/rejected": -0.4873235523700714, "logps/chosen": -113.991943359375, "logps/rejected": -104.76377868652344, "loss": 0.7063, "rewards/accuracies": 1.0, "rewards/chosen": 0.38364869356155396, "rewards/margins": 1.4021575450897217, "rewards/rejected": -1.0185089111328125, "step": 785 }, { "epoch": 0.17, "learning_rate": 9.915368455350185e-06, "logits/chosen": -0.8142722249031067, "logits/rejected": -0.819620668888092, "logps/chosen": -121.18627166748047, "logps/rejected": -109.71649169921875, "loss": 0.0637, "rewards/accuracies": 1.0, "rewards/chosen": 1.5885902643203735, "rewards/margins": 2.8557815551757812, "rewards/rejected": -1.2671912908554077, "step": 786 }, { "epoch": 0.17, "learning_rate": 9.915039766077772e-06, "logits/chosen": -0.4218410551548004, "logits/rejected": -0.37562939524650574, "logps/chosen": -216.68771362304688, "logps/rejected": -246.00103759765625, "loss": 0.1088, "rewards/accuracies": 1.0, "rewards/chosen": 1.7111297845840454, "rewards/margins": 4.047857761383057, "rewards/rejected": -2.3367278575897217, "step": 787 }, { "epoch": 0.17, "learning_rate": 9.914710445234567e-06, "logits/chosen": -0.8114022016525269, "logits/rejected": -0.8114022016525269, "logps/chosen": -140.067138671875, "logps/rejected": -140.067138671875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -1.5074386596679688, "rewards/margins": 0.0, "rewards/rejected": -1.5074386596679688, "step": 788 }, { "epoch": 0.17, "learning_rate": 9.914380492862883e-06, "logits/chosen": -0.6960569024085999, "logits/rejected": -0.7372633814811707, "logps/chosen": -194.95704650878906, "logps/rejected": -43.500797271728516, "loss": 0.1262, "rewards/accuracies": 1.0, "rewards/chosen": -0.0436248779296875, "rewards/margins": 1.4293323755264282, "rewards/rejected": -1.4729572534561157, "step": 789 }, { "epoch": 0.17, "learning_rate": 9.91404990900512e-06, "logits/chosen": -0.7017964720726013, "logits/rejected": -0.7046346664428711, "logps/chosen": -203.04443359375, "logps/rejected": -170.01011657714844, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/chosen": 0.5565094351768494, "rewards/margins": 2.645298957824707, "rewards/rejected": -2.088789463043213, "step": 790 }, { "epoch": 0.18, "learning_rate": 9.913718693703755e-06, "logits/chosen": -0.9090760946273804, "logits/rejected": -0.8307234644889832, "logps/chosen": -139.37002563476562, "logps/rejected": -307.99676513671875, "loss": 0.3037, "rewards/accuracies": 1.0, "rewards/chosen": 1.7062561511993408, "rewards/margins": 0.20825505256652832, "rewards/rejected": 1.4980010986328125, "step": 791 }, { "epoch": 0.18, "learning_rate": 9.91338684700135e-06, "logits/chosen": -0.47179582715034485, "logits/rejected": -0.38861650228500366, "logps/chosen": -136.47128295898438, "logps/rejected": -181.35708618164062, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": 1.704280138015747, "rewards/margins": 3.915309190750122, "rewards/rejected": -2.211029052734375, "step": 792 }, { "epoch": 0.18, "learning_rate": 9.91305436894055e-06, "logits/chosen": -0.7891064882278442, "logits/rejected": -0.8109891414642334, "logps/chosen": -105.83304595947266, "logps/rejected": -43.094966888427734, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": 1.7687324285507202, "rewards/margins": 2.6295878887176514, "rewards/rejected": -0.8608555197715759, "step": 793 }, { "epoch": 0.18, "learning_rate": 9.912721259564072e-06, "logits/chosen": -0.6042090654373169, "logits/rejected": -0.4072606563568115, "logps/chosen": -238.96534729003906, "logps/rejected": -439.1011962890625, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": 0.7886368036270142, "rewards/margins": 7.762147426605225, "rewards/rejected": -6.9735107421875, "step": 794 }, { "epoch": 0.18, "learning_rate": 9.91238751891472e-06, "logits/chosen": -0.774448573589325, "logits/rejected": -0.6123135685920715, "logps/chosen": -210.13568115234375, "logps/rejected": -245.81619262695312, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 5.179541110992432, "rewards/margins": 7.679315567016602, "rewards/rejected": -2.499774217605591, "step": 795 }, { "epoch": 0.18, "learning_rate": 9.912053147035383e-06, "logits/chosen": -0.7769671678543091, "logits/rejected": -0.7092117667198181, "logps/chosen": -237.67141723632812, "logps/rejected": -229.38619995117188, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 4.545398235321045, "rewards/margins": 6.829756736755371, "rewards/rejected": -2.284358263015747, "step": 796 }, { "epoch": 0.18, "learning_rate": 9.911718143969024e-06, "logits/chosen": -1.0013667345046997, "logits/rejected": -0.9806984066963196, "logps/chosen": -102.5615463256836, "logps/rejected": -128.36041259765625, "loss": 0.0769, "rewards/accuracies": 1.0, "rewards/chosen": 1.410620927810669, "rewards/margins": 1.8039588928222656, "rewards/rejected": -0.39333802461624146, "step": 797 }, { "epoch": 0.18, "learning_rate": 9.911382509758692e-06, "logits/chosen": -0.5002003908157349, "logits/rejected": -0.4971620440483093, "logps/chosen": -97.10685729980469, "logps/rejected": -178.6947021484375, "loss": 1.2021, "rewards/accuracies": 0.0, "rewards/chosen": 0.1489402800798416, "rewards/margins": -2.2524619102478027, "rewards/rejected": 2.401402235031128, "step": 798 }, { "epoch": 0.18, "learning_rate": 9.911046244447515e-06, "logits/chosen": -0.3466818034648895, "logits/rejected": -0.33234643936157227, "logps/chosen": -118.34696197509766, "logps/rejected": -186.697509765625, "loss": 3.0417, "rewards/accuracies": 0.0, "rewards/chosen": -2.667283773422241, "rewards/margins": -4.001020908355713, "rewards/rejected": 1.3337372541427612, "step": 799 }, { "epoch": 0.18, "learning_rate": 9.910709348078699e-06, "logits/chosen": -0.5637933611869812, "logits/rejected": -0.47145816683769226, "logps/chosen": -63.434844970703125, "logps/rejected": -140.04025268554688, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": 1.018579125404358, "rewards/margins": 5.469303131103516, "rewards/rejected": -4.450724124908447, "step": 800 }, { "epoch": 0.18, "learning_rate": 9.910371820695538e-06, "logits/chosen": -0.6777539253234863, "logits/rejected": -0.6393725872039795, "logps/chosen": -172.53053283691406, "logps/rejected": -202.56854248046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 1.3369919061660767, "rewards/margins": 6.425848484039307, "rewards/rejected": -5.0888566970825195, "step": 801 }, { "epoch": 0.18, "learning_rate": 9.910033662341403e-06, "logits/chosen": -0.4659447968006134, "logits/rejected": 0.18900060653686523, "logps/chosen": -81.22736358642578, "logps/rejected": -104.99253845214844, "loss": 0.1233, "rewards/accuracies": 1.0, "rewards/chosen": -1.2642624378204346, "rewards/margins": 1.3020637035369873, "rewards/rejected": -2.566326141357422, "step": 802 }, { "epoch": 0.18, "learning_rate": 9.909694873059745e-06, "logits/chosen": -0.864676296710968, "logits/rejected": -0.9639559388160706, "logps/chosen": -169.6015625, "logps/rejected": -135.23426818847656, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": 2.15207839012146, "rewards/margins": 4.4564690589904785, "rewards/rejected": -2.3043906688690186, "step": 803 }, { "epoch": 0.18, "learning_rate": 9.909355452894098e-06, "logits/chosen": -0.6371459364891052, "logits/rejected": -0.565222978591919, "logps/chosen": -229.69668579101562, "logps/rejected": -298.9894714355469, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 1.3571258783340454, "rewards/margins": 6.062963962554932, "rewards/rejected": -4.705838203430176, "step": 804 }, { "epoch": 0.18, "learning_rate": 9.909015401888077e-06, "logits/chosen": -0.49940061569213867, "logits/rejected": 0.06763704121112823, "logps/chosen": -185.20718383789062, "logps/rejected": -326.6163330078125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 2.593292236328125, "rewards/margins": 17.419870376586914, "rewards/rejected": -14.826578140258789, "step": 805 }, { "epoch": 0.18, "learning_rate": 9.908674720085378e-06, "logits/chosen": -0.5854490995407104, "logits/rejected": -0.5107518434524536, "logps/chosen": -221.16468811035156, "logps/rejected": -272.48284912109375, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": 1.000756859779358, "rewards/margins": 3.8902602195739746, "rewards/rejected": -2.8895034790039062, "step": 806 }, { "epoch": 0.18, "learning_rate": 9.908333407529779e-06, "logits/chosen": -0.6813992857933044, "logits/rejected": -0.6813992857933044, "logps/chosen": -102.47874450683594, "logps/rejected": -102.47874450683594, "loss": 0.348, "rewards/accuracies": 0.0, "rewards/chosen": -1.511572241783142, "rewards/margins": 0.0, "rewards/rejected": -1.511572241783142, "step": 807 }, { "epoch": 0.18, "learning_rate": 9.907991464265136e-06, "logits/chosen": -0.876438558101654, "logits/rejected": -0.8335138559341431, "logps/chosen": -98.42033386230469, "logps/rejected": -133.26385498046875, "loss": 0.2051, "rewards/accuracies": 1.0, "rewards/chosen": 0.19174805283546448, "rewards/margins": 4.267953872680664, "rewards/rejected": -4.076205730438232, "step": 808 }, { "epoch": 0.18, "learning_rate": 9.907648890335387e-06, "logits/chosen": -0.5974674224853516, "logits/rejected": -0.5800562500953674, "logps/chosen": -130.90773010253906, "logps/rejected": -41.16242218017578, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": 2.2114486694335938, "rewards/margins": 3.590623378753662, "rewards/rejected": -1.379174828529358, "step": 809 }, { "epoch": 0.18, "learning_rate": 9.907305685784553e-06, "logits/chosen": -0.7978003621101379, "logits/rejected": -0.7828525900840759, "logps/chosen": -96.59492492675781, "logps/rejected": -95.9257583618164, "loss": 0.5846, "rewards/accuracies": 0.0, "rewards/chosen": 0.11253662407398224, "rewards/margins": -0.5392662286758423, "rewards/rejected": 0.6518028378486633, "step": 810 }, { "epoch": 0.18, "learning_rate": 9.906961850656737e-06, "logits/chosen": -0.688819408416748, "logits/rejected": -0.6852353811264038, "logps/chosen": -127.0928726196289, "logps/rejected": -109.1422119140625, "loss": 0.2401, "rewards/accuracies": 1.0, "rewards/chosen": 1.540596842765808, "rewards/margins": 3.256434679031372, "rewards/rejected": -1.715837836265564, "step": 811 }, { "epoch": 0.18, "learning_rate": 9.906617384996118e-06, "logits/chosen": -0.5076141953468323, "logits/rejected": -0.4403987526893616, "logps/chosen": -209.64529418945312, "logps/rejected": -109.78620147705078, "loss": 0.2221, "rewards/accuracies": 1.0, "rewards/chosen": 3.939288377761841, "rewards/margins": 4.817319393157959, "rewards/rejected": -0.8780311942100525, "step": 812 }, { "epoch": 0.18, "learning_rate": 9.906272288846962e-06, "logits/chosen": -0.933037281036377, "logits/rejected": -0.9379602074623108, "logps/chosen": -168.48826599121094, "logps/rejected": -145.42498779296875, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/chosen": 2.1212356090545654, "rewards/margins": 2.8149261474609375, "rewards/rejected": -0.6936904788017273, "step": 813 }, { "epoch": 0.18, "learning_rate": 9.90592656225361e-06, "logits/chosen": -0.2736523449420929, "logits/rejected": -0.28788256645202637, "logps/chosen": -57.92076110839844, "logps/rejected": -145.6292266845703, "loss": 0.1889, "rewards/accuracies": 1.0, "rewards/chosen": 0.4183288514614105, "rewards/margins": 2.6595680713653564, "rewards/rejected": -2.241239309310913, "step": 814 }, { "epoch": 0.18, "learning_rate": 9.905580205260487e-06, "logits/chosen": -0.48279696702957153, "logits/rejected": -0.4200122654438019, "logps/chosen": -261.053955078125, "logps/rejected": -284.553955078125, "loss": 0.1013, "rewards/accuracies": 1.0, "rewards/chosen": 1.14080810546875, "rewards/margins": 1.501592993736267, "rewards/rejected": -0.3607849180698395, "step": 815 }, { "epoch": 0.18, "learning_rate": 9.905233217912102e-06, "logits/chosen": -0.9120545387268066, "logits/rejected": -0.9151323437690735, "logps/chosen": -141.1177978515625, "logps/rejected": -139.3123779296875, "loss": 0.0924, "rewards/accuracies": 1.0, "rewards/chosen": 1.3577240705490112, "rewards/margins": 2.0145065784454346, "rewards/rejected": -0.6567825675010681, "step": 816 }, { "epoch": 0.18, "learning_rate": 9.904885600253038e-06, "logits/chosen": -0.5062954425811768, "logits/rejected": -0.4445548355579376, "logps/chosen": -85.92791748046875, "logps/rejected": -255.84523010253906, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 0.07941818237304688, "rewards/margins": 4.123627662658691, "rewards/rejected": -4.0442094802856445, "step": 817 }, { "epoch": 0.18, "learning_rate": 9.904537352327968e-06, "logits/chosen": -0.9878899455070496, "logits/rejected": -0.9715358018875122, "logps/chosen": -186.02503967285156, "logps/rejected": -219.61412048339844, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": 0.3840652406215668, "rewards/margins": 4.747704982757568, "rewards/rejected": -4.363639831542969, "step": 818 }, { "epoch": 0.18, "learning_rate": 9.904188474181637e-06, "logits/chosen": -0.6727921366691589, "logits/rejected": -0.6926409006118774, "logps/chosen": -181.50686645507812, "logps/rejected": -59.986515045166016, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": 1.1683425903320312, "rewards/margins": 3.181251287460327, "rewards/rejected": -2.012908697128296, "step": 819 }, { "epoch": 0.18, "learning_rate": 9.903838965858877e-06, "logits/chosen": -0.868825376033783, "logits/rejected": -0.8566912412643433, "logps/chosen": -46.09511947631836, "logps/rejected": -74.49787902832031, "loss": 0.1312, "rewards/accuracies": 1.0, "rewards/chosen": 0.6465389132499695, "rewards/margins": 1.4184939861297607, "rewards/rejected": -0.771955132484436, "step": 820 }, { "epoch": 0.18, "learning_rate": 9.9034888274046e-06, "logits/chosen": -0.6349717378616333, "logits/rejected": -0.6201197504997253, "logps/chosen": -166.81002807617188, "logps/rejected": -255.59130859375, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": 1.5279449224472046, "rewards/margins": 3.1927123069763184, "rewards/rejected": -1.6647675037384033, "step": 821 }, { "epoch": 0.18, "learning_rate": 9.903138058863793e-06, "logits/chosen": -0.6626745462417603, "logits/rejected": -0.6626745462417603, "logps/chosen": -111.78794860839844, "logps/rejected": -111.78794860839844, "loss": 0.5165, "rewards/accuracies": 0.0, "rewards/chosen": 0.31326523423194885, "rewards/margins": 0.0, "rewards/rejected": 0.31326523423194885, "step": 822 }, { "epoch": 0.18, "learning_rate": 9.902786660281533e-06, "logits/chosen": -0.29997214674949646, "logits/rejected": -0.2302502989768982, "logps/chosen": -60.72734451293945, "logps/rejected": -127.60607147216797, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": 0.44480401277542114, "rewards/margins": 3.2334887981414795, "rewards/rejected": -2.788684844970703, "step": 823 }, { "epoch": 0.18, "learning_rate": 9.902434631702976e-06, "logits/chosen": -0.6864389181137085, "logits/rejected": -0.6536690592765808, "logps/chosen": -94.70906066894531, "logps/rejected": -234.47100830078125, "loss": 0.0734, "rewards/accuracies": 1.0, "rewards/chosen": 2.2338807582855225, "rewards/margins": 2.3619096279144287, "rewards/rejected": -0.12802886962890625, "step": 824 }, { "epoch": 0.18, "learning_rate": 9.902081973173352e-06, "logits/chosen": -0.4383462369441986, "logits/rejected": -0.4047170579433441, "logps/chosen": -75.15750885009766, "logps/rejected": -157.5220184326172, "loss": 1.1499, "rewards/accuracies": 1.0, "rewards/chosen": -0.2387947142124176, "rewards/margins": 1.2892403602600098, "rewards/rejected": -1.528035044670105, "step": 825 }, { "epoch": 0.18, "learning_rate": 9.901728684737977e-06, "logits/chosen": -0.6616297960281372, "logits/rejected": -0.6134929656982422, "logps/chosen": -105.3720703125, "logps/rejected": -116.71328735351562, "loss": 0.5429, "rewards/accuracies": 1.0, "rewards/chosen": 0.6987838745117188, "rewards/margins": 1.5974235534667969, "rewards/rejected": -0.8986396789550781, "step": 826 }, { "epoch": 0.18, "learning_rate": 9.901374766442252e-06, "logits/chosen": -0.4895940124988556, "logits/rejected": -0.4895940124988556, "logps/chosen": -152.68914794921875, "logps/rejected": -152.68914794921875, "loss": 0.3486, "rewards/accuracies": 0.0, "rewards/chosen": 0.22387848794460297, "rewards/margins": 0.0, "rewards/rejected": 0.22387848794460297, "step": 827 }, { "epoch": 0.18, "learning_rate": 9.901020218331652e-06, "logits/chosen": -0.9129829406738281, "logits/rejected": -0.939578652381897, "logps/chosen": -205.1771240234375, "logps/rejected": -120.6485595703125, "loss": 0.095, "rewards/accuracies": 1.0, "rewards/chosen": 1.1239013671875, "rewards/margins": 1.5691817998886108, "rewards/rejected": -0.4452804625034332, "step": 828 }, { "epoch": 0.18, "learning_rate": 9.900665040451735e-06, "logits/chosen": -0.4352608323097229, "logits/rejected": -0.4352608323097229, "logps/chosen": -57.76535415649414, "logps/rejected": -57.76535415649414, "loss": 0.5045, "rewards/accuracies": 0.0, "rewards/chosen": -0.9553890228271484, "rewards/margins": 0.0, "rewards/rejected": -0.9553890228271484, "step": 829 }, { "epoch": 0.18, "learning_rate": 9.90030923284814e-06, "logits/chosen": -0.7722312211990356, "logits/rejected": -0.70350182056427, "logps/chosen": -211.216064453125, "logps/rejected": -312.61077880859375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 2.625866651535034, "rewards/margins": 7.538769721984863, "rewards/rejected": -4.91290283203125, "step": 830 }, { "epoch": 0.18, "learning_rate": 9.89995279556659e-06, "logits/chosen": -0.729229211807251, "logits/rejected": -0.729229211807251, "logps/chosen": -33.5405387878418, "logps/rejected": -33.5405387878418, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -0.6778404116630554, "rewards/margins": 0.0, "rewards/rejected": -0.6778404116630554, "step": 831 }, { "epoch": 0.18, "learning_rate": 9.899595728652883e-06, "logits/chosen": -0.8404524922370911, "logits/rejected": -0.8195958733558655, "logps/chosen": -72.41178131103516, "logps/rejected": -92.39653778076172, "loss": 0.1513, "rewards/accuracies": 1.0, "rewards/chosen": -0.5781509280204773, "rewards/margins": 1.1183784008026123, "rewards/rejected": -1.6965293884277344, "step": 832 }, { "epoch": 0.18, "learning_rate": 9.899238032152907e-06, "logits/chosen": -0.6205676794052124, "logits/rejected": -0.5904129147529602, "logps/chosen": -138.69979858398438, "logps/rejected": -134.363525390625, "loss": 0.1183, "rewards/accuracies": 1.0, "rewards/chosen": -0.06512756645679474, "rewards/margins": 1.4424995183944702, "rewards/rejected": -1.5076271295547485, "step": 833 }, { "epoch": 0.18, "learning_rate": 9.898879706112618e-06, "logits/chosen": -0.5927562117576599, "logits/rejected": -0.5850952863693237, "logps/chosen": -79.73957824707031, "logps/rejected": -83.21058654785156, "loss": 0.2569, "rewards/accuracies": 1.0, "rewards/chosen": 0.1814415007829666, "rewards/margins": 0.6349762082099915, "rewards/rejected": -0.45353469252586365, "step": 834 }, { "epoch": 0.18, "learning_rate": 9.898520750578065e-06, "logits/chosen": -0.6625739932060242, "logits/rejected": -0.6563750505447388, "logps/chosen": -93.75527954101562, "logps/rejected": -168.79823303222656, "loss": 0.2433, "rewards/accuracies": 1.0, "rewards/chosen": 0.42937469482421875, "rewards/margins": 0.7402527332305908, "rewards/rejected": -0.3108780086040497, "step": 835 }, { "epoch": 0.19, "learning_rate": 9.898161165595371e-06, "logits/chosen": -0.3533712327480316, "logits/rejected": -0.3256559371948242, "logps/chosen": -140.969482421875, "logps/rejected": -164.4554443359375, "loss": 0.2157, "rewards/accuracies": 1.0, "rewards/chosen": -2.9643828868865967, "rewards/margins": 0.6703643798828125, "rewards/rejected": -3.634747266769409, "step": 836 }, { "epoch": 0.19, "learning_rate": 9.897800951210741e-06, "logits/chosen": -1.0013015270233154, "logits/rejected": -1.2866259813308716, "logps/chosen": -90.81619262695312, "logps/rejected": -112.60943603515625, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": 0.6440262198448181, "rewards/margins": 2.6164581775665283, "rewards/rejected": -1.972432017326355, "step": 837 }, { "epoch": 0.19, "learning_rate": 9.897440107470463e-06, "logits/chosen": -0.7839307188987732, "logits/rejected": -0.6093515753746033, "logps/chosen": -149.52447509765625, "logps/rejected": -272.5466613769531, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": 0.23074646294116974, "rewards/margins": 8.633310317993164, "rewards/rejected": -8.40256404876709, "step": 838 }, { "epoch": 0.19, "learning_rate": 9.897078634420905e-06, "logits/chosen": -0.3947165310382843, "logits/rejected": -0.40526577830314636, "logps/chosen": -40.814117431640625, "logps/rejected": -56.951377868652344, "loss": 0.4832, "rewards/accuracies": 0.0, "rewards/chosen": -0.5586261749267578, "rewards/margins": -0.4839206635951996, "rewards/rejected": -0.07470550388097763, "step": 839 }, { "epoch": 0.19, "learning_rate": 9.896716532108515e-06, "logits/chosen": -0.832749605178833, "logits/rejected": -0.832749605178833, "logps/chosen": -151.15298461914062, "logps/rejected": -151.15298461914062, "loss": 0.6499, "rewards/accuracies": 0.0, "rewards/chosen": -3.322416067123413, "rewards/margins": 0.0, "rewards/rejected": -3.322416067123413, "step": 840 }, { "epoch": 0.19, "learning_rate": 9.896353800579823e-06, "logits/chosen": -0.6443510055541992, "logits/rejected": -0.6560949087142944, "logps/chosen": -196.33631896972656, "logps/rejected": -145.15362548828125, "loss": 0.0435, "rewards/accuracies": 1.0, "rewards/chosen": 1.719319224357605, "rewards/margins": 3.2720489501953125, "rewards/rejected": -1.552729845046997, "step": 841 }, { "epoch": 0.19, "learning_rate": 9.895990439881436e-06, "logits/chosen": -1.0393582582473755, "logits/rejected": -1.1437227725982666, "logps/chosen": -159.96102905273438, "logps/rejected": -71.40341186523438, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": 0.5373199582099915, "rewards/margins": 2.7723679542541504, "rewards/rejected": -2.2350480556488037, "step": 842 }, { "epoch": 0.19, "learning_rate": 9.895626450060047e-06, "logits/chosen": -0.7474889159202576, "logits/rejected": -0.7209068536758423, "logps/chosen": -202.33135986328125, "logps/rejected": -54.65201187133789, "loss": 0.0375, "rewards/accuracies": 1.0, "rewards/chosen": 0.9817901849746704, "rewards/margins": 2.606154441833496, "rewards/rejected": -1.6243641376495361, "step": 843 }, { "epoch": 0.19, "learning_rate": 9.89526183116243e-06, "logits/chosen": -0.6428936123847961, "logits/rejected": -0.6428936123847961, "logps/chosen": -78.53298950195312, "logps/rejected": -78.53298950195312, "loss": 0.4126, "rewards/accuracies": 0.0, "rewards/chosen": -0.8000442385673523, "rewards/margins": 0.0, "rewards/rejected": -0.8000442385673523, "step": 844 }, { "epoch": 0.19, "learning_rate": 9.894896583235434e-06, "logits/chosen": -0.5338640213012695, "logits/rejected": -0.5462673902511597, "logps/chosen": -149.87307739257812, "logps/rejected": -202.0225830078125, "loss": 0.2275, "rewards/accuracies": 1.0, "rewards/chosen": 1.0580047369003296, "rewards/margins": 6.229657173156738, "rewards/rejected": -5.171652317047119, "step": 845 }, { "epoch": 0.19, "learning_rate": 9.894530706325994e-06, "logits/chosen": -0.7270213961601257, "logits/rejected": -0.7053635120391846, "logps/chosen": -106.25827026367188, "logps/rejected": -120.59143829345703, "loss": 0.1245, "rewards/accuracies": 1.0, "rewards/chosen": 0.21430054306983948, "rewards/margins": 2.0454490184783936, "rewards/rejected": -1.8311485052108765, "step": 846 }, { "epoch": 0.19, "learning_rate": 9.894164200481124e-06, "logits/chosen": -0.6949694752693176, "logits/rejected": -0.6995921730995178, "logps/chosen": -248.014404296875, "logps/rejected": -246.7609100341797, "loss": 0.6565, "rewards/accuracies": 0.0, "rewards/chosen": 2.3288819789886475, "rewards/margins": -0.9985334873199463, "rewards/rejected": 3.3274154663085938, "step": 847 }, { "epoch": 0.19, "learning_rate": 9.89379706574792e-06, "logits/chosen": -0.6174712777137756, "logits/rejected": -0.6309969425201416, "logps/chosen": -137.65675354003906, "logps/rejected": -153.72604370117188, "loss": 0.0778, "rewards/accuracies": 1.0, "rewards/chosen": 0.7217758297920227, "rewards/margins": 1.8378372192382812, "rewards/rejected": -1.1160614490509033, "step": 848 }, { "epoch": 0.19, "learning_rate": 9.893429302173558e-06, "logits/chosen": -0.7252703309059143, "logits/rejected": -0.6483240127563477, "logps/chosen": -265.26275634765625, "logps/rejected": -160.6678466796875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 3.8176848888397217, "rewards/margins": 6.874981880187988, "rewards/rejected": -3.0572967529296875, "step": 849 }, { "epoch": 0.19, "learning_rate": 9.893060909805294e-06, "logits/chosen": -0.5837864875793457, "logits/rejected": -0.5172008275985718, "logps/chosen": -192.34515380859375, "logps/rejected": -303.64697265625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 3.1618499755859375, "rewards/margins": 6.289850234985352, "rewards/rejected": -3.128000020980835, "step": 850 }, { "epoch": 0.19, "learning_rate": 9.892691888690466e-06, "logits/chosen": -0.7750188708305359, "logits/rejected": -0.7830173373222351, "logps/chosen": -144.0203857421875, "logps/rejected": -135.89987182617188, "loss": 0.3453, "rewards/accuracies": 1.0, "rewards/chosen": -2.9380249977111816, "rewards/margins": 0.21373677253723145, "rewards/rejected": -3.151761770248413, "step": 851 }, { "epoch": 0.19, "learning_rate": 9.892322238876492e-06, "logits/chosen": -0.908429741859436, "logits/rejected": -0.9134759306907654, "logps/chosen": -231.25050354003906, "logps/rejected": -215.8017578125, "loss": 0.1451, "rewards/accuracies": 1.0, "rewards/chosen": 1.313624620437622, "rewards/margins": 1.089399814605713, "rewards/rejected": 0.22422485053539276, "step": 852 }, { "epoch": 0.19, "learning_rate": 9.89195196041087e-06, "logits/chosen": -0.49297481775283813, "logits/rejected": -0.4704208970069885, "logps/chosen": -82.91101837158203, "logps/rejected": -266.15411376953125, "loss": 0.1473, "rewards/accuracies": 1.0, "rewards/chosen": 0.016887664794921875, "rewards/margins": 1.2861305475234985, "rewards/rejected": -1.2692428827285767, "step": 853 }, { "epoch": 0.19, "learning_rate": 9.891581053341182e-06, "logits/chosen": -0.5831769704818726, "logits/rejected": -0.5831769704818726, "logps/chosen": -135.12496948242188, "logps/rejected": -135.12496948242188, "loss": 0.3568, "rewards/accuracies": 0.0, "rewards/chosen": -3.00300669670105, "rewards/margins": 0.0, "rewards/rejected": -3.00300669670105, "step": 854 }, { "epoch": 0.19, "learning_rate": 9.891209517715088e-06, "logits/chosen": -1.070633053779602, "logits/rejected": -0.9685553312301636, "logps/chosen": -123.00761413574219, "logps/rejected": -137.63009643554688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 1.0060089826583862, "rewards/margins": 6.361506938934326, "rewards/rejected": -5.35549783706665, "step": 855 }, { "epoch": 0.19, "learning_rate": 9.890837353580327e-06, "logits/chosen": -0.8033985495567322, "logits/rejected": -0.34707894921302795, "logps/chosen": -194.17495727539062, "logps/rejected": -176.10833740234375, "loss": 0.2249, "rewards/accuracies": 1.0, "rewards/chosen": 0.9046630859375, "rewards/margins": 4.282895088195801, "rewards/rejected": -3.3782317638397217, "step": 856 }, { "epoch": 0.19, "learning_rate": 9.890464560984725e-06, "logits/chosen": -1.089979648590088, "logits/rejected": -1.199323058128357, "logps/chosen": -152.4207000732422, "logps/rejected": -47.79032897949219, "loss": 0.918, "rewards/accuracies": 0.0, "rewards/chosen": -1.491145372390747, "rewards/margins": -0.7587807178497314, "rewards/rejected": -0.7323646545410156, "step": 857 }, { "epoch": 0.19, "learning_rate": 9.890091139976183e-06, "logits/chosen": -0.696526050567627, "logits/rejected": -0.6572538614273071, "logps/chosen": -65.2085952758789, "logps/rejected": -74.13018035888672, "loss": 0.0736, "rewards/accuracies": 1.0, "rewards/chosen": 0.6096428036689758, "rewards/margins": 1.8434715270996094, "rewards/rejected": -1.2338287830352783, "step": 858 }, { "epoch": 0.19, "learning_rate": 9.889717090602685e-06, "logits/chosen": -0.6840128898620605, "logits/rejected": -0.616186261177063, "logps/chosen": -82.49919891357422, "logps/rejected": -44.90579605102539, "loss": 0.1344, "rewards/accuracies": 1.0, "rewards/chosen": 0.692706286907196, "rewards/margins": 2.4250264167785645, "rewards/rejected": -1.7323200702667236, "step": 859 }, { "epoch": 0.19, "learning_rate": 9.889342412912296e-06, "logits/chosen": -0.7864828109741211, "logits/rejected": -0.7291152477264404, "logps/chosen": -148.5522003173828, "logps/rejected": -220.11712646484375, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": 1.0664291381835938, "rewards/margins": 2.8025832176208496, "rewards/rejected": -1.7361541986465454, "step": 860 }, { "epoch": 0.19, "learning_rate": 9.88896710695316e-06, "logits/chosen": -0.7603144645690918, "logits/rejected": -0.6133226752281189, "logps/chosen": -193.60928344726562, "logps/rejected": -308.0606689453125, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": 4.725549221038818, "rewards/margins": 11.138699531555176, "rewards/rejected": -6.413150310516357, "step": 861 }, { "epoch": 0.19, "learning_rate": 9.888591172773502e-06, "logits/chosen": -0.7628702521324158, "logits/rejected": -0.803081214427948, "logps/chosen": -92.23278045654297, "logps/rejected": -40.18014907836914, "loss": 0.2879, "rewards/accuracies": 1.0, "rewards/chosen": -0.09533005207777023, "rewards/margins": 0.2802268862724304, "rewards/rejected": -0.37555694580078125, "step": 862 }, { "epoch": 0.19, "learning_rate": 9.888214610421633e-06, "logits/chosen": -1.0183695554733276, "logits/rejected": -1.0293772220611572, "logps/chosen": -116.78982543945312, "logps/rejected": -122.24091339111328, "loss": 0.0513, "rewards/accuracies": 1.0, "rewards/chosen": 0.7585975527763367, "rewards/margins": 2.2253615856170654, "rewards/rejected": -1.4667640924453735, "step": 863 }, { "epoch": 0.19, "learning_rate": 9.887837419945937e-06, "logits/chosen": -0.9311139583587646, "logits/rejected": -0.9261481165885925, "logps/chosen": -170.121826171875, "logps/rejected": -92.74647521972656, "loss": 0.6758, "rewards/accuracies": 0.0, "rewards/chosen": -0.0917205810546875, "rewards/margins": -1.051973819732666, "rewards/rejected": 0.9602531790733337, "step": 864 }, { "epoch": 0.19, "learning_rate": 9.887459601394881e-06, "logits/chosen": -0.832639217376709, "logits/rejected": -0.8281622529029846, "logps/chosen": -138.39964294433594, "logps/rejected": -103.73002624511719, "loss": 0.2586, "rewards/accuracies": 1.0, "rewards/chosen": 1.7834656238555908, "rewards/margins": 0.49572980403900146, "rewards/rejected": 1.2877358198165894, "step": 865 }, { "epoch": 0.19, "learning_rate": 9.887081154817015e-06, "logits/chosen": -0.8608192801475525, "logits/rejected": -0.876787543296814, "logps/chosen": -103.25796508789062, "logps/rejected": -74.87995910644531, "loss": 0.8616, "rewards/accuracies": 0.0, "rewards/chosen": -0.5184463858604431, "rewards/margins": -1.5258698463439941, "rewards/rejected": 1.0074234008789062, "step": 866 }, { "epoch": 0.19, "learning_rate": 9.88670208026097e-06, "logits/chosen": -0.7889797687530518, "logits/rejected": -0.7838693857192993, "logps/chosen": -86.39824676513672, "logps/rejected": -179.0496063232422, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": 0.3270927369594574, "rewards/margins": 4.9258294105529785, "rewards/rejected": -4.598736763000488, "step": 867 }, { "epoch": 0.19, "learning_rate": 9.886322377775455e-06, "logits/chosen": -0.6702223420143127, "logits/rejected": -0.6702223420143127, "logps/chosen": -157.73995971679688, "logps/rejected": -157.73995971679688, "loss": 0.3472, "rewards/accuracies": 0.0, "rewards/chosen": -2.4022369384765625, "rewards/margins": 0.0, "rewards/rejected": -2.4022369384765625, "step": 868 }, { "epoch": 0.19, "learning_rate": 9.885942047409262e-06, "logits/chosen": -0.538053035736084, "logits/rejected": -0.5490502715110779, "logps/chosen": -66.48438262939453, "logps/rejected": -74.33782958984375, "loss": 0.1926, "rewards/accuracies": 1.0, "rewards/chosen": 1.1872780323028564, "rewards/margins": 0.7649803161621094, "rewards/rejected": 0.4222976863384247, "step": 869 }, { "epoch": 0.19, "learning_rate": 9.885561089211259e-06, "logits/chosen": -0.8404435515403748, "logits/rejected": -0.8207713961601257, "logps/chosen": -239.29200744628906, "logps/rejected": -337.76611328125, "loss": 0.8859, "rewards/accuracies": 1.0, "rewards/chosen": 3.535548448562622, "rewards/margins": 5.743895053863525, "rewards/rejected": -2.2083466053009033, "step": 870 }, { "epoch": 0.19, "learning_rate": 9.885179503230403e-06, "logits/chosen": -0.7650158405303955, "logits/rejected": -0.7597769498825073, "logps/chosen": -128.30715942382812, "logps/rejected": -107.2166976928711, "loss": 0.104, "rewards/accuracies": 1.0, "rewards/chosen": -0.09528809040784836, "rewards/margins": 2.29079532623291, "rewards/rejected": -2.3860833644866943, "step": 871 }, { "epoch": 0.19, "learning_rate": 9.884797289515723e-06, "logits/chosen": -1.0656213760375977, "logits/rejected": -0.9760881066322327, "logps/chosen": -153.09165954589844, "logps/rejected": -244.11697387695312, "loss": 1.6828, "rewards/accuracies": 0.0, "rewards/chosen": -2.5579164028167725, "rewards/margins": -3.3303256034851074, "rewards/rejected": 0.7724090814590454, "step": 872 }, { "epoch": 0.19, "learning_rate": 9.884414448116335e-06, "logits/chosen": -0.5443845391273499, "logits/rejected": -0.5443845391273499, "logps/chosen": -169.14915466308594, "logps/rejected": -169.14915466308594, "loss": 0.3475, "rewards/accuracies": 0.0, "rewards/chosen": -5.55893087387085, "rewards/margins": 0.0, "rewards/rejected": -5.55893087387085, "step": 873 }, { "epoch": 0.19, "learning_rate": 9.88403097908143e-06, "logits/chosen": -1.162062406539917, "logits/rejected": -1.108539342880249, "logps/chosen": -138.89675903320312, "logps/rejected": -158.76837158203125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 1.6097854375839233, "rewards/margins": 4.531374931335449, "rewards/rejected": -2.9215896129608154, "step": 874 }, { "epoch": 0.19, "learning_rate": 9.883646882460287e-06, "logits/chosen": -0.625133752822876, "logits/rejected": -0.5607445240020752, "logps/chosen": -70.94552612304688, "logps/rejected": -42.98231506347656, "loss": 0.2802, "rewards/accuracies": 1.0, "rewards/chosen": -0.17933045327663422, "rewards/margins": 1.008583426475525, "rewards/rejected": -1.1879138946533203, "step": 875 }, { "epoch": 0.19, "learning_rate": 9.883262158302259e-06, "logits/chosen": -1.1101560592651367, "logits/rejected": -1.1350057125091553, "logps/chosen": -131.1171112060547, "logps/rejected": -67.97273254394531, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": 1.051265001296997, "rewards/margins": 4.347049713134766, "rewards/rejected": -3.2957844734191895, "step": 876 }, { "epoch": 0.19, "learning_rate": 9.882876806656783e-06, "logits/chosen": -0.5761884450912476, "logits/rejected": -0.45419925451278687, "logps/chosen": -240.0995330810547, "logps/rejected": -356.845458984375, "loss": 0.0652, "rewards/accuracies": 1.0, "rewards/chosen": 0.6475570797920227, "rewards/margins": 1.9721055030822754, "rewards/rejected": -1.324548363685608, "step": 877 }, { "epoch": 0.19, "learning_rate": 9.882490827573375e-06, "logits/chosen": -0.8120752573013306, "logits/rejected": -0.8191591501235962, "logps/chosen": -102.39521026611328, "logps/rejected": -89.84883117675781, "loss": 0.2673, "rewards/accuracies": 1.0, "rewards/chosen": -0.312652587890625, "rewards/margins": 0.40224915742874146, "rewards/rejected": -0.7149017453193665, "step": 878 }, { "epoch": 0.19, "learning_rate": 9.882104221101634e-06, "logits/chosen": -0.5504143238067627, "logits/rejected": -0.5286214351654053, "logps/chosen": -146.8690185546875, "logps/rejected": -270.7805480957031, "loss": 0.0391, "rewards/accuracies": 1.0, "rewards/chosen": 3.187396287918091, "rewards/margins": 2.833883762359619, "rewards/rejected": 0.35351258516311646, "step": 879 }, { "epoch": 0.19, "learning_rate": 9.881716987291235e-06, "logits/chosen": -0.45500117540359497, "logits/rejected": -0.45500117540359497, "logps/chosen": -161.27662658691406, "logps/rejected": -161.27662658691406, "loss": 0.6791, "rewards/accuracies": 0.0, "rewards/chosen": -3.39701247215271, "rewards/margins": 0.0, "rewards/rejected": -3.39701247215271, "step": 880 }, { "epoch": 0.19, "learning_rate": 9.88132912619194e-06, "logits/chosen": -0.7980444431304932, "logits/rejected": -0.8211427330970764, "logps/chosen": -129.36334228515625, "logps/rejected": -154.560546875, "loss": 0.9871, "rewards/accuracies": 0.0, "rewards/chosen": -0.21775054931640625, "rewards/margins": -1.8234847784042358, "rewards/rejected": 1.6057342290878296, "step": 881 }, { "epoch": 0.2, "learning_rate": 9.880940637853585e-06, "logits/chosen": -0.7584762573242188, "logits/rejected": -0.7053876519203186, "logps/chosen": -121.75881958007812, "logps/rejected": -237.34170532226562, "loss": 0.042, "rewards/accuracies": 1.0, "rewards/chosen": -0.06552276760339737, "rewards/margins": 4.9630126953125, "rewards/rejected": -5.02853536605835, "step": 882 }, { "epoch": 0.2, "learning_rate": 9.880551522326093e-06, "logits/chosen": -0.44029292464256287, "logits/rejected": -0.4365713596343994, "logps/chosen": -109.40768432617188, "logps/rejected": -85.27947235107422, "loss": 0.1332, "rewards/accuracies": 1.0, "rewards/chosen": 0.358316034078598, "rewards/margins": 1.1886818408966064, "rewards/rejected": -0.830365777015686, "step": 883 }, { "epoch": 0.2, "learning_rate": 9.880161779659463e-06, "logits/chosen": -0.8138925433158875, "logits/rejected": -0.3668684959411621, "logps/chosen": -180.38327026367188, "logps/rejected": -285.5981140136719, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": 0.5539810061454773, "rewards/margins": 13.977842330932617, "rewards/rejected": -13.423861503601074, "step": 884 }, { "epoch": 0.2, "learning_rate": 9.879771409903775e-06, "logits/chosen": -1.1299539804458618, "logits/rejected": -1.1231417655944824, "logps/chosen": -124.87484741210938, "logps/rejected": -142.695556640625, "loss": 0.2272, "rewards/accuracies": 1.0, "rewards/chosen": -2.277613878250122, "rewards/margins": 1.3740935325622559, "rewards/rejected": -3.651707410812378, "step": 885 }, { "epoch": 0.2, "learning_rate": 9.879380413109193e-06, "logits/chosen": -0.7859264612197876, "logits/rejected": -0.7711543440818787, "logps/chosen": -82.74755859375, "logps/rejected": -162.70590209960938, "loss": 0.0624, "rewards/accuracies": 1.0, "rewards/chosen": 0.3317398130893707, "rewards/margins": 2.047084093093872, "rewards/rejected": -1.7153443098068237, "step": 886 }, { "epoch": 0.2, "learning_rate": 9.878988789325955e-06, "logits/chosen": -0.45630592107772827, "logits/rejected": -0.47488638758659363, "logps/chosen": -79.96165466308594, "logps/rejected": -88.84469604492188, "loss": 0.9611, "rewards/accuracies": 0.0, "rewards/chosen": -2.54447865486145, "rewards/margins": -0.8828986883163452, "rewards/rejected": -1.661579966545105, "step": 887 }, { "epoch": 0.2, "learning_rate": 9.878596538604388e-06, "logits/chosen": -0.6525511145591736, "logits/rejected": -0.6512984037399292, "logps/chosen": -81.68258666992188, "logps/rejected": -126.85517883300781, "loss": 0.2477, "rewards/accuracies": 1.0, "rewards/chosen": -0.5330551266670227, "rewards/margins": 1.0491669178009033, "rewards/rejected": -1.5822219848632812, "step": 888 }, { "epoch": 0.2, "learning_rate": 9.878203660994894e-06, "logits/chosen": -1.16559636592865, "logits/rejected": -1.254088044166565, "logps/chosen": -114.89547729492188, "logps/rejected": -70.07186889648438, "loss": 1.7369, "rewards/accuracies": 1.0, "rewards/chosen": -1.038458228111267, "rewards/margins": 1.7813934087753296, "rewards/rejected": -2.8198516368865967, "step": 889 }, { "epoch": 0.2, "learning_rate": 9.877810156547956e-06, "logits/chosen": -0.6446656584739685, "logits/rejected": -0.6286717653274536, "logps/chosen": -137.89630126953125, "logps/rejected": -112.27854919433594, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.2854721248149872, "rewards/margins": 2.1462509632110596, "rewards/rejected": -1.86077880859375, "step": 890 }, { "epoch": 0.2, "learning_rate": 9.877416025314139e-06, "logits/chosen": -0.7520719170570374, "logits/rejected": -0.7510973811149597, "logps/chosen": -93.33833312988281, "logps/rejected": -205.8202667236328, "loss": 0.1045, "rewards/accuracies": 1.0, "rewards/chosen": -0.6661178469657898, "rewards/margins": 2.0381133556365967, "rewards/rejected": -2.7042312622070312, "step": 891 }, { "epoch": 0.2, "learning_rate": 9.877021267344087e-06, "logits/chosen": -0.4412018954753876, "logits/rejected": -0.38920044898986816, "logps/chosen": -204.41452026367188, "logps/rejected": -170.815185546875, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": 3.718235731124878, "rewards/margins": 7.352116584777832, "rewards/rejected": -3.633880615234375, "step": 892 }, { "epoch": 0.2, "learning_rate": 9.876625882688526e-06, "logits/chosen": -0.6958832740783691, "logits/rejected": -0.5801172852516174, "logps/chosen": -83.58573913574219, "logps/rejected": -125.78924560546875, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.8585365414619446, "rewards/margins": 4.3377509117126465, "rewards/rejected": -5.196287631988525, "step": 893 }, { "epoch": 0.2, "learning_rate": 9.876229871398263e-06, "logits/chosen": -0.6863581538200378, "logits/rejected": -0.6863581538200378, "logps/chosen": -169.1049346923828, "logps/rejected": -169.1049346923828, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -4.543403148651123, "rewards/margins": 0.0, "rewards/rejected": -4.543403148651123, "step": 894 }, { "epoch": 0.2, "learning_rate": 9.875833233524183e-06, "logits/chosen": -0.5684399008750916, "logits/rejected": -0.5684399008750916, "logps/chosen": -150.70611572265625, "logps/rejected": -150.70611572265625, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -1.3986740112304688, "rewards/margins": 0.0, "rewards/rejected": -1.3986740112304688, "step": 895 }, { "epoch": 0.2, "learning_rate": 9.875435969117254e-06, "logits/chosen": -0.469575971364975, "logits/rejected": -0.3933859169483185, "logps/chosen": -170.08004760742188, "logps/rejected": -157.86422729492188, "loss": 0.24, "rewards/accuracies": 1.0, "rewards/chosen": -1.0120575428009033, "rewards/margins": 2.765749454498291, "rewards/rejected": -3.7778069972991943, "step": 896 }, { "epoch": 0.2, "learning_rate": 9.875038078228522e-06, "logits/chosen": -0.6066269278526306, "logits/rejected": -0.5636944770812988, "logps/chosen": -114.05801391601562, "logps/rejected": -155.41812133789062, "loss": 0.2266, "rewards/accuracies": 1.0, "rewards/chosen": -2.045117139816284, "rewards/margins": 0.5742173194885254, "rewards/rejected": -2.6193344593048096, "step": 897 }, { "epoch": 0.2, "learning_rate": 9.874639560909118e-06, "logits/chosen": -1.020080327987671, "logits/rejected": -0.9443391561508179, "logps/chosen": -125.406982421875, "logps/rejected": -225.51019287109375, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": 2.3525421619415283, "rewards/margins": 4.291358947753906, "rewards/rejected": -1.9388169050216675, "step": 898 }, { "epoch": 0.2, "learning_rate": 9.87424041721025e-06, "logits/chosen": -0.5905193090438843, "logits/rejected": -0.48961910605430603, "logps/chosen": -106.66682434082031, "logps/rejected": -287.2846374511719, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.38416367769241333, "rewards/margins": 6.24863338470459, "rewards/rejected": -6.6327972412109375, "step": 899 }, { "epoch": 0.2, "learning_rate": 9.873840647183204e-06, "logits/chosen": -0.7224101424217224, "logits/rejected": -0.4281827509403229, "logps/chosen": -105.74388122558594, "logps/rejected": -214.82650756835938, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": 0.04682007059454918, "rewards/margins": 4.489850044250488, "rewards/rejected": -4.443029880523682, "step": 900 }, { "epoch": 0.2, "learning_rate": 9.87344025087935e-06, "logits/chosen": -0.9777504205703735, "logits/rejected": -0.5019979476928711, "logps/chosen": -85.07044982910156, "logps/rejected": -367.81671142578125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 1.0147743225097656, "rewards/margins": 20.57844352722168, "rewards/rejected": -19.563669204711914, "step": 901 }, { "epoch": 0.2, "learning_rate": 9.87303922835014e-06, "logits/chosen": -0.508920431137085, "logits/rejected": -0.4797024726867676, "logps/chosen": -126.30874633789062, "logps/rejected": -105.48007202148438, "loss": 0.1821, "rewards/accuracies": 1.0, "rewards/chosen": 1.620123267173767, "rewards/margins": 1.1191115379333496, "rewards/rejected": 0.5010116696357727, "step": 902 }, { "epoch": 0.2, "learning_rate": 9.872637579647105e-06, "logits/chosen": -0.9110789895057678, "logits/rejected": -0.8881663084030151, "logps/chosen": -167.6426544189453, "logps/rejected": -176.8963623046875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 2.802189588546753, "rewards/margins": 8.155097007751465, "rewards/rejected": -5.352907657623291, "step": 903 }, { "epoch": 0.2, "learning_rate": 9.872235304821853e-06, "logits/chosen": -0.790669858455658, "logits/rejected": -0.8141142725944519, "logps/chosen": -130.72732543945312, "logps/rejected": -103.79473114013672, "loss": 0.094, "rewards/accuracies": 1.0, "rewards/chosen": -2.1519241333007812, "rewards/margins": 1.5775558948516846, "rewards/rejected": -3.729480028152466, "step": 904 }, { "epoch": 0.2, "learning_rate": 9.871832403926077e-06, "logits/chosen": -0.5607197284698486, "logits/rejected": -0.5544236302375793, "logps/chosen": -141.68634033203125, "logps/rejected": -185.91757202148438, "loss": 0.3858, "rewards/accuracies": 1.0, "rewards/chosen": -2.6791367530822754, "rewards/margins": 1.2719955444335938, "rewards/rejected": -3.951132297515869, "step": 905 }, { "epoch": 0.2, "learning_rate": 9.871428877011549e-06, "logits/chosen": -0.6361001133918762, "logits/rejected": -0.5707293152809143, "logps/chosen": -243.73318481445312, "logps/rejected": -148.59124755859375, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": 3.049276828765869, "rewards/margins": 3.1098785400390625, "rewards/rejected": -0.06060180813074112, "step": 906 }, { "epoch": 0.2, "learning_rate": 9.87102472413012e-06, "logits/chosen": -0.541590690612793, "logits/rejected": -0.48302748799324036, "logps/chosen": -91.31571197509766, "logps/rejected": -165.79800415039062, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 0.4405784606933594, "rewards/margins": 4.630181312561035, "rewards/rejected": -4.189602851867676, "step": 907 }, { "epoch": 0.2, "learning_rate": 9.870619945333727e-06, "logits/chosen": -1.1950711011886597, "logits/rejected": -1.1089035272598267, "logps/chosen": -218.10031127929688, "logps/rejected": -329.31048583984375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.4179977476596832, "rewards/margins": 14.29423999786377, "rewards/rejected": -14.712237358093262, "step": 908 }, { "epoch": 0.2, "learning_rate": 9.870214540674377e-06, "logits/chosen": -0.5926674008369446, "logits/rejected": -0.5721110701560974, "logps/chosen": -55.363712310791016, "logps/rejected": -64.6189193725586, "loss": 0.2469, "rewards/accuracies": 1.0, "rewards/chosen": -0.15242882072925568, "rewards/margins": 0.5172523856163025, "rewards/rejected": -0.669681191444397, "step": 909 }, { "epoch": 0.2, "learning_rate": 9.869808510204165e-06, "logits/chosen": -0.7277579307556152, "logits/rejected": -0.7091979384422302, "logps/chosen": -86.90240478515625, "logps/rejected": -173.17843627929688, "loss": 0.3091, "rewards/accuracies": 1.0, "rewards/chosen": -1.0095093250274658, "rewards/margins": 1.9131546020507812, "rewards/rejected": -2.922663927078247, "step": 910 }, { "epoch": 0.2, "learning_rate": 9.869401853975268e-06, "logits/chosen": -0.7421103715896606, "logits/rejected": -0.8013793230056763, "logps/chosen": -105.69804382324219, "logps/rejected": -35.851566314697266, "loss": 0.4353, "rewards/accuracies": 0.0, "rewards/chosen": -0.9070038199424744, "rewards/margins": -0.3272327780723572, "rewards/rejected": -0.5797710418701172, "step": 911 }, { "epoch": 0.2, "learning_rate": 9.868994572039939e-06, "logits/chosen": -0.5323014259338379, "logits/rejected": -0.47077441215515137, "logps/chosen": -114.41178131103516, "logps/rejected": -193.24313354492188, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.2808830440044403, "rewards/margins": 6.7796220779418945, "rewards/rejected": -7.060504913330078, "step": 912 }, { "epoch": 0.2, "learning_rate": 9.86858666445051e-06, "logits/chosen": -0.8447217345237732, "logits/rejected": -0.8623420000076294, "logps/chosen": -99.99726867675781, "logps/rejected": -63.37468719482422, "loss": 0.1412, "rewards/accuracies": 1.0, "rewards/chosen": -0.6114494204521179, "rewards/margins": 1.1202874183654785, "rewards/rejected": -1.7317367792129517, "step": 913 }, { "epoch": 0.2, "learning_rate": 9.8681781312594e-06, "logits/chosen": -0.7372955679893494, "logits/rejected": -0.7379109263420105, "logps/chosen": -96.9688720703125, "logps/rejected": -157.3426513671875, "loss": 0.3069, "rewards/accuracies": 1.0, "rewards/chosen": 0.6399490237236023, "rewards/margins": 0.4534164071083069, "rewards/rejected": 0.18653260171413422, "step": 914 }, { "epoch": 0.2, "learning_rate": 9.867768972519101e-06, "logits/chosen": -0.5468807816505432, "logits/rejected": -0.45025914907455444, "logps/chosen": -101.51516723632812, "logps/rejected": -146.99490356445312, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.36184999346733093, "rewards/margins": 4.836102485656738, "rewards/rejected": -5.1979522705078125, "step": 915 }, { "epoch": 0.2, "learning_rate": 9.867359188282193e-06, "logits/chosen": -0.8842455744743347, "logits/rejected": -0.8647706508636475, "logps/chosen": -203.88816833496094, "logps/rejected": -119.93756866455078, "loss": 0.0491, "rewards/accuracies": 1.0, "rewards/chosen": -0.21968689560890198, "rewards/margins": 2.2894842624664307, "rewards/rejected": -2.5091712474823, "step": 916 }, { "epoch": 0.2, "learning_rate": 9.86694877860133e-06, "logits/chosen": -0.5425186157226562, "logits/rejected": -0.5425186157226562, "logps/chosen": -136.86007690429688, "logps/rejected": -136.86007690429688, "loss": 0.3472, "rewards/accuracies": 0.0, "rewards/chosen": -6.42291784286499, "rewards/margins": 0.0, "rewards/rejected": -6.42291784286499, "step": 917 }, { "epoch": 0.2, "learning_rate": 9.866537743529247e-06, "logits/chosen": -0.9313812255859375, "logits/rejected": -0.9033624529838562, "logps/chosen": -76.61910247802734, "logps/rejected": -59.197784423828125, "loss": 0.0453, "rewards/accuracies": 1.0, "rewards/chosen": 0.6150169372558594, "rewards/margins": 2.3648500442504883, "rewards/rejected": -1.7498329877853394, "step": 918 }, { "epoch": 0.2, "learning_rate": 9.866126083118765e-06, "logits/chosen": -0.6103572845458984, "logits/rejected": -0.5552460551261902, "logps/chosen": -101.92178344726562, "logps/rejected": -146.0137481689453, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": 0.5095291137695312, "rewards/margins": 3.367257833480835, "rewards/rejected": -2.8577287197113037, "step": 919 }, { "epoch": 0.2, "learning_rate": 9.865713797422778e-06, "logits/chosen": -0.6795247197151184, "logits/rejected": -0.6421193480491638, "logps/chosen": -71.19557189941406, "logps/rejected": -115.95314025878906, "loss": 0.3216, "rewards/accuracies": 1.0, "rewards/chosen": 0.3622589111328125, "rewards/margins": 3.4446685314178467, "rewards/rejected": -3.082409620285034, "step": 920 }, { "epoch": 0.2, "learning_rate": 9.865300886494264e-06, "logits/chosen": -0.6713318228721619, "logits/rejected": -0.692358672618866, "logps/chosen": -204.11985778808594, "logps/rejected": -157.69937133789062, "loss": 0.6066, "rewards/accuracies": 0.0, "rewards/chosen": -0.2635345458984375, "rewards/margins": -0.858990490436554, "rewards/rejected": 0.5954559445381165, "step": 921 }, { "epoch": 0.2, "learning_rate": 9.864887350386284e-06, "logits/chosen": -0.5945829153060913, "logits/rejected": -0.5827163457870483, "logps/chosen": -68.06432342529297, "logps/rejected": -86.25653076171875, "loss": 0.2546, "rewards/accuracies": 1.0, "rewards/chosen": 0.17555618286132812, "rewards/margins": 0.4102287292480469, "rewards/rejected": -0.23467254638671875, "step": 922 }, { "epoch": 0.2, "learning_rate": 9.864473189151972e-06, "logits/chosen": -0.8396347761154175, "logits/rejected": -0.7646147608757019, "logps/chosen": -83.19660949707031, "logps/rejected": -156.95895385742188, "loss": 0.3814, "rewards/accuracies": 1.0, "rewards/chosen": -0.6824241876602173, "rewards/margins": 2.6284570693969727, "rewards/rejected": -3.3108811378479004, "step": 923 }, { "epoch": 0.2, "learning_rate": 9.864058402844553e-06, "logits/chosen": -0.47504502534866333, "logits/rejected": -0.4582909643650055, "logps/chosen": -13.355697631835938, "logps/rejected": -58.36985397338867, "loss": 0.2655, "rewards/accuracies": 1.0, "rewards/chosen": 0.2510931193828583, "rewards/margins": 1.7900160551071167, "rewards/rejected": -1.538922905921936, "step": 924 }, { "epoch": 0.2, "learning_rate": 9.863642991517317e-06, "logits/chosen": -0.8494991064071655, "logits/rejected": -0.8494991064071655, "logps/chosen": -84.29603576660156, "logps/rejected": -84.29603576660156, "loss": 0.3993, "rewards/accuracies": 0.0, "rewards/chosen": -1.6901711225509644, "rewards/margins": 0.0, "rewards/rejected": -1.6901711225509644, "step": 925 }, { "epoch": 0.2, "learning_rate": 9.863226955223653e-06, "logits/chosen": -1.124330997467041, "logits/rejected": -1.08807373046875, "logps/chosen": -61.36254119873047, "logps/rejected": -168.15599060058594, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -1.2198047637939453, "rewards/margins": 3.2621073722839355, "rewards/rejected": -4.481912136077881, "step": 926 }, { "epoch": 0.21, "learning_rate": 9.862810294017014e-06, "logits/chosen": -1.0205870866775513, "logits/rejected": -0.977876603603363, "logps/chosen": -151.18988037109375, "logps/rejected": -284.1969299316406, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 1.9972671270370483, "rewards/margins": 10.045083045959473, "rewards/rejected": -8.047816276550293, "step": 927 }, { "epoch": 0.21, "learning_rate": 9.86239300795094e-06, "logits/chosen": -0.6880605816841125, "logits/rejected": -0.588009774684906, "logps/chosen": -176.295654296875, "logps/rejected": -258.0748596191406, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": 0.2853759825229645, "rewards/margins": 4.6143798828125, "rewards/rejected": -4.329003810882568, "step": 928 }, { "epoch": 0.21, "learning_rate": 9.861975097079057e-06, "logits/chosen": -0.852593719959259, "logits/rejected": -0.8589868545532227, "logps/chosen": -101.88082885742188, "logps/rejected": -99.87128448486328, "loss": 0.1643, "rewards/accuracies": 1.0, "rewards/chosen": -0.8937774896621704, "rewards/margins": 0.9439376592636108, "rewards/rejected": -1.8377151489257812, "step": 929 }, { "epoch": 0.21, "learning_rate": 9.861556561455061e-06, "logits/chosen": -1.0071245431900024, "logits/rejected": -0.9905649423599243, "logps/chosen": -61.83504104614258, "logps/rejected": -54.12361145019531, "loss": 0.2532, "rewards/accuracies": 1.0, "rewards/chosen": 0.5261516571044922, "rewards/margins": 0.41758307814598083, "rewards/rejected": 0.10856857150793076, "step": 930 }, { "epoch": 0.21, "learning_rate": 9.861137401132733e-06, "logits/chosen": -0.7113713622093201, "logits/rejected": 0.056548018008470535, "logps/chosen": -100.13236999511719, "logps/rejected": -229.2460174560547, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8187332153320312, "rewards/margins": 10.192480087280273, "rewards/rejected": -11.011213302612305, "step": 931 }, { "epoch": 0.21, "learning_rate": 9.860717616165934e-06, "logits/chosen": -1.332685112953186, "logits/rejected": -1.297523856163025, "logps/chosen": -91.8475341796875, "logps/rejected": -112.90126037597656, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 1.7393234968185425, "rewards/margins": 5.402227878570557, "rewards/rejected": -3.6629045009613037, "step": 932 }, { "epoch": 0.21, "learning_rate": 9.860297206608606e-06, "logits/chosen": -0.8272624015808105, "logits/rejected": -0.7118315696716309, "logps/chosen": -141.2473907470703, "logps/rejected": -190.17935180664062, "loss": 0.268, "rewards/accuracies": 1.0, "rewards/chosen": 1.4257004261016846, "rewards/margins": 8.251254081726074, "rewards/rejected": -6.825553894042969, "step": 933 }, { "epoch": 0.21, "learning_rate": 9.859876172514773e-06, "logits/chosen": -0.3231971263885498, "logits/rejected": -0.30181288719177246, "logps/chosen": -42.75054168701172, "logps/rejected": -21.818647384643555, "loss": 0.1754, "rewards/accuracies": 1.0, "rewards/chosen": 0.06356125324964523, "rewards/margins": 0.8782828450202942, "rewards/rejected": -0.8147215843200684, "step": 934 }, { "epoch": 0.21, "learning_rate": 9.859454513938534e-06, "logits/chosen": -0.4707038998603821, "logits/rejected": -0.4323089122772217, "logps/chosen": -98.82545471191406, "logps/rejected": -65.48893737792969, "loss": 0.1539, "rewards/accuracies": 1.0, "rewards/chosen": -1.2097710371017456, "rewards/margins": 1.0315536260604858, "rewards/rejected": -2.2413246631622314, "step": 935 }, { "epoch": 0.21, "learning_rate": 9.859032230934071e-06, "logits/chosen": -0.9063400626182556, "logits/rejected": -0.8442193269729614, "logps/chosen": -180.46316528320312, "logps/rejected": -178.97833251953125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.49698182940483093, "rewards/margins": 6.068371295928955, "rewards/rejected": -5.571389675140381, "step": 936 }, { "epoch": 0.21, "learning_rate": 9.858609323555646e-06, "logits/chosen": -1.0306493043899536, "logits/rejected": -1.0198185443878174, "logps/chosen": -42.650291442871094, "logps/rejected": -29.529199600219727, "loss": 0.2875, "rewards/accuracies": 1.0, "rewards/chosen": -0.7696388363838196, "rewards/margins": 0.2954156994819641, "rewards/rejected": -1.0650545358657837, "step": 937 }, { "epoch": 0.21, "learning_rate": 9.858185791857604e-06, "logits/chosen": -0.48044657707214355, "logits/rejected": -0.38857242465019226, "logps/chosen": -131.5011749267578, "logps/rejected": -189.93264770507812, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -2.081627607345581, "rewards/margins": 3.867304563522339, "rewards/rejected": -5.94893217086792, "step": 938 }, { "epoch": 0.21, "learning_rate": 9.857761635894367e-06, "logits/chosen": -1.0651500225067139, "logits/rejected": -1.0365486145019531, "logps/chosen": -178.41671752929688, "logps/rejected": -153.57797241210938, "loss": 0.0715, "rewards/accuracies": 1.0, "rewards/chosen": 1.6091187000274658, "rewards/margins": 4.8584489822387695, "rewards/rejected": -3.2493302822113037, "step": 939 }, { "epoch": 0.21, "learning_rate": 9.857336855720439e-06, "logits/chosen": -0.6700614094734192, "logits/rejected": -0.6499446630477905, "logps/chosen": -149.49185180664062, "logps/rejected": -168.97866821289062, "loss": 0.1063, "rewards/accuracies": 1.0, "rewards/chosen": 1.0398117303848267, "rewards/margins": 1.6074234247207642, "rewards/rejected": -0.5676116943359375, "step": 940 }, { "epoch": 0.21, "learning_rate": 9.856911451390399e-06, "logits/chosen": -0.6062577366828918, "logits/rejected": -0.5512149333953857, "logps/chosen": -165.20269775390625, "logps/rejected": -141.25216674804688, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 1.37627112865448, "rewards/margins": 5.212893962860107, "rewards/rejected": -3.836622714996338, "step": 941 }, { "epoch": 0.21, "learning_rate": 9.856485422958913e-06, "logits/chosen": -0.7157541513442993, "logits/rejected": -0.7212505340576172, "logps/chosen": -109.53445434570312, "logps/rejected": -266.5389099121094, "loss": 0.1654, "rewards/accuracies": 1.0, "rewards/chosen": 0.7474471926689148, "rewards/margins": 1.2818924188613892, "rewards/rejected": -0.5344452261924744, "step": 942 }, { "epoch": 0.21, "learning_rate": 9.856058770480726e-06, "logits/chosen": -0.8909076452255249, "logits/rejected": -0.8687085509300232, "logps/chosen": -92.17366027832031, "logps/rejected": -177.13511657714844, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/chosen": 0.047414399683475494, "rewards/margins": 3.564371347427368, "rewards/rejected": -3.5169570446014404, "step": 943 }, { "epoch": 0.21, "learning_rate": 9.855631494010661e-06, "logits/chosen": -1.0886203050613403, "logits/rejected": -1.1542110443115234, "logps/chosen": -156.3821258544922, "logps/rejected": -109.6747055053711, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/chosen": 0.8371978998184204, "rewards/margins": 2.6615281105041504, "rewards/rejected": -1.82433021068573, "step": 944 }, { "epoch": 0.21, "learning_rate": 9.855203593603622e-06, "logits/chosen": -0.8143914937973022, "logits/rejected": -0.8143914937973022, "logps/chosen": -157.1217498779297, "logps/rejected": -157.1217498779297, "loss": 0.3555, "rewards/accuracies": 0.0, "rewards/chosen": -6.007798194885254, "rewards/margins": 0.0, "rewards/rejected": -6.007798194885254, "step": 945 }, { "epoch": 0.21, "learning_rate": 9.85477506931459e-06, "logits/chosen": -0.6690681576728821, "logits/rejected": -0.70846027135849, "logps/chosen": -111.569091796875, "logps/rejected": -117.62135314941406, "loss": 0.3353, "rewards/accuracies": 1.0, "rewards/chosen": -1.5410645008087158, "rewards/margins": 0.11438977718353271, "rewards/rejected": -1.6554542779922485, "step": 946 }, { "epoch": 0.21, "learning_rate": 9.854345921198637e-06, "logits/chosen": -0.9327982664108276, "logits/rejected": -0.9036582708358765, "logps/chosen": -108.25660705566406, "logps/rejected": -152.02310180664062, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": -0.9666358828544617, "rewards/margins": 2.8748068809509277, "rewards/rejected": -3.841442823410034, "step": 947 }, { "epoch": 0.21, "learning_rate": 9.853916149310898e-06, "logits/chosen": -0.8651964664459229, "logits/rejected": -0.8992708921432495, "logps/chosen": -109.45368194580078, "logps/rejected": -82.42027282714844, "loss": 0.1017, "rewards/accuracies": 1.0, "rewards/chosen": -0.2116859406232834, "rewards/margins": 1.596522569656372, "rewards/rejected": -1.8082084655761719, "step": 948 }, { "epoch": 0.21, "learning_rate": 9.853485753706603e-06, "logits/chosen": -0.9868552684783936, "logits/rejected": -1.0008288621902466, "logps/chosen": -165.84512329101562, "logps/rejected": -147.3943328857422, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 0.22629700601100922, "rewards/margins": 5.3218865394592285, "rewards/rejected": -5.095589637756348, "step": 949 }, { "epoch": 0.21, "learning_rate": 9.853054734441059e-06, "logits/chosen": -1.2056325674057007, "logits/rejected": -1.0726100206375122, "logps/chosen": -71.83769226074219, "logps/rejected": -262.14337158203125, "loss": 0.0994, "rewards/accuracies": 1.0, "rewards/chosen": 0.9070495963096619, "rewards/margins": 2.114750623703003, "rewards/rejected": -1.2077010869979858, "step": 950 }, { "epoch": 0.21, "learning_rate": 9.852623091569646e-06, "logits/chosen": -0.8431333303451538, "logits/rejected": -0.7510104179382324, "logps/chosen": -99.84580993652344, "logps/rejected": -133.18380737304688, "loss": 0.074, "rewards/accuracies": 1.0, "rewards/chosen": -2.765254259109497, "rewards/margins": 2.285003900527954, "rewards/rejected": -5.050258159637451, "step": 951 }, { "epoch": 0.21, "learning_rate": 9.852190825147831e-06, "logits/chosen": -0.5751140117645264, "logits/rejected": -0.47442299127578735, "logps/chosen": -143.1092987060547, "logps/rejected": -43.132572174072266, "loss": 0.0766, "rewards/accuracies": 1.0, "rewards/chosen": 0.310964971780777, "rewards/margins": 1.801574468612671, "rewards/rejected": -1.4906095266342163, "step": 952 }, { "epoch": 0.21, "learning_rate": 9.85175793523116e-06, "logits/chosen": -0.6371622681617737, "logits/rejected": -0.5839284062385559, "logps/chosen": -101.55970764160156, "logps/rejected": -124.48948669433594, "loss": 0.0528, "rewards/accuracies": 1.0, "rewards/chosen": 0.6418693661689758, "rewards/margins": 5.340157508850098, "rewards/rejected": -4.6982879638671875, "step": 953 }, { "epoch": 0.21, "learning_rate": 9.851324421875256e-06, "logits/chosen": -0.7891619205474854, "logits/rejected": -0.7892037630081177, "logps/chosen": -171.74429321289062, "logps/rejected": -121.21085357666016, "loss": 0.2739, "rewards/accuracies": 1.0, "rewards/chosen": -2.0392441749572754, "rewards/margins": 0.5878937244415283, "rewards/rejected": -2.6271378993988037, "step": 954 }, { "epoch": 0.21, "learning_rate": 9.850890285135829e-06, "logits/chosen": -1.1990801095962524, "logits/rejected": -1.1585073471069336, "logps/chosen": -80.87754821777344, "logps/rejected": -121.10014343261719, "loss": 0.1936, "rewards/accuracies": 1.0, "rewards/chosen": -0.010769653134047985, "rewards/margins": 0.7746383547782898, "rewards/rejected": -0.7854080200195312, "step": 955 }, { "epoch": 0.21, "learning_rate": 9.850455525068658e-06, "logits/chosen": -0.9035502076148987, "logits/rejected": -0.8902080655097961, "logps/chosen": -79.30880737304688, "logps/rejected": -47.488014221191406, "loss": 0.2001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9899231195449829, "rewards/margins": 0.769606351852417, "rewards/rejected": -1.7595294713974, "step": 956 }, { "epoch": 0.21, "learning_rate": 9.850020141729615e-06, "logits/chosen": -0.7887477278709412, "logits/rejected": -0.7653403282165527, "logps/chosen": -64.22532653808594, "logps/rejected": -95.23533630371094, "loss": 0.1884, "rewards/accuracies": 1.0, "rewards/chosen": -1.048413872718811, "rewards/margins": 1.479019284248352, "rewards/rejected": -2.527433156967163, "step": 957 }, { "epoch": 0.21, "learning_rate": 9.849584135174642e-06, "logits/chosen": -0.9182920455932617, "logits/rejected": -0.9184579849243164, "logps/chosen": -103.85216522216797, "logps/rejected": -136.48577880859375, "loss": 0.2864, "rewards/accuracies": 1.0, "rewards/chosen": -1.0414314270019531, "rewards/margins": 0.30079734325408936, "rewards/rejected": -1.3422287702560425, "step": 958 }, { "epoch": 0.21, "learning_rate": 9.849147505459766e-06, "logits/chosen": -0.906338095664978, "logits/rejected": -0.9038633108139038, "logps/chosen": -83.2319564819336, "logps/rejected": -57.35143280029297, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": 0.4589439332485199, "rewards/margins": 3.435058355331421, "rewards/rejected": -2.976114511489868, "step": 959 }, { "epoch": 0.21, "learning_rate": 9.848710252641092e-06, "logits/chosen": -0.9454014301300049, "logits/rejected": -0.9239707589149475, "logps/chosen": -77.62004852294922, "logps/rejected": -88.12109375, "loss": 0.0777, "rewards/accuracies": 1.0, "rewards/chosen": 0.6437363028526306, "rewards/margins": 1.7848784923553467, "rewards/rejected": -1.1411422491073608, "step": 960 }, { "epoch": 0.21, "learning_rate": 9.848272376774807e-06, "logits/chosen": -0.9701720476150513, "logits/rejected": -0.9701720476150513, "logps/chosen": -126.81366729736328, "logps/rejected": -126.81366729736328, "loss": 0.3479, "rewards/accuracies": 0.0, "rewards/chosen": -3.339008331298828, "rewards/margins": 0.0, "rewards/rejected": -3.339008331298828, "step": 961 }, { "epoch": 0.21, "learning_rate": 9.847833877917177e-06, "logits/chosen": -0.9552633762359619, "logits/rejected": -0.9550233483314514, "logps/chosen": -101.56493377685547, "logps/rejected": -90.71727752685547, "loss": 0.0723, "rewards/accuracies": 1.0, "rewards/chosen": 0.41346436738967896, "rewards/margins": 3.2601051330566406, "rewards/rejected": -2.8466408252716064, "step": 962 }, { "epoch": 0.21, "learning_rate": 9.847394756124547e-06, "logits/chosen": -0.7018311023712158, "logits/rejected": -0.7018311023712158, "logps/chosen": -93.91981506347656, "logps/rejected": -93.91981506347656, "loss": 0.3489, "rewards/accuracies": 0.0, "rewards/chosen": -0.096282958984375, "rewards/margins": 0.0, "rewards/rejected": -0.096282958984375, "step": 963 }, { "epoch": 0.21, "learning_rate": 9.846955011453343e-06, "logits/chosen": -1.0768507719039917, "logits/rejected": -1.0593668222427368, "logps/chosen": -108.14925384521484, "logps/rejected": -339.3104553222656, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": 0.4403328001499176, "rewards/margins": 4.458017826080322, "rewards/rejected": -4.0176849365234375, "step": 964 }, { "epoch": 0.21, "learning_rate": 9.846514643960072e-06, "logits/chosen": -0.824087381362915, "logits/rejected": -0.755795955657959, "logps/chosen": -174.07244873046875, "logps/rejected": -311.3498840332031, "loss": 0.5568, "rewards/accuracies": 0.0, "rewards/chosen": -3.8489975929260254, "rewards/margins": -0.6975541114807129, "rewards/rejected": -3.1514434814453125, "step": 965 }, { "epoch": 0.21, "learning_rate": 9.846073653701321e-06, "logits/chosen": -0.9245348572731018, "logits/rejected": -0.8995851278305054, "logps/chosen": -111.385986328125, "logps/rejected": -217.29920959472656, "loss": 0.6669, "rewards/accuracies": 0.0, "rewards/chosen": 0.06610565632581711, "rewards/margins": -0.580365002155304, "rewards/rejected": 0.6464706659317017, "step": 966 }, { "epoch": 0.21, "learning_rate": 9.845632040733754e-06, "logits/chosen": -0.709735095500946, "logits/rejected": -0.5779592394828796, "logps/chosen": -186.3562469482422, "logps/rejected": -56.91017532348633, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 1.4059890508651733, "rewards/margins": 3.813781261444092, "rewards/rejected": -2.407792329788208, "step": 967 }, { "epoch": 0.21, "learning_rate": 9.845189805114119e-06, "logits/chosen": -0.8837976455688477, "logits/rejected": -0.774886965751648, "logps/chosen": -189.10574340820312, "logps/rejected": -168.68475341796875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.1079590320587158, "rewards/margins": 4.884282112121582, "rewards/rejected": -5.992240905761719, "step": 968 }, { "epoch": 0.21, "learning_rate": 9.844746946899241e-06, "logits/chosen": -0.8125890493392944, "logits/rejected": -0.42541366815567017, "logps/chosen": -149.9906005859375, "logps/rejected": -132.9149932861328, "loss": 1.6117, "rewards/accuracies": 0.0, "rewards/chosen": -5.17230749130249, "rewards/margins": -0.25444984436035156, "rewards/rejected": -4.917857646942139, "step": 969 }, { "epoch": 0.21, "learning_rate": 9.844303466146027e-06, "logits/chosen": -0.856368899345398, "logits/rejected": -0.8032515048980713, "logps/chosen": -195.44845581054688, "logps/rejected": -183.86546325683594, "loss": 0.2049, "rewards/accuracies": 1.0, "rewards/chosen": -1.086492896080017, "rewards/margins": 0.6877975463867188, "rewards/rejected": -1.7742904424667358, "step": 970 }, { "epoch": 0.21, "learning_rate": 9.843859362911463e-06, "logits/chosen": -1.2854019403457642, "logits/rejected": -1.3142136335372925, "logps/chosen": -174.02059936523438, "logps/rejected": -184.60699462890625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": 0.8241638541221619, "rewards/margins": 4.420239448547363, "rewards/rejected": -3.5960755348205566, "step": 971 }, { "epoch": 0.22, "learning_rate": 9.843414637252615e-06, "logits/chosen": -0.6469441652297974, "logits/rejected": -0.5854745507240295, "logps/chosen": -121.23163604736328, "logps/rejected": -189.66729736328125, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.3691154420375824, "rewards/margins": 3.396561622619629, "rewards/rejected": -3.765676975250244, "step": 972 }, { "epoch": 0.22, "learning_rate": 9.842969289226629e-06, "logits/chosen": -1.4491082429885864, "logits/rejected": -1.4775391817092896, "logps/chosen": -118.47502136230469, "logps/rejected": -89.82693481445312, "loss": 0.0987, "rewards/accuracies": 1.0, "rewards/chosen": -0.4699111878871918, "rewards/margins": 1.5228149890899658, "rewards/rejected": -1.99272620677948, "step": 973 }, { "epoch": 0.22, "learning_rate": 9.842523318890733e-06, "logits/chosen": -1.0056042671203613, "logits/rejected": -1.0447427034378052, "logps/chosen": -249.88308715820312, "logps/rejected": -140.06454467773438, "loss": 0.3166, "rewards/accuracies": 1.0, "rewards/chosen": -2.3676986694335938, "rewards/margins": 0.12552809715270996, "rewards/rejected": -2.4932267665863037, "step": 974 }, { "epoch": 0.22, "learning_rate": 9.84207672630223e-06, "logits/chosen": -0.624944269657135, "logits/rejected": -0.609017014503479, "logps/chosen": -203.99002075195312, "logps/rejected": -190.22105407714844, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": 1.6040070056915283, "rewards/margins": 3.618072509765625, "rewards/rejected": -2.0140655040740967, "step": 975 }, { "epoch": 0.22, "learning_rate": 9.84162951151851e-06, "logits/chosen": -1.0152792930603027, "logits/rejected": -0.9908663630485535, "logps/chosen": -199.36013793945312, "logps/rejected": -211.18972778320312, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 1.3949387073516846, "rewards/margins": 4.629961013793945, "rewards/rejected": -3.2350220680236816, "step": 976 }, { "epoch": 0.22, "learning_rate": 9.841181674597034e-06, "logits/chosen": -1.0195857286453247, "logits/rejected": -0.8374218344688416, "logps/chosen": -215.19607543945312, "logps/rejected": -436.772705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.423968553543091, "rewards/margins": 12.950775146484375, "rewards/rejected": -9.526806831359863, "step": 977 }, { "epoch": 0.22, "learning_rate": 9.840733215595351e-06, "logits/chosen": -1.1001033782958984, "logits/rejected": -1.0932615995407104, "logps/chosen": -118.98185729980469, "logps/rejected": -212.6766357421875, "loss": 0.1694, "rewards/accuracies": 1.0, "rewards/chosen": -0.6030837893486023, "rewards/margins": 0.9774704575538635, "rewards/rejected": -1.5805542469024658, "step": 978 }, { "epoch": 0.22, "learning_rate": 9.840284134571088e-06, "logits/chosen": -1.2238638401031494, "logits/rejected": -1.2613450288772583, "logps/chosen": -170.2136993408203, "logps/rejected": -131.3482666015625, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 0.5403030514717102, "rewards/margins": 4.303813934326172, "rewards/rejected": -3.7635109424591064, "step": 979 }, { "epoch": 0.22, "learning_rate": 9.83983443158195e-06, "logits/chosen": -0.8118237257003784, "logits/rejected": -0.7907273769378662, "logps/chosen": -83.29132080078125, "logps/rejected": -73.3882064819336, "loss": 0.2278, "rewards/accuracies": 1.0, "rewards/chosen": -0.7722503542900085, "rewards/margins": 0.6443130373954773, "rewards/rejected": -1.4165633916854858, "step": 980 }, { "epoch": 0.22, "learning_rate": 9.839384106685721e-06, "logits/chosen": -1.0396808385849, "logits/rejected": -0.968568742275238, "logps/chosen": -122.31529235839844, "logps/rejected": -134.9445343017578, "loss": 0.1274, "rewards/accuracies": 1.0, "rewards/chosen": -1.1078720092773438, "rewards/margins": 5.385382175445557, "rewards/rejected": -6.4932541847229, "step": 981 }, { "epoch": 0.22, "learning_rate": 9.838933159940266e-06, "logits/chosen": -1.0935773849487305, "logits/rejected": -1.1075092554092407, "logps/chosen": -186.6497802734375, "logps/rejected": -87.36256408691406, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 2.62955641746521, "rewards/margins": 5.461150646209717, "rewards/rejected": -2.831594228744507, "step": 982 }, { "epoch": 0.22, "learning_rate": 9.838481591403536e-06, "logits/chosen": -0.9199174046516418, "logits/rejected": -0.8305774331092834, "logps/chosen": -69.52767944335938, "logps/rejected": -252.6177978515625, "loss": 0.0962, "rewards/accuracies": 1.0, "rewards/chosen": -1.8123451471328735, "rewards/margins": 1.550768256187439, "rewards/rejected": -3.3631134033203125, "step": 983 }, { "epoch": 0.22, "learning_rate": 9.83802940113355e-06, "logits/chosen": -0.7244497537612915, "logits/rejected": -0.6899893283843994, "logps/chosen": -154.967529296875, "logps/rejected": -223.35983276367188, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.4057830572128296, "rewards/margins": 8.795743942260742, "rewards/rejected": -7.389961242675781, "step": 984 }, { "epoch": 0.22, "learning_rate": 9.837576589188418e-06, "logits/chosen": -0.5044252276420593, "logits/rejected": -0.4116378426551819, "logps/chosen": -82.66822052001953, "logps/rejected": -215.66989135742188, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.4058380126953125, "rewards/margins": 4.379705905914307, "rewards/rejected": -5.785543918609619, "step": 985 }, { "epoch": 0.22, "learning_rate": 9.837123155626323e-06, "logits/chosen": -0.380849689245224, "logits/rejected": -0.2825157046318054, "logps/chosen": -109.35150146484375, "logps/rejected": -508.3568115234375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.302316278219223, "rewards/margins": 41.494022369384766, "rewards/rejected": -41.191707611083984, "step": 986 }, { "epoch": 0.22, "learning_rate": 9.836669100505532e-06, "logits/chosen": -0.7637065649032593, "logits/rejected": -0.7637065649032593, "logps/chosen": -124.11882781982422, "logps/rejected": -124.11882781982422, "loss": 0.3594, "rewards/accuracies": 0.0, "rewards/chosen": -0.6222488284111023, "rewards/margins": 0.0, "rewards/rejected": -0.6222488284111023, "step": 987 }, { "epoch": 0.22, "learning_rate": 9.836214423884387e-06, "logits/chosen": -0.9063935875892639, "logits/rejected": -0.8862938284873962, "logps/chosen": -83.00019836425781, "logps/rejected": -102.76252746582031, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.4362350404262543, "rewards/margins": 5.6459245681762695, "rewards/rejected": -5.209689617156982, "step": 988 }, { "epoch": 0.22, "learning_rate": 9.835759125821314e-06, "logits/chosen": -0.9283522367477417, "logits/rejected": -0.9209385514259338, "logps/chosen": -110.65679931640625, "logps/rejected": -78.15699768066406, "loss": 0.438, "rewards/accuracies": 0.0, "rewards/chosen": -0.7540969848632812, "rewards/margins": -0.33582305908203125, "rewards/rejected": -0.41827392578125, "step": 989 }, { "epoch": 0.22, "learning_rate": 9.83530320637482e-06, "logits/chosen": -0.7842546105384827, "logits/rejected": -0.7698089480400085, "logps/chosen": -100.46807861328125, "logps/rejected": -187.69009399414062, "loss": 0.7834, "rewards/accuracies": 0.0, "rewards/chosen": -0.86444091796875, "rewards/margins": -1.332525610923767, "rewards/rejected": 0.4680847227573395, "step": 990 }, { "epoch": 0.22, "learning_rate": 9.834846665603486e-06, "logits/chosen": -0.79702228307724, "logits/rejected": -0.7696820497512817, "logps/chosen": -175.0244140625, "logps/rejected": -117.50956726074219, "loss": 0.3558, "rewards/accuracies": 1.0, "rewards/chosen": 0.7090423703193665, "rewards/margins": 3.9857566356658936, "rewards/rejected": -3.276714324951172, "step": 991 }, { "epoch": 0.22, "learning_rate": 9.834389503565978e-06, "logits/chosen": -1.0077235698699951, "logits/rejected": -1.016031265258789, "logps/chosen": -161.6951141357422, "logps/rejected": -117.05987548828125, "loss": 0.2674, "rewards/accuracies": 1.0, "rewards/chosen": 0.4291397035121918, "rewards/margins": 0.375732421875, "rewards/rejected": 0.05340728908777237, "step": 992 }, { "epoch": 0.22, "learning_rate": 9.833931720321042e-06, "logits/chosen": -0.703777015209198, "logits/rejected": -0.7020907402038574, "logps/chosen": -102.7545166015625, "logps/rejected": -107.58552551269531, "loss": 0.1395, "rewards/accuracies": 1.0, "rewards/chosen": -0.46557846665382385, "rewards/margins": 1.2189781665802002, "rewards/rejected": -1.6845566034317017, "step": 993 }, { "epoch": 0.22, "learning_rate": 9.833473315927498e-06, "logits/chosen": -1.0720818042755127, "logits/rejected": -1.0425620079040527, "logps/chosen": -75.84577178955078, "logps/rejected": -137.66729736328125, "loss": 0.68, "rewards/accuracies": 0.0, "rewards/chosen": -1.2371593713760376, "rewards/margins": -1.0634106397628784, "rewards/rejected": -0.17374877631664276, "step": 994 }, { "epoch": 0.22, "learning_rate": 9.833014290444254e-06, "logits/chosen": -0.5972107648849487, "logits/rejected": -0.6154047250747681, "logps/chosen": -223.06063842773438, "logps/rejected": -283.20343017578125, "loss": 0.0483, "rewards/accuracies": 1.0, "rewards/chosen": -3.058035373687744, "rewards/margins": 2.3802552223205566, "rewards/rejected": -5.438290596008301, "step": 995 }, { "epoch": 0.22, "learning_rate": 9.832554643930292e-06, "logits/chosen": -0.8429503440856934, "logits/rejected": -0.7512670755386353, "logps/chosen": -180.82293701171875, "logps/rejected": -321.62396240234375, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -0.10623627156019211, "rewards/margins": 13.209269523620605, "rewards/rejected": -13.315505981445312, "step": 996 }, { "epoch": 0.22, "learning_rate": 9.832094376444675e-06, "logits/chosen": -1.096928358078003, "logits/rejected": -1.0716618299484253, "logps/chosen": -180.27011108398438, "logps/rejected": -239.11990356445312, "loss": 0.2344, "rewards/accuracies": 1.0, "rewards/chosen": 0.262298583984375, "rewards/margins": 5.2241411209106445, "rewards/rejected": -4.9618425369262695, "step": 997 }, { "epoch": 0.22, "learning_rate": 9.831633488046547e-06, "logits/chosen": -0.8807138204574585, "logits/rejected": -0.9062751531600952, "logps/chosen": -160.0438232421875, "logps/rejected": -137.20144653320312, "loss": 0.2707, "rewards/accuracies": 1.0, "rewards/chosen": -1.4420593976974487, "rewards/margins": 1.746416449546814, "rewards/rejected": -3.1884758472442627, "step": 998 }, { "epoch": 0.22, "learning_rate": 9.83117197879513e-06, "logits/chosen": -1.0436272621154785, "logits/rejected": -0.9804543852806091, "logps/chosen": -102.06543731689453, "logps/rejected": -143.67782592773438, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": 0.17588196694850922, "rewards/margins": 3.8286163806915283, "rewards/rejected": -3.6527345180511475, "step": 999 }, { "epoch": 0.22, "learning_rate": 9.830709848749727e-06, "logits/chosen": -0.9330449104309082, "logits/rejected": -0.8576820492744446, "logps/chosen": -123.82176971435547, "logps/rejected": -154.38653564453125, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.6781547665596008, "rewards/margins": 5.791043758392334, "rewards/rejected": -6.469198703765869, "step": 1000 }, { "epoch": 0.22, "learning_rate": 9.830247097969723e-06, "logits/chosen": -0.5479977130889893, "logits/rejected": -0.5581260323524475, "logps/chosen": -110.36740112304688, "logps/rejected": -218.1629638671875, "loss": 0.0745, "rewards/accuracies": 1.0, "rewards/chosen": -0.7286667227745056, "rewards/margins": 1.9308974742889404, "rewards/rejected": -2.659564256668091, "step": 1001 }, { "epoch": 0.22, "learning_rate": 9.829783726514578e-06, "logits/chosen": -0.9923155307769775, "logits/rejected": -0.9040446877479553, "logps/chosen": -121.39140319824219, "logps/rejected": -182.49046325683594, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": -0.042653657495975494, "rewards/margins": 5.501918792724609, "rewards/rejected": -5.544572353363037, "step": 1002 }, { "epoch": 0.22, "learning_rate": 9.829319734443833e-06, "logits/chosen": -0.8939082026481628, "logits/rejected": -0.8564789891242981, "logps/chosen": -114.58682250976562, "logps/rejected": -118.13069915771484, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": 0.7064293026924133, "rewards/margins": 3.2499237060546875, "rewards/rejected": -2.543494462966919, "step": 1003 }, { "epoch": 0.22, "learning_rate": 9.828855121817114e-06, "logits/chosen": -0.9745548367500305, "logits/rejected": -0.9899196028709412, "logps/chosen": -258.2412109375, "logps/rejected": -213.38494873046875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 1.077355980873108, "rewards/margins": 7.584632873535156, "rewards/rejected": -6.507277011871338, "step": 1004 }, { "epoch": 0.22, "learning_rate": 9.82838988869412e-06, "logits/chosen": -1.0409071445465088, "logits/rejected": -1.0736323595046997, "logps/chosen": -278.8494567871094, "logps/rejected": -230.1015625, "loss": 0.7194, "rewards/accuracies": 1.0, "rewards/chosen": -0.144683837890625, "rewards/margins": 1.7739304304122925, "rewards/rejected": -1.9186142683029175, "step": 1005 }, { "epoch": 0.22, "learning_rate": 9.827924035134629e-06, "logits/chosen": -0.733729362487793, "logits/rejected": -0.733729362487793, "logps/chosen": -203.34295654296875, "logps/rejected": -203.34295654296875, "loss": 0.3491, "rewards/accuracies": 0.0, "rewards/chosen": -2.7135772705078125, "rewards/margins": 0.0, "rewards/rejected": -2.7135772705078125, "step": 1006 }, { "epoch": 0.22, "learning_rate": 9.827457561198507e-06, "logits/chosen": -0.9877393841743469, "logits/rejected": -0.9418664574623108, "logps/chosen": -120.17550659179688, "logps/rejected": -48.17768096923828, "loss": 0.396, "rewards/accuracies": 0.0, "rewards/chosen": -2.454164981842041, "rewards/margins": -0.18721604347229004, "rewards/rejected": -2.266948938369751, "step": 1007 }, { "epoch": 0.22, "learning_rate": 9.826990466945695e-06, "logits/chosen": -0.8099895715713501, "logits/rejected": -0.7620251178741455, "logps/chosen": -102.22101593017578, "logps/rejected": -126.2272720336914, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -0.04290771484375, "rewards/margins": 3.8210701942443848, "rewards/rejected": -3.8639779090881348, "step": 1008 }, { "epoch": 0.22, "learning_rate": 9.826522752436211e-06, "logits/chosen": -0.6457411646842957, "logits/rejected": -0.5957860946655273, "logps/chosen": -195.6874542236328, "logps/rejected": -206.33755493164062, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": 1.439666748046875, "rewards/margins": 3.4599366188049316, "rewards/rejected": -2.0202698707580566, "step": 1009 }, { "epoch": 0.22, "learning_rate": 9.826054417730156e-06, "logits/chosen": -0.7081067562103271, "logits/rejected": -0.6596547365188599, "logps/chosen": -126.79228210449219, "logps/rejected": -174.32264709472656, "loss": 0.5427, "rewards/accuracies": 1.0, "rewards/chosen": -2.8503715991973877, "rewards/margins": 0.1820228099822998, "rewards/rejected": -3.0323944091796875, "step": 1010 }, { "epoch": 0.22, "learning_rate": 9.825585462887709e-06, "logits/chosen": -0.8593970537185669, "logits/rejected": -0.8740487694740295, "logps/chosen": -101.59799194335938, "logps/rejected": -210.8948974609375, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -0.99666827917099, "rewards/margins": 3.7972068786621094, "rewards/rejected": -4.793875217437744, "step": 1011 }, { "epoch": 0.22, "learning_rate": 9.825115887969131e-06, "logits/chosen": -0.5748221278190613, "logits/rejected": -0.6184704303741455, "logps/chosen": -79.9176254272461, "logps/rejected": -221.64300537109375, "loss": 0.0939, "rewards/accuracies": 1.0, "rewards/chosen": -1.8640648126602173, "rewards/margins": 3.097339630126953, "rewards/rejected": -4.961404323577881, "step": 1012 }, { "epoch": 0.22, "learning_rate": 9.82464569303476e-06, "logits/chosen": -1.1611826419830322, "logits/rejected": -1.1773324012756348, "logps/chosen": -196.7141571044922, "logps/rejected": -158.0971221923828, "loss": 0.1837, "rewards/accuracies": 1.0, "rewards/chosen": -0.4610885679721832, "rewards/margins": 3.573930263519287, "rewards/rejected": -4.0350189208984375, "step": 1013 }, { "epoch": 0.22, "learning_rate": 9.824174878145017e-06, "logits/chosen": -1.0523436069488525, "logits/rejected": -1.0091418027877808, "logps/chosen": -161.72488403320312, "logps/rejected": -172.46275329589844, "loss": 0.8485, "rewards/accuracies": 0.0, "rewards/chosen": -1.7840683460235596, "rewards/margins": -1.4941071271896362, "rewards/rejected": -0.2899612486362457, "step": 1014 }, { "epoch": 0.22, "learning_rate": 9.823703443360398e-06, "logits/chosen": -1.040635585784912, "logits/rejected": -1.0070960521697998, "logps/chosen": -159.4167938232422, "logps/rejected": -55.588470458984375, "loss": 0.1647, "rewards/accuracies": 1.0, "rewards/chosen": 0.8834854364395142, "rewards/margins": 2.519618034362793, "rewards/rejected": -1.6361324787139893, "step": 1015 }, { "epoch": 0.22, "learning_rate": 9.823231388741483e-06, "logits/chosen": -0.7715215682983398, "logits/rejected": -0.7398615479469299, "logps/chosen": -135.06529235839844, "logps/rejected": -255.8396759033203, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": -0.5382431149482727, "rewards/margins": 8.060259819030762, "rewards/rejected": -8.598503112792969, "step": 1016 }, { "epoch": 0.23, "learning_rate": 9.822758714348928e-06, "logits/chosen": -0.7019120454788208, "logits/rejected": -0.6553794145584106, "logps/chosen": -221.90158081054688, "logps/rejected": -314.8211975097656, "loss": 0.1645, "rewards/accuracies": 1.0, "rewards/chosen": -0.893310546875, "rewards/margins": 0.9425079822540283, "rewards/rejected": -1.8358185291290283, "step": 1017 }, { "epoch": 0.23, "learning_rate": 9.822285420243474e-06, "logits/chosen": -0.8006020188331604, "logits/rejected": -0.8014869689941406, "logps/chosen": -113.08099365234375, "logps/rejected": -94.97048950195312, "loss": 0.0756, "rewards/accuracies": 1.0, "rewards/chosen": -2.1121201515197754, "rewards/margins": 2.2708139419555664, "rewards/rejected": -4.382934093475342, "step": 1018 }, { "epoch": 0.23, "learning_rate": 9.821811506485934e-06, "logits/chosen": -0.6529785990715027, "logits/rejected": -0.635510265827179, "logps/chosen": -162.95217895507812, "logps/rejected": -179.34730529785156, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -0.14707641303539276, "rewards/margins": 4.606011867523193, "rewards/rejected": -4.753088474273682, "step": 1019 }, { "epoch": 0.23, "learning_rate": 9.821336973137207e-06, "logits/chosen": -1.0332626104354858, "logits/rejected": -1.0434695482254028, "logps/chosen": -145.10113525390625, "logps/rejected": -101.47329711914062, "loss": 0.3969, "rewards/accuracies": 0.0, "rewards/chosen": -0.6700989007949829, "rewards/margins": -0.19213488698005676, "rewards/rejected": -0.47796401381492615, "step": 1020 }, { "epoch": 0.23, "learning_rate": 9.820861820258269e-06, "logits/chosen": -1.0259008407592773, "logits/rejected": -1.0010918378829956, "logps/chosen": -73.57547760009766, "logps/rejected": -134.04258728027344, "loss": 0.3207, "rewards/accuracies": 1.0, "rewards/chosen": 0.4549263119697571, "rewards/margins": 3.7524666786193848, "rewards/rejected": -3.2975404262542725, "step": 1021 }, { "epoch": 0.23, "learning_rate": 9.820386047910177e-06, "logits/chosen": -0.8461000323295593, "logits/rejected": -0.82355135679245, "logps/chosen": -126.59794616699219, "logps/rejected": -243.200439453125, "loss": 0.2121, "rewards/accuracies": 1.0, "rewards/chosen": -0.143849179148674, "rewards/margins": 1.578444004058838, "rewards/rejected": -1.7222931385040283, "step": 1022 }, { "epoch": 0.23, "learning_rate": 9.819909656154066e-06, "logits/chosen": -0.8743308782577515, "logits/rejected": -0.6676934361457825, "logps/chosen": -147.83514404296875, "logps/rejected": -343.702880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.9628204107284546, "rewards/margins": 16.349538803100586, "rewards/rejected": -14.38671875, "step": 1023 }, { "epoch": 0.23, "learning_rate": 9.81943264505115e-06, "logits/chosen": -0.6169708967208862, "logits/rejected": -0.5411180257797241, "logps/chosen": -84.86090087890625, "logps/rejected": -141.27259826660156, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.05499878153204918, "rewards/margins": 5.237044811248779, "rewards/rejected": -5.182045936584473, "step": 1024 }, { "epoch": 0.23, "learning_rate": 9.818955014662725e-06, "logits/chosen": -0.5545384883880615, "logits/rejected": -0.5366469621658325, "logps/chosen": -61.62428283691406, "logps/rejected": -134.2066192626953, "loss": 0.217, "rewards/accuracies": 1.0, "rewards/chosen": -1.4729160070419312, "rewards/margins": 1.3968061208724976, "rewards/rejected": -2.8697221279144287, "step": 1025 }, { "epoch": 0.23, "learning_rate": 9.818476765050167e-06, "logits/chosen": -0.7658452987670898, "logits/rejected": -0.6812323927879333, "logps/chosen": -166.50888061523438, "logps/rejected": -224.8287353515625, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.5662155151367188, "rewards/margins": 7.6521406173706055, "rewards/rejected": -8.218356132507324, "step": 1026 }, { "epoch": 0.23, "learning_rate": 9.817997896274925e-06, "logits/chosen": -0.45052728056907654, "logits/rejected": -0.3572580814361572, "logps/chosen": -135.16387939453125, "logps/rejected": -183.95773315429688, "loss": 0.1795, "rewards/accuracies": 1.0, "rewards/chosen": -0.5375351309776306, "rewards/margins": 5.046318054199219, "rewards/rejected": -5.583853244781494, "step": 1027 }, { "epoch": 0.23, "learning_rate": 9.817518408398536e-06, "logits/chosen": -0.9753054976463318, "logits/rejected": -0.9489837884902954, "logps/chosen": -84.92900085449219, "logps/rejected": -234.4374542236328, "loss": 2.9496, "rewards/accuracies": 1.0, "rewards/chosen": -1.0903488397598267, "rewards/margins": 10.739923477172852, "rewards/rejected": -11.830272674560547, "step": 1028 }, { "epoch": 0.23, "learning_rate": 9.817038301482612e-06, "logits/chosen": -0.6366115808486938, "logits/rejected": -0.6798142790794373, "logps/chosen": -135.28070068359375, "logps/rejected": -173.4009552001953, "loss": 3.7393, "rewards/accuracies": 1.0, "rewards/chosen": -3.294725179672241, "rewards/margins": 3.202322244644165, "rewards/rejected": -6.497047424316406, "step": 1029 }, { "epoch": 0.23, "learning_rate": 9.81655757558885e-06, "logits/chosen": -0.6773518919944763, "logits/rejected": -0.6773518919944763, "logps/chosen": -45.602577209472656, "logps/rejected": -45.602577209472656, "loss": 0.7952, "rewards/accuracies": 0.0, "rewards/chosen": -0.9688507318496704, "rewards/margins": 0.0, "rewards/rejected": -0.9688507318496704, "step": 1030 }, { "epoch": 0.23, "learning_rate": 9.816076230779014e-06, "logits/chosen": -0.6048711538314819, "logits/rejected": -0.4765821695327759, "logps/chosen": -80.64326477050781, "logps/rejected": -326.97955322265625, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": -0.09506683796644211, "rewards/margins": 4.949824333190918, "rewards/rejected": -5.044891357421875, "step": 1031 }, { "epoch": 0.23, "learning_rate": 9.815594267114962e-06, "logits/chosen": -0.9913817644119263, "logits/rejected": -1.0151252746582031, "logps/chosen": -141.8551483154297, "logps/rejected": -106.90620422363281, "loss": 0.5124, "rewards/accuracies": 0.0, "rewards/chosen": -1.2384109497070312, "rewards/margins": -0.45286864042282104, "rewards/rejected": -0.7855423092842102, "step": 1032 }, { "epoch": 0.23, "learning_rate": 9.815111684658622e-06, "logits/chosen": -0.7021135091781616, "logits/rejected": -0.6383166313171387, "logps/chosen": -101.66819763183594, "logps/rejected": -177.57806396484375, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": 0.40945741534233093, "rewards/margins": 4.141241550445557, "rewards/rejected": -3.7317841053009033, "step": 1033 }, { "epoch": 0.23, "learning_rate": 9.814628483472006e-06, "logits/chosen": -1.1067636013031006, "logits/rejected": -1.1597108840942383, "logps/chosen": -98.36001586914062, "logps/rejected": -52.08814239501953, "loss": 0.4364, "rewards/accuracies": 1.0, "rewards/chosen": -1.7996186017990112, "rewards/margins": 1.2085894346237183, "rewards/rejected": -3.0082080364227295, "step": 1034 }, { "epoch": 0.23, "learning_rate": 9.814144663617204e-06, "logits/chosen": -0.5869095325469971, "logits/rejected": -0.5869095325469971, "logps/chosen": -193.22714233398438, "logps/rejected": -193.22714233398438, "loss": 0.4346, "rewards/accuracies": 0.0, "rewards/chosen": -2.110311985015869, "rewards/margins": 0.0, "rewards/rejected": -2.110311985015869, "step": 1035 }, { "epoch": 0.23, "learning_rate": 9.813660225156385e-06, "logits/chosen": -1.1776236295700073, "logits/rejected": -1.2649707794189453, "logps/chosen": -209.32537841796875, "logps/rejected": -71.38713836669922, "loss": 0.0877, "rewards/accuracies": 1.0, "rewards/chosen": -1.0625702142715454, "rewards/margins": 2.057806968688965, "rewards/rejected": -3.1203773021698, "step": 1036 }, { "epoch": 0.23, "learning_rate": 9.813175168151801e-06, "logits/chosen": -0.850038468837738, "logits/rejected": -0.8285812139511108, "logps/chosen": -82.0599594116211, "logps/rejected": -221.43011474609375, "loss": 0.1341, "rewards/accuracies": 1.0, "rewards/chosen": 0.5160461664199829, "rewards/margins": 3.2959094047546387, "rewards/rejected": -2.779863119125366, "step": 1037 }, { "epoch": 0.23, "learning_rate": 9.812689492665777e-06, "logits/chosen": -0.8563348650932312, "logits/rejected": -0.8398711085319519, "logps/chosen": -128.74972534179688, "logps/rejected": -218.54464721679688, "loss": 0.1374, "rewards/accuracies": 1.0, "rewards/chosen": -0.9920364618301392, "rewards/margins": 5.336099624633789, "rewards/rejected": -6.328135967254639, "step": 1038 }, { "epoch": 0.23, "learning_rate": 9.812203198760722e-06, "logits/chosen": -0.47175949811935425, "logits/rejected": -0.4703158140182495, "logps/chosen": -69.54423522949219, "logps/rejected": -130.29791259765625, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": -0.3216240108013153, "rewards/margins": 2.9421792030334473, "rewards/rejected": -3.263803243637085, "step": 1039 }, { "epoch": 0.23, "learning_rate": 9.811716286499125e-06, "logits/chosen": -0.7502645254135132, "logits/rejected": -0.6422886252403259, "logps/chosen": -137.11431884765625, "logps/rejected": -38.84221267700195, "loss": 1.4142, "rewards/accuracies": 0.0, "rewards/chosen": -1.5115798711776733, "rewards/margins": -0.11613273620605469, "rewards/rejected": -1.3954471349716187, "step": 1040 }, { "epoch": 0.23, "learning_rate": 9.811228755943551e-06, "logits/chosen": -1.3916473388671875, "logits/rejected": -1.2423309087753296, "logps/chosen": -160.62644958496094, "logps/rejected": -362.0589904785156, "loss": 0.0315, "rewards/accuracies": 1.0, "rewards/chosen": -3.1978683471679688, "rewards/margins": 2.752406597137451, "rewards/rejected": -5.95027494430542, "step": 1041 }, { "epoch": 0.23, "learning_rate": 9.810740607156647e-06, "logits/chosen": -0.6967649459838867, "logits/rejected": -0.6267828941345215, "logps/chosen": -61.72658920288086, "logps/rejected": -185.82315063476562, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": 0.11766471713781357, "rewards/margins": 3.108933687210083, "rewards/rejected": -2.9912688732147217, "step": 1042 }, { "epoch": 0.23, "learning_rate": 9.810251840201143e-06, "logits/chosen": -0.9640809297561646, "logits/rejected": -0.9640809297561646, "logps/chosen": -143.58346557617188, "logps/rejected": -143.58346557617188, "loss": 0.5866, "rewards/accuracies": 0.0, "rewards/chosen": -4.549275875091553, "rewards/margins": 0.0, "rewards/rejected": -4.549275875091553, "step": 1043 }, { "epoch": 0.23, "learning_rate": 9.80976245513984e-06, "logits/chosen": -0.7336459159851074, "logits/rejected": -0.7060388326644897, "logps/chosen": -154.49900817871094, "logps/rejected": -122.78631591796875, "loss": 0.0978, "rewards/accuracies": 1.0, "rewards/chosen": -0.9585342407226562, "rewards/margins": 2.952648878097534, "rewards/rejected": -3.9111831188201904, "step": 1044 }, { "epoch": 0.23, "learning_rate": 9.809272452035622e-06, "logits/chosen": -0.7641429901123047, "logits/rejected": -0.7613328695297241, "logps/chosen": -109.81556701660156, "logps/rejected": -117.2464599609375, "loss": 0.1422, "rewards/accuracies": 1.0, "rewards/chosen": -1.3939812183380127, "rewards/margins": 1.1121742725372314, "rewards/rejected": -2.506155490875244, "step": 1045 }, { "epoch": 0.23, "learning_rate": 9.808781830951457e-06, "logits/chosen": -0.9533964991569519, "logits/rejected": -0.9002549648284912, "logps/chosen": -188.569091796875, "logps/rejected": -171.7705841064453, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.8843231201171875, "rewards/margins": 6.769839763641357, "rewards/rejected": -5.88551664352417, "step": 1046 }, { "epoch": 0.23, "learning_rate": 9.808290591950386e-06, "logits/chosen": -0.7222555875778198, "logits/rejected": -0.7354251742362976, "logps/chosen": -88.04127502441406, "logps/rejected": -203.87071228027344, "loss": 0.6338, "rewards/accuracies": 1.0, "rewards/chosen": -0.4739883542060852, "rewards/margins": 3.980541944503784, "rewards/rejected": -4.454530239105225, "step": 1047 }, { "epoch": 0.23, "learning_rate": 9.807798735095533e-06, "logits/chosen": -0.5459370017051697, "logits/rejected": -0.5002580881118774, "logps/chosen": -50.91908264160156, "logps/rejected": -79.06089782714844, "loss": 0.1913, "rewards/accuracies": 1.0, "rewards/chosen": 0.2411365509033203, "rewards/margins": 3.097994327545166, "rewards/rejected": -2.8568577766418457, "step": 1048 }, { "epoch": 0.23, "learning_rate": 9.807306260450098e-06, "logits/chosen": -0.9450458884239197, "logits/rejected": -0.42478829622268677, "logps/chosen": -99.47456359863281, "logps/rejected": -522.5399169921875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 1.0357849597930908, "rewards/margins": 23.90342140197754, "rewards/rejected": -22.86763572692871, "step": 1049 }, { "epoch": 0.23, "learning_rate": 9.806813168077367e-06, "logits/chosen": -0.8042629957199097, "logits/rejected": -0.8198959231376648, "logps/chosen": -23.59195327758789, "logps/rejected": -37.82057189941406, "loss": 0.2652, "rewards/accuracies": 1.0, "rewards/chosen": -0.7162098288536072, "rewards/margins": 0.39903122186660767, "rewards/rejected": -1.1152410507202148, "step": 1050 }, { "epoch": 0.23, "learning_rate": 9.806319458040701e-06, "logits/chosen": -0.8275154232978821, "logits/rejected": -0.8641678690910339, "logps/chosen": -223.73403930664062, "logps/rejected": -209.2762451171875, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": 2.2869536876678467, "rewards/margins": 4.169464111328125, "rewards/rejected": -1.8825104236602783, "step": 1051 }, { "epoch": 0.23, "learning_rate": 9.805825130403536e-06, "logits/chosen": -0.9116854667663574, "logits/rejected": -0.8747106790542603, "logps/chosen": -152.3220672607422, "logps/rejected": -189.5322265625, "loss": 0.0469, "rewards/accuracies": 1.0, "rewards/chosen": 0.4292404353618622, "rewards/margins": 3.5459702014923096, "rewards/rejected": -3.116729736328125, "step": 1052 }, { "epoch": 0.23, "learning_rate": 9.805330185229397e-06, "logits/chosen": -0.8863846659660339, "logits/rejected": -0.8787094354629517, "logps/chosen": -99.73649597167969, "logps/rejected": -77.42973327636719, "loss": 0.2225, "rewards/accuracies": 1.0, "rewards/chosen": -2.3857247829437256, "rewards/margins": 0.5794615745544434, "rewards/rejected": -2.965186357498169, "step": 1053 }, { "epoch": 0.23, "learning_rate": 9.804834622581879e-06, "logits/chosen": -0.6148297786712646, "logits/rejected": -0.6303350329399109, "logps/chosen": -210.86538696289062, "logps/rejected": -156.05262756347656, "loss": 0.6313, "rewards/accuracies": 0.0, "rewards/chosen": -0.4810287654399872, "rewards/margins": -0.8344284296035767, "rewards/rejected": 0.3533996641635895, "step": 1054 }, { "epoch": 0.23, "learning_rate": 9.804338442524661e-06, "logits/chosen": -1.0632293224334717, "logits/rejected": -0.9819138050079346, "logps/chosen": -72.0486068725586, "logps/rejected": -186.38548278808594, "loss": 0.2571, "rewards/accuracies": 1.0, "rewards/chosen": -0.22441254556179047, "rewards/margins": 0.4264434576034546, "rewards/rejected": -0.6508560180664062, "step": 1055 }, { "epoch": 0.23, "learning_rate": 9.803841645121505e-06, "logits/chosen": -0.6034795641899109, "logits/rejected": -0.5740774869918823, "logps/chosen": -139.8678436279297, "logps/rejected": -214.42013549804688, "loss": 0.1795, "rewards/accuracies": 1.0, "rewards/chosen": -0.5778854489326477, "rewards/margins": 2.0658371448516846, "rewards/rejected": -2.6437225341796875, "step": 1056 }, { "epoch": 0.23, "learning_rate": 9.803344230436245e-06, "logits/chosen": -0.8421050906181335, "logits/rejected": -0.9072226285934448, "logps/chosen": -298.32098388671875, "logps/rejected": -142.21981811523438, "loss": 0.0712, "rewards/accuracies": 1.0, "rewards/chosen": 0.06992798298597336, "rewards/margins": 2.0613114833831787, "rewards/rejected": -1.99138343334198, "step": 1057 }, { "epoch": 0.23, "learning_rate": 9.802846198532798e-06, "logits/chosen": -0.6384299993515015, "logits/rejected": -0.5924481153488159, "logps/chosen": -82.82421112060547, "logps/rejected": -155.67991638183594, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.21205826103687286, "rewards/margins": 4.577677249908447, "rewards/rejected": -4.789735317230225, "step": 1058 }, { "epoch": 0.23, "learning_rate": 9.80234754947516e-06, "logits/chosen": -0.7967857718467712, "logits/rejected": -0.6865503787994385, "logps/chosen": -172.94363403320312, "logps/rejected": -293.46197509765625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 2.5552093982696533, "rewards/margins": 5.656839370727539, "rewards/rejected": -3.1016297340393066, "step": 1059 }, { "epoch": 0.23, "learning_rate": 9.801848283327406e-06, "logits/chosen": -0.7892616987228394, "logits/rejected": -0.7836410403251648, "logps/chosen": -173.3677978515625, "logps/rejected": -210.92393493652344, "loss": 0.5642, "rewards/accuracies": 0.0, "rewards/chosen": -4.624075412750244, "rewards/margins": -0.6991302967071533, "rewards/rejected": -3.924945116043091, "step": 1060 }, { "epoch": 0.23, "learning_rate": 9.801348400153692e-06, "logits/chosen": -0.9731348752975464, "logits/rejected": -0.9369363188743591, "logps/chosen": -70.12638854980469, "logps/rejected": -85.04602813720703, "loss": 0.3578, "rewards/accuracies": 1.0, "rewards/chosen": 0.07210998982191086, "rewards/margins": 3.05798602104187, "rewards/rejected": -2.9858760833740234, "step": 1061 }, { "epoch": 0.24, "learning_rate": 9.800847900018251e-06, "logits/chosen": -0.9860715270042419, "logits/rejected": -0.9852598309516907, "logps/chosen": -65.65316772460938, "logps/rejected": -133.01576232910156, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.15532226860523224, "rewards/margins": 5.114535808563232, "rewards/rejected": -5.269857883453369, "step": 1062 }, { "epoch": 0.24, "learning_rate": 9.800346782985395e-06, "logits/chosen": -0.976996123790741, "logits/rejected": -0.9111310839653015, "logps/chosen": -162.56451416015625, "logps/rejected": -164.83775329589844, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.24017181992530823, "rewards/margins": 7.12783145904541, "rewards/rejected": -7.3680033683776855, "step": 1063 }, { "epoch": 0.24, "learning_rate": 9.799845049119517e-06, "logits/chosen": -0.7759237289428711, "logits/rejected": -0.7717228531837463, "logps/chosen": -117.05342102050781, "logps/rejected": -213.9624786376953, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": 0.067108154296875, "rewards/margins": 3.4802963733673096, "rewards/rejected": -3.4131882190704346, "step": 1064 }, { "epoch": 0.24, "learning_rate": 9.79934269848509e-06, "logits/chosen": -0.566899836063385, "logits/rejected": -0.4089306890964508, "logps/chosen": -182.03271484375, "logps/rejected": -162.20950317382812, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 1.5280808210372925, "rewards/margins": 9.05804443359375, "rewards/rejected": -7.529963970184326, "step": 1065 }, { "epoch": 0.24, "learning_rate": 9.798839731146662e-06, "logits/chosen": -0.9416078925132751, "logits/rejected": -0.9607592821121216, "logps/chosen": -96.41893005371094, "logps/rejected": -121.60774993896484, "loss": 0.7949, "rewards/accuracies": 0.0, "rewards/chosen": -1.8394073247909546, "rewards/margins": -1.3483772277832031, "rewards/rejected": -0.49103012681007385, "step": 1066 }, { "epoch": 0.24, "learning_rate": 9.798336147168865e-06, "logits/chosen": -0.7487953305244446, "logits/rejected": -0.8198091983795166, "logps/chosen": -231.875, "logps/rejected": -91.53801727294922, "loss": 0.3591, "rewards/accuracies": 0.0, "rewards/chosen": -3.4878005981445312, "rewards/margins": -0.04940986633300781, "rewards/rejected": -3.4383907318115234, "step": 1067 }, { "epoch": 0.24, "learning_rate": 9.797831946616408e-06, "logits/chosen": -0.9202093482017517, "logits/rejected": -0.9246073365211487, "logps/chosen": -183.164794921875, "logps/rejected": -203.11306762695312, "loss": 1.2994, "rewards/accuracies": 0.0, "rewards/chosen": -3.869825839996338, "rewards/margins": -2.0085954666137695, "rewards/rejected": -1.861230492591858, "step": 1068 }, { "epoch": 0.24, "learning_rate": 9.797327129554081e-06, "logits/chosen": -0.5574327707290649, "logits/rejected": -0.5382843613624573, "logps/chosen": -66.06439208984375, "logps/rejected": -51.624961853027344, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": 0.7256706357002258, "rewards/margins": 3.3144733905792236, "rewards/rejected": -2.5888028144836426, "step": 1069 }, { "epoch": 0.24, "learning_rate": 9.796821696046748e-06, "logits/chosen": -0.7079146504402161, "logits/rejected": -0.7129292488098145, "logps/chosen": -82.31218719482422, "logps/rejected": -64.69145202636719, "loss": 0.1881, "rewards/accuracies": 1.0, "rewards/chosen": 0.6025398373603821, "rewards/margins": 0.7839908599853516, "rewards/rejected": -0.18145103752613068, "step": 1070 }, { "epoch": 0.24, "learning_rate": 9.79631564615936e-06, "logits/chosen": -0.6637027859687805, "logits/rejected": -0.5534813404083252, "logps/chosen": -166.55557250976562, "logps/rejected": -252.83001708984375, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -0.9893524050712585, "rewards/margins": 2.366046190261841, "rewards/rejected": -3.355398654937744, "step": 1071 }, { "epoch": 0.24, "learning_rate": 9.79580897995694e-06, "logits/chosen": -0.5293701887130737, "logits/rejected": -0.43113282322883606, "logps/chosen": -64.14787292480469, "logps/rejected": -236.0968017578125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.2686500549316406, "rewards/margins": 7.9679670333862305, "rewards/rejected": -8.236617088317871, "step": 1072 }, { "epoch": 0.24, "learning_rate": 9.795301697504595e-06, "logits/chosen": -0.5533232092857361, "logits/rejected": -0.5856408476829529, "logps/chosen": -54.62895965576172, "logps/rejected": -78.44989776611328, "loss": 0.568, "rewards/accuracies": 0.0, "rewards/chosen": -1.6104835271835327, "rewards/margins": -0.7475104928016663, "rewards/rejected": -0.8629730343818665, "step": 1073 }, { "epoch": 0.24, "learning_rate": 9.794793798867512e-06, "logits/chosen": -0.4383235275745392, "logits/rejected": -0.4383235275745392, "logps/chosen": -78.11749267578125, "logps/rejected": -78.11749267578125, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -1.4492508172988892, "rewards/margins": 0.0, "rewards/rejected": -1.4492508172988892, "step": 1074 }, { "epoch": 0.24, "learning_rate": 9.794285284110949e-06, "logits/chosen": -0.49838390946388245, "logits/rejected": -0.486480176448822, "logps/chosen": -139.5859832763672, "logps/rejected": -40.59223175048828, "loss": 0.7125, "rewards/accuracies": 0.0, "rewards/chosen": -2.496441602706909, "rewards/margins": -0.5584437847137451, "rewards/rejected": -1.937997817993164, "step": 1075 }, { "epoch": 0.24, "learning_rate": 9.793776153300253e-06, "logits/chosen": -0.6505413055419922, "logits/rejected": -0.6472850441932678, "logps/chosen": -50.20487976074219, "logps/rejected": -96.28800201416016, "loss": 0.4229, "rewards/accuracies": 1.0, "rewards/chosen": -0.32877427339553833, "rewards/margins": 0.481201171875, "rewards/rejected": -0.8099754452705383, "step": 1076 }, { "epoch": 0.24, "learning_rate": 9.793266406500847e-06, "logits/chosen": -0.5737372040748596, "logits/rejected": -0.5689167976379395, "logps/chosen": -69.60738372802734, "logps/rejected": -110.11023712158203, "loss": 0.5555, "rewards/accuracies": 1.0, "rewards/chosen": -0.43613892793655396, "rewards/margins": 0.6564949154853821, "rewards/rejected": -1.092633843421936, "step": 1077 }, { "epoch": 0.24, "learning_rate": 9.792756043778229e-06, "logits/chosen": -0.6094911694526672, "logits/rejected": -0.6094911694526672, "logps/chosen": -70.2106704711914, "logps/rejected": -70.2106704711914, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.01720275916159153, "rewards/margins": 0.0, "rewards/rejected": 0.01720275916159153, "step": 1078 }, { "epoch": 0.24, "learning_rate": 9.79224506519798e-06, "logits/chosen": -0.8119922280311584, "logits/rejected": -0.82585209608078, "logps/chosen": -90.12522888183594, "logps/rejected": -183.41441345214844, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.24860993027687073, "rewards/margins": 6.061404228210449, "rewards/rejected": -6.310014247894287, "step": 1079 }, { "epoch": 0.24, "learning_rate": 9.791733470825763e-06, "logits/chosen": -0.41544461250305176, "logits/rejected": -0.41544461250305176, "logps/chosen": -163.7574462890625, "logps/rejected": -163.7574462890625, "loss": 0.3794, "rewards/accuracies": 0.0, "rewards/chosen": -4.308098793029785, "rewards/margins": 0.0, "rewards/rejected": -4.308098793029785, "step": 1080 }, { "epoch": 0.24, "learning_rate": 9.791221260727313e-06, "logits/chosen": -0.5269055366516113, "logits/rejected": -0.45865437388420105, "logps/chosen": -109.03132629394531, "logps/rejected": -143.48233032226562, "loss": 0.0613, "rewards/accuracies": 1.0, "rewards/chosen": 0.2635330259799957, "rewards/margins": 4.4316864013671875, "rewards/rejected": -4.168153285980225, "step": 1081 }, { "epoch": 0.24, "learning_rate": 9.790708434968448e-06, "logits/chosen": -1.1195316314697266, "logits/rejected": -1.1034109592437744, "logps/chosen": -185.08131408691406, "logps/rejected": -192.9305419921875, "loss": 0.4628, "rewards/accuracies": 0.0, "rewards/chosen": 1.4386948347091675, "rewards/margins": -0.31945037841796875, "rewards/rejected": 1.7581452131271362, "step": 1082 }, { "epoch": 0.24, "learning_rate": 9.790194993615065e-06, "logits/chosen": -0.7256045341491699, "logits/rejected": -0.7052453756332397, "logps/chosen": -76.86994171142578, "logps/rejected": -159.59060668945312, "loss": 0.1373, "rewards/accuracies": 1.0, "rewards/chosen": 0.49886322021484375, "rewards/margins": 3.5962021350860596, "rewards/rejected": -3.097338914871216, "step": 1083 }, { "epoch": 0.24, "learning_rate": 9.78968093673314e-06, "logits/chosen": -1.0172302722930908, "logits/rejected": -0.9854671955108643, "logps/chosen": -71.5389404296875, "logps/rejected": -142.89210510253906, "loss": 0.0453, "rewards/accuracies": 1.0, "rewards/chosen": 0.906451404094696, "rewards/margins": 2.564298152923584, "rewards/rejected": -1.6578468084335327, "step": 1084 }, { "epoch": 0.24, "learning_rate": 9.789166264388732e-06, "logits/chosen": -0.9512462615966797, "logits/rejected": -0.9602194428443909, "logps/chosen": -101.30006408691406, "logps/rejected": -54.63117218017578, "loss": 0.3371, "rewards/accuracies": 1.0, "rewards/chosen": -0.25994643568992615, "rewards/margins": 0.4519977867603302, "rewards/rejected": -0.7119442224502563, "step": 1085 }, { "epoch": 0.24, "learning_rate": 9.78865097664797e-06, "logits/chosen": -1.045058012008667, "logits/rejected": -1.045058012008667, "logps/chosen": -109.19389343261719, "logps/rejected": -109.19389343261719, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.294802188873291, "rewards/margins": 0.0, "rewards/rejected": -2.294802188873291, "step": 1086 }, { "epoch": 0.24, "learning_rate": 9.788135073577069e-06, "logits/chosen": -0.9883262515068054, "logits/rejected": -0.9049113392829895, "logps/chosen": -82.83702087402344, "logps/rejected": -163.83201599121094, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/chosen": 0.13072510063648224, "rewards/margins": 2.6619081497192383, "rewards/rejected": -2.5311830043792725, "step": 1087 }, { "epoch": 0.24, "learning_rate": 9.787618555242321e-06, "logits/chosen": -0.950069785118103, "logits/rejected": -0.9369642734527588, "logps/chosen": -133.0499267578125, "logps/rejected": -233.22027587890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.16603699326515198, "rewards/margins": 6.335455417633057, "rewards/rejected": -6.501492500305176, "step": 1088 }, { "epoch": 0.24, "learning_rate": 9.787101421710099e-06, "logits/chosen": -0.6810927987098694, "logits/rejected": -0.655083954334259, "logps/chosen": -140.74620056152344, "logps/rejected": -213.8072967529297, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": 1.0456345081329346, "rewards/margins": 8.284153938293457, "rewards/rejected": -7.238519191741943, "step": 1089 }, { "epoch": 0.24, "learning_rate": 9.786583673046851e-06, "logits/chosen": -0.848788321018219, "logits/rejected": -0.8755096197128296, "logps/chosen": -174.2880859375, "logps/rejected": -207.11618041992188, "loss": 0.1344, "rewards/accuracies": 1.0, "rewards/chosen": -0.29055023193359375, "rewards/margins": 1.1759811639785767, "rewards/rejected": -1.4665313959121704, "step": 1090 }, { "epoch": 0.24, "learning_rate": 9.786065309319107e-06, "logits/chosen": -0.8622516989707947, "logits/rejected": -0.86111980676651, "logps/chosen": -100.09989929199219, "logps/rejected": -58.32282638549805, "loss": 0.5981, "rewards/accuracies": 0.0, "rewards/chosen": -0.4938293397426605, "rewards/margins": -0.8360241055488586, "rewards/rejected": 0.3421947658061981, "step": 1091 }, { "epoch": 0.24, "learning_rate": 9.785546330593479e-06, "logits/chosen": -0.5517064929008484, "logits/rejected": -0.6015412211418152, "logps/chosen": -134.97900390625, "logps/rejected": -120.99212646484375, "loss": 0.5011, "rewards/accuracies": 1.0, "rewards/chosen": -1.8847137689590454, "rewards/margins": 1.015374779701233, "rewards/rejected": -2.9000885486602783, "step": 1092 }, { "epoch": 0.24, "learning_rate": 9.78502673693665e-06, "logits/chosen": -0.5479652881622314, "logits/rejected": -0.521116316318512, "logps/chosen": -152.61190795898438, "logps/rejected": -178.7627410888672, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": 0.7904251217842102, "rewards/margins": 3.747788906097412, "rewards/rejected": -2.9573638439178467, "step": 1093 }, { "epoch": 0.24, "learning_rate": 9.784506528415388e-06, "logits/chosen": -0.5577855706214905, "logits/rejected": -0.5143308639526367, "logps/chosen": -140.96087646484375, "logps/rejected": -290.66595458984375, "loss": 0.0541, "rewards/accuracies": 1.0, "rewards/chosen": 0.7748169302940369, "rewards/margins": 5.130232334136963, "rewards/rejected": -4.355415344238281, "step": 1094 }, { "epoch": 0.24, "learning_rate": 9.78398570509654e-06, "logits/chosen": -0.5835485458374023, "logits/rejected": -0.6427436470985413, "logps/chosen": -192.81417846679688, "logps/rejected": -72.53337860107422, "loss": 0.1086, "rewards/accuracies": 1.0, "rewards/chosen": -1.875280737876892, "rewards/margins": 1.4408093690872192, "rewards/rejected": -3.3160901069641113, "step": 1095 }, { "epoch": 0.24, "learning_rate": 9.783464267047027e-06, "logits/chosen": -0.6579173803329468, "logits/rejected": -0.6622433066368103, "logps/chosen": -147.10317993164062, "logps/rejected": -201.27801513671875, "loss": 0.1009, "rewards/accuracies": 1.0, "rewards/chosen": -3.9450957775115967, "rewards/margins": 7.88212776184082, "rewards/rejected": -11.827223777770996, "step": 1096 }, { "epoch": 0.24, "learning_rate": 9.782942214333855e-06, "logits/chosen": -0.8899386525154114, "logits/rejected": -0.8921967148780823, "logps/chosen": -97.25215148925781, "logps/rejected": -76.57603454589844, "loss": 0.0614, "rewards/accuracies": 1.0, "rewards/chosen": 0.744006335735321, "rewards/margins": 2.0369648933410645, "rewards/rejected": -1.2929584980010986, "step": 1097 }, { "epoch": 0.24, "learning_rate": 9.782419547024108e-06, "logits/chosen": -0.5889549851417542, "logits/rejected": -0.5889549851417542, "logps/chosen": -202.5300750732422, "logps/rejected": -202.5300750732422, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -1.6339691877365112, "rewards/margins": 0.0, "rewards/rejected": -1.6339691877365112, "step": 1098 }, { "epoch": 0.24, "learning_rate": 9.781896265184944e-06, "logits/chosen": -1.2184839248657227, "logits/rejected": -1.277971863746643, "logps/chosen": -96.8184585571289, "logps/rejected": -78.21936798095703, "loss": 0.0673, "rewards/accuracies": 1.0, "rewards/chosen": -1.6056114435195923, "rewards/margins": 1.951823353767395, "rewards/rejected": -3.5574347972869873, "step": 1099 }, { "epoch": 0.24, "learning_rate": 9.781372368883607e-06, "logits/chosen": -0.7744563221931458, "logits/rejected": -0.7627446055412292, "logps/chosen": -221.52777099609375, "logps/rejected": -213.5971221923828, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.3284103870391846, "rewards/margins": 7.071820259094238, "rewards/rejected": -8.400230407714844, "step": 1100 }, { "epoch": 0.24, "learning_rate": 9.780847858187414e-06, "logits/chosen": -0.6734490990638733, "logits/rejected": -0.6465274095535278, "logps/chosen": -192.36190795898438, "logps/rejected": -200.96127319335938, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 2.6829254627227783, "rewards/margins": 7.633353233337402, "rewards/rejected": -4.950427532196045, "step": 1101 }, { "epoch": 0.24, "learning_rate": 9.780322733163766e-06, "logits/chosen": -0.7843245267868042, "logits/rejected": -0.7051877379417419, "logps/chosen": -169.52304077148438, "logps/rejected": -266.94903564453125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 2.967031955718994, "rewards/margins": 5.283679008483887, "rewards/rejected": -2.3166472911834717, "step": 1102 }, { "epoch": 0.24, "learning_rate": 9.779796993880135e-06, "logits/chosen": -0.544401228427887, "logits/rejected": -0.544401228427887, "logps/chosen": -133.32127380371094, "logps/rejected": -133.32127380371094, "loss": 0.3471, "rewards/accuracies": 0.0, "rewards/chosen": -5.738305568695068, "rewards/margins": 0.0, "rewards/rejected": -5.738305568695068, "step": 1103 }, { "epoch": 0.24, "learning_rate": 9.779270640404082e-06, "logits/chosen": -0.8103135824203491, "logits/rejected": -0.7726222276687622, "logps/chosen": -286.87890625, "logps/rejected": -177.34994506835938, "loss": 0.1925, "rewards/accuracies": 1.0, "rewards/chosen": -0.8576629757881165, "rewards/margins": 0.7555832266807556, "rewards/rejected": -1.613246202468872, "step": 1104 }, { "epoch": 0.24, "learning_rate": 9.778743672803241e-06, "logits/chosen": -0.9275358319282532, "logits/rejected": -0.8665109872817993, "logps/chosen": -178.91189575195312, "logps/rejected": -305.74371337890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.9769378900527954, "rewards/margins": 15.206192970275879, "rewards/rejected": -13.229254722595215, "step": 1105 }, { "epoch": 0.24, "learning_rate": 9.778216091145325e-06, "logits/chosen": -0.6667605638504028, "logits/rejected": -0.6235899925231934, "logps/chosen": -112.50486755371094, "logps/rejected": -100.04178619384766, "loss": 0.2763, "rewards/accuracies": 1.0, "rewards/chosen": 0.8646743893623352, "rewards/margins": 0.30402833223342896, "rewards/rejected": 0.5606460571289062, "step": 1106 }, { "epoch": 0.25, "learning_rate": 9.777687895498128e-06, "logits/chosen": -1.0141518115997314, "logits/rejected": -0.33146053552627563, "logps/chosen": -143.09690856933594, "logps/rejected": -304.3330383300781, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.03051910363137722, "rewards/margins": 14.377884864807129, "rewards/rejected": -14.408404350280762, "step": 1107 }, { "epoch": 0.25, "learning_rate": 9.777159085929524e-06, "logits/chosen": -1.0224665403366089, "logits/rejected": -1.0677958726882935, "logps/chosen": -137.5204620361328, "logps/rejected": -64.55526733398438, "loss": 1.1843, "rewards/accuracies": 0.0, "rewards/chosen": -4.6082963943481445, "rewards/margins": -2.261213779449463, "rewards/rejected": -2.3470826148986816, "step": 1108 }, { "epoch": 0.25, "learning_rate": 9.776629662507458e-06, "logits/chosen": -0.7697572112083435, "logits/rejected": -0.7288370132446289, "logps/chosen": -100.29118347167969, "logps/rejected": -102.11524200439453, "loss": 0.1324, "rewards/accuracies": 1.0, "rewards/chosen": -0.8712204098701477, "rewards/margins": 1.252108097076416, "rewards/rejected": -2.123328447341919, "step": 1109 }, { "epoch": 0.25, "learning_rate": 9.776099625299966e-06, "logits/chosen": -0.534966230392456, "logits/rejected": -0.4931306838989258, "logps/chosen": -167.65814208984375, "logps/rejected": -157.978515625, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 1.073388695716858, "rewards/margins": 3.9005327224731445, "rewards/rejected": -2.827143907546997, "step": 1110 }, { "epoch": 0.25, "learning_rate": 9.775568974375151e-06, "logits/chosen": -0.6837556958198547, "logits/rejected": -0.6513057947158813, "logps/chosen": -75.24809265136719, "logps/rejected": -167.35806274414062, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 0.5477836728096008, "rewards/margins": 5.70711088180542, "rewards/rejected": -5.159327030181885, "step": 1111 }, { "epoch": 0.25, "learning_rate": 9.775037709801206e-06, "logits/chosen": -0.8074097037315369, "logits/rejected": -0.7815409898757935, "logps/chosen": -122.99471282958984, "logps/rejected": -135.39019775390625, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": 0.5822792053222656, "rewards/margins": 3.655958652496338, "rewards/rejected": -3.0736794471740723, "step": 1112 }, { "epoch": 0.25, "learning_rate": 9.774505831646392e-06, "logits/chosen": -0.7511641979217529, "logits/rejected": -0.7678930759429932, "logps/chosen": -203.0706787109375, "logps/rejected": -188.5845947265625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 1.9099715948104858, "rewards/margins": 9.698518753051758, "rewards/rejected": -7.788547039031982, "step": 1113 }, { "epoch": 0.25, "learning_rate": 9.773973339979056e-06, "logits/chosen": -0.9426581263542175, "logits/rejected": -0.9443289041519165, "logps/chosen": -190.73638916015625, "logps/rejected": -200.25730895996094, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": 1.6306015253067017, "rewards/margins": 3.915280342102051, "rewards/rejected": -2.2846786975860596, "step": 1114 }, { "epoch": 0.25, "learning_rate": 9.773440234867623e-06, "logits/chosen": -0.8034824132919312, "logits/rejected": -0.7942978143692017, "logps/chosen": -142.55825805664062, "logps/rejected": -87.43199157714844, "loss": 0.3309, "rewards/accuracies": 1.0, "rewards/chosen": -3.086632490158081, "rewards/margins": 0.11505746841430664, "rewards/rejected": -3.2016899585723877, "step": 1115 }, { "epoch": 0.25, "learning_rate": 9.772906516380594e-06, "logits/chosen": -0.6489067673683167, "logits/rejected": -0.65987229347229, "logps/chosen": -96.41178894042969, "logps/rejected": -52.51915740966797, "loss": 0.1926, "rewards/accuracies": 1.0, "rewards/chosen": -1.6664161682128906, "rewards/margins": 0.7549586296081543, "rewards/rejected": -2.421374797821045, "step": 1116 }, { "epoch": 0.25, "learning_rate": 9.772372184586551e-06, "logits/chosen": -0.7730804085731506, "logits/rejected": -0.8126676678657532, "logps/chosen": -182.76100158691406, "logps/rejected": -93.84281158447266, "loss": 0.5888, "rewards/accuracies": 0.0, "rewards/chosen": 0.4771011471748352, "rewards/margins": -0.7486763596534729, "rewards/rejected": 1.225777506828308, "step": 1117 }, { "epoch": 0.25, "learning_rate": 9.771837239554156e-06, "logits/chosen": -0.5180425643920898, "logits/rejected": -0.5110189318656921, "logps/chosen": -68.27750396728516, "logps/rejected": -103.99553680419922, "loss": 0.1225, "rewards/accuracies": 1.0, "rewards/chosen": -2.1160619258880615, "rewards/margins": 1.2822437286376953, "rewards/rejected": -3.398305654525757, "step": 1118 }, { "epoch": 0.25, "learning_rate": 9.771301681352148e-06, "logits/chosen": -0.6831977367401123, "logits/rejected": -0.6831977367401123, "logps/chosen": -175.2966766357422, "logps/rejected": -175.2966766357422, "loss": 0.3483, "rewards/accuracies": 0.0, "rewards/chosen": -1.0033096075057983, "rewards/margins": 0.0, "rewards/rejected": -1.0033096075057983, "step": 1119 }, { "epoch": 0.25, "learning_rate": 9.770765510049342e-06, "logits/chosen": -0.5988656282424927, "logits/rejected": -0.5998958945274353, "logps/chosen": -168.60609436035156, "logps/rejected": -242.7286834716797, "loss": 2.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.4878219664096832, "rewards/margins": 0.05687406659126282, "rewards/rejected": -0.544696033000946, "step": 1120 }, { "epoch": 0.25, "learning_rate": 9.770228725714637e-06, "logits/chosen": -0.8991212844848633, "logits/rejected": -0.8182979226112366, "logps/chosen": -111.47148895263672, "logps/rejected": -210.32266235351562, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.0015106201171875, "rewards/margins": 6.677072048187256, "rewards/rejected": -5.675561428070068, "step": 1121 }, { "epoch": 0.25, "learning_rate": 9.769691328417008e-06, "logits/chosen": -0.6565179228782654, "logits/rejected": -0.6565179228782654, "logps/chosen": -50.94051742553711, "logps/rejected": -50.94051742553711, "loss": 0.3482, "rewards/accuracies": 0.0, "rewards/chosen": -1.8751347064971924, "rewards/margins": 0.0, "rewards/rejected": -1.8751347064971924, "step": 1122 }, { "epoch": 0.25, "learning_rate": 9.769153318225509e-06, "logits/chosen": -0.7676279544830322, "logits/rejected": -0.7448996901512146, "logps/chosen": -73.9609146118164, "logps/rejected": -62.775726318359375, "loss": 0.1578, "rewards/accuracies": 1.0, "rewards/chosen": 0.11262359470129013, "rewards/margins": 0.9915016293525696, "rewards/rejected": -0.8788780570030212, "step": 1123 }, { "epoch": 0.25, "learning_rate": 9.768614695209273e-06, "logits/chosen": -1.2491698265075684, "logits/rejected": -1.1408483982086182, "logps/chosen": -96.88154602050781, "logps/rejected": -168.0034637451172, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.6722335815429688, "rewards/margins": 5.50701904296875, "rewards/rejected": -4.834785461425781, "step": 1124 }, { "epoch": 0.25, "learning_rate": 9.768075459437513e-06, "logits/chosen": -1.1531883478164673, "logits/rejected": -1.1073533296585083, "logps/chosen": -117.01808166503906, "logps/rejected": -187.34544372558594, "loss": 0.1326, "rewards/accuracies": 1.0, "rewards/chosen": -1.674407958984375, "rewards/margins": 1.3482422828674316, "rewards/rejected": -3.0226502418518066, "step": 1125 }, { "epoch": 0.25, "learning_rate": 9.76753561097952e-06, "logits/chosen": -0.8176724910736084, "logits/rejected": -0.8078334331512451, "logps/chosen": -90.43860626220703, "logps/rejected": -199.11732482910156, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.5487068295478821, "rewards/margins": 6.596580982208252, "rewards/rejected": -7.145287990570068, "step": 1126 }, { "epoch": 0.25, "learning_rate": 9.766995149904658e-06, "logits/chosen": -1.0339531898498535, "logits/rejected": -1.0468628406524658, "logps/chosen": -132.99166870117188, "logps/rejected": -148.91168212890625, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": 1.1402069330215454, "rewards/margins": 5.025028228759766, "rewards/rejected": -3.8848214149475098, "step": 1127 }, { "epoch": 0.25, "learning_rate": 9.766454076282382e-06, "logits/chosen": -0.7686046957969666, "logits/rejected": -0.7670966386795044, "logps/chosen": -93.10458374023438, "logps/rejected": -80.22254943847656, "loss": 0.8569, "rewards/accuracies": 0.0, "rewards/chosen": -1.922796607017517, "rewards/margins": -1.5131499767303467, "rewards/rejected": -0.409646600484848, "step": 1128 }, { "epoch": 0.25, "learning_rate": 9.765912390182216e-06, "logits/chosen": -0.520513117313385, "logits/rejected": -0.4431549310684204, "logps/chosen": -234.67904663085938, "logps/rejected": -195.1601104736328, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.974700927734375, "rewards/margins": 5.642233371734619, "rewards/rejected": -4.667532444000244, "step": 1129 }, { "epoch": 0.25, "learning_rate": 9.765370091673762e-06, "logits/chosen": -0.9594337940216064, "logits/rejected": -0.9594337940216064, "logps/chosen": -125.75668334960938, "logps/rejected": -125.75668334960938, "loss": 0.3556, "rewards/accuracies": 0.0, "rewards/chosen": -2.0917282104492188, "rewards/margins": 0.0, "rewards/rejected": -2.0917282104492188, "step": 1130 }, { "epoch": 0.25, "learning_rate": 9.764827180826708e-06, "logits/chosen": -0.4607485234737396, "logits/rejected": -0.4607485234737396, "logps/chosen": -147.04690551757812, "logps/rejected": -147.04690551757812, "loss": 0.3471, "rewards/accuracies": 0.0, "rewards/chosen": -2.310406446456909, "rewards/margins": 0.0, "rewards/rejected": -2.310406446456909, "step": 1131 }, { "epoch": 0.25, "learning_rate": 9.764283657710815e-06, "logits/chosen": -0.6871556639671326, "logits/rejected": -0.6863947510719299, "logps/chosen": -258.0555725097656, "logps/rejected": -235.18380737304688, "loss": 0.1223, "rewards/accuracies": 1.0, "rewards/chosen": -0.5869415402412415, "rewards/margins": 1.405635118484497, "rewards/rejected": -1.9925765991210938, "step": 1132 }, { "epoch": 0.25, "learning_rate": 9.763739522395926e-06, "logits/chosen": -1.1582510471343994, "logits/rejected": -1.1561216115951538, "logps/chosen": -82.0557861328125, "logps/rejected": -136.64593505859375, "loss": 0.6961, "rewards/accuracies": 1.0, "rewards/chosen": -0.2781936824321747, "rewards/margins": 3.161198377609253, "rewards/rejected": -3.43939208984375, "step": 1133 }, { "epoch": 0.25, "learning_rate": 9.76319477495196e-06, "logits/chosen": -0.739702582359314, "logits/rejected": -0.5168038010597229, "logps/chosen": -96.63905334472656, "logps/rejected": -487.5986633300781, "loss": 0.0734, "rewards/accuracies": 1.0, "rewards/chosen": -0.32993316650390625, "rewards/margins": 39.13064956665039, "rewards/rejected": -39.4605827331543, "step": 1134 }, { "epoch": 0.25, "learning_rate": 9.762649415448916e-06, "logits/chosen": -0.8523843288421631, "logits/rejected": -0.8632545471191406, "logps/chosen": -182.56846618652344, "logps/rejected": -97.88236999511719, "loss": 0.091, "rewards/accuracies": 1.0, "rewards/chosen": 1.0066986083984375, "rewards/margins": 1.6978614330291748, "rewards/rejected": -0.6911628842353821, "step": 1135 }, { "epoch": 0.25, "learning_rate": 9.76210344395687e-06, "logits/chosen": -1.1733752489089966, "logits/rejected": -1.348776936531067, "logps/chosen": -198.6908721923828, "logps/rejected": -155.5291290283203, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": 1.6346282958984375, "rewards/margins": 11.961580276489258, "rewards/rejected": -10.32695198059082, "step": 1136 }, { "epoch": 0.25, "learning_rate": 9.76155686054598e-06, "logits/chosen": -0.8231512904167175, "logits/rejected": -0.8887045979499817, "logps/chosen": -206.67108154296875, "logps/rejected": -159.86886596679688, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -1.050103783607483, "rewards/margins": 3.820373058319092, "rewards/rejected": -4.870476722717285, "step": 1137 }, { "epoch": 0.25, "learning_rate": 9.76100966528648e-06, "logits/chosen": -0.5210387110710144, "logits/rejected": -0.45746931433677673, "logps/chosen": -90.09827423095703, "logps/rejected": -206.75340270996094, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -1.4092025756835938, "rewards/margins": 3.361577033996582, "rewards/rejected": -4.770779609680176, "step": 1138 }, { "epoch": 0.25, "learning_rate": 9.760461858248684e-06, "logits/chosen": -0.8128334879875183, "logits/rejected": -0.7815489768981934, "logps/chosen": -121.52423095703125, "logps/rejected": -161.8089599609375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 0.11623382568359375, "rewards/margins": 4.887768745422363, "rewards/rejected": -4.7715349197387695, "step": 1139 }, { "epoch": 0.25, "learning_rate": 9.759913439502982e-06, "logits/chosen": -0.7521023750305176, "logits/rejected": -0.73283851146698, "logps/chosen": -102.88233947753906, "logps/rejected": -191.89291381835938, "loss": 0.2965, "rewards/accuracies": 1.0, "rewards/chosen": -0.8939926028251648, "rewards/margins": 0.2204177975654602, "rewards/rejected": -1.114410400390625, "step": 1140 }, { "epoch": 0.25, "learning_rate": 9.759364409119844e-06, "logits/chosen": -0.5239502191543579, "logits/rejected": -0.4786500632762909, "logps/chosen": -63.47006607055664, "logps/rejected": -80.08256530761719, "loss": 0.0721, "rewards/accuracies": 1.0, "rewards/chosen": 0.34763452410697937, "rewards/margins": 2.0617737770080566, "rewards/rejected": -1.7141392230987549, "step": 1141 }, { "epoch": 0.25, "learning_rate": 9.758814767169825e-06, "logits/chosen": -0.6005902290344238, "logits/rejected": -0.5660770535469055, "logps/chosen": -60.267723083496094, "logps/rejected": -71.25841522216797, "loss": 0.0619, "rewards/accuracies": 1.0, "rewards/chosen": -0.3244079649448395, "rewards/margins": 2.628342390060425, "rewards/rejected": -2.9527504444122314, "step": 1142 }, { "epoch": 0.25, "learning_rate": 9.758264513723544e-06, "logits/chosen": -0.6259584426879883, "logits/rejected": -0.6572054028511047, "logps/chosen": -97.06288146972656, "logps/rejected": -171.80960083007812, "loss": 0.0782, "rewards/accuracies": 1.0, "rewards/chosen": -0.015232086181640625, "rewards/margins": 1.7800896167755127, "rewards/rejected": -1.7953217029571533, "step": 1143 }, { "epoch": 0.25, "learning_rate": 9.757713648851714e-06, "logits/chosen": -0.670208215713501, "logits/rejected": -0.5867244601249695, "logps/chosen": -279.416259765625, "logps/rejected": -289.1978759765625, "loss": 0.3595, "rewards/accuracies": 1.0, "rewards/chosen": -4.7692551612854, "rewards/margins": 2.157515048980713, "rewards/rejected": -6.926770210266113, "step": 1144 }, { "epoch": 0.25, "learning_rate": 9.757162172625116e-06, "logits/chosen": -0.6830536127090454, "logits/rejected": -0.5405457615852356, "logps/chosen": -264.2530212402344, "logps/rejected": -286.2305908203125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.5378570556640625, "rewards/margins": 7.352563381195068, "rewards/rejected": -7.890420436859131, "step": 1145 }, { "epoch": 0.25, "learning_rate": 9.756610085114615e-06, "logits/chosen": -0.8911576271057129, "logits/rejected": -0.8957493305206299, "logps/chosen": -78.58554077148438, "logps/rejected": -28.628860473632812, "loss": 0.4865, "rewards/accuracies": 0.0, "rewards/chosen": -1.6209766864776611, "rewards/margins": -0.436260461807251, "rewards/rejected": -1.1847162246704102, "step": 1146 }, { "epoch": 0.25, "learning_rate": 9.756057386391154e-06, "logits/chosen": -0.6137781739234924, "logits/rejected": -0.4629667401313782, "logps/chosen": -258.66680908203125, "logps/rejected": -43.606590270996094, "loss": 0.4065, "rewards/accuracies": 1.0, "rewards/chosen": 0.48888856172561646, "rewards/margins": 1.7965028285980225, "rewards/rejected": -1.3076143264770508, "step": 1147 }, { "epoch": 0.25, "learning_rate": 9.75550407652575e-06, "logits/chosen": -1.0102587938308716, "logits/rejected": -0.9495688676834106, "logps/chosen": -135.71401977539062, "logps/rejected": -246.1366424560547, "loss": 0.1748, "rewards/accuracies": 1.0, "rewards/chosen": 0.7899444699287415, "rewards/margins": 1.7616806030273438, "rewards/rejected": -0.9717361330986023, "step": 1148 }, { "epoch": 0.25, "learning_rate": 9.754950155589504e-06, "logits/chosen": -0.7267551422119141, "logits/rejected": -0.7596482038497925, "logps/chosen": -61.58942413330078, "logps/rejected": -75.75740051269531, "loss": 0.1985, "rewards/accuracies": 1.0, "rewards/chosen": 0.4209136962890625, "rewards/margins": 0.7877441644668579, "rewards/rejected": -0.366830438375473, "step": 1149 }, { "epoch": 0.25, "learning_rate": 9.754395623653595e-06, "logits/chosen": -0.6322858929634094, "logits/rejected": -0.6322858929634094, "logps/chosen": -98.02864074707031, "logps/rejected": -98.02864074707031, "loss": 0.3685, "rewards/accuracies": 0.0, "rewards/chosen": -0.6338562369346619, "rewards/margins": 0.0, "rewards/rejected": -0.6338562369346619, "step": 1150 }, { "epoch": 0.25, "learning_rate": 9.753840480789278e-06, "logits/chosen": -0.9193819761276245, "logits/rejected": -0.8679331541061401, "logps/chosen": -72.5465087890625, "logps/rejected": -199.16024780273438, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": 0.54928058385849, "rewards/margins": 8.248942375183105, "rewards/rejected": -7.699662208557129, "step": 1151 }, { "epoch": 0.25, "learning_rate": 9.753284727067886e-06, "logits/chosen": -0.6138270497322083, "logits/rejected": -0.6063303351402283, "logps/chosen": -81.76077270507812, "logps/rejected": -100.1990737915039, "loss": 0.1019, "rewards/accuracies": 1.0, "rewards/chosen": -0.20221252739429474, "rewards/margins": 1.971479892730713, "rewards/rejected": -2.173692464828491, "step": 1152 }, { "epoch": 0.26, "learning_rate": 9.752728362560834e-06, "logits/chosen": -0.8344901204109192, "logits/rejected": -0.8560538291931152, "logps/chosen": -102.55025482177734, "logps/rejected": -95.408935546875, "loss": 0.13, "rewards/accuracies": 1.0, "rewards/chosen": -0.17670059204101562, "rewards/margins": 1.229016900062561, "rewards/rejected": -1.4057174921035767, "step": 1153 }, { "epoch": 0.26, "learning_rate": 9.752171387339612e-06, "logits/chosen": -0.7787975668907166, "logits/rejected": -0.8109242916107178, "logps/chosen": -203.84930419921875, "logps/rejected": -140.2190704345703, "loss": 0.3079, "rewards/accuracies": 1.0, "rewards/chosen": -0.0056442259810864925, "rewards/margins": 0.16543731093406677, "rewards/rejected": -0.17108154296875, "step": 1154 }, { "epoch": 0.26, "learning_rate": 9.75161380147579e-06, "logits/chosen": -0.6868268251419067, "logits/rejected": -0.6710410118103027, "logps/chosen": -85.34026336669922, "logps/rejected": -91.24826049804688, "loss": 0.0414, "rewards/accuracies": 1.0, "rewards/chosen": -0.9096023440361023, "rewards/margins": 2.6739609241485596, "rewards/rejected": -3.5835633277893066, "step": 1155 }, { "epoch": 0.26, "learning_rate": 9.751055605041017e-06, "logits/chosen": -0.48415637016296387, "logits/rejected": -0.39633792638778687, "logps/chosen": -145.2239532470703, "logps/rejected": -62.0701904296875, "loss": 0.1232, "rewards/accuracies": 1.0, "rewards/chosen": -0.7748977541923523, "rewards/margins": 2.1875336170196533, "rewards/rejected": -2.9624314308166504, "step": 1156 }, { "epoch": 0.26, "learning_rate": 9.750496798107021e-06, "logits/chosen": -0.8177793622016907, "logits/rejected": -0.7848567366600037, "logps/chosen": -82.22289276123047, "logps/rejected": -142.98794555664062, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": -1.148271918296814, "rewards/margins": 2.7324037551879883, "rewards/rejected": -3.8806755542755127, "step": 1157 }, { "epoch": 0.26, "learning_rate": 9.749937380745607e-06, "logits/chosen": -0.7414231896400452, "logits/rejected": -0.7549546957015991, "logps/chosen": -95.83182525634766, "logps/rejected": -154.2535400390625, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -1.238501787185669, "rewards/margins": 3.3143279552459717, "rewards/rejected": -4.552829742431641, "step": 1158 }, { "epoch": 0.26, "learning_rate": 9.749377353028657e-06, "logits/chosen": -0.7157396078109741, "logits/rejected": -0.7277716994285583, "logps/chosen": -134.33331298828125, "logps/rejected": -160.3411407470703, "loss": 0.2414, "rewards/accuracies": 1.0, "rewards/chosen": 0.5775360465049744, "rewards/margins": 0.47716525197029114, "rewards/rejected": 0.10037078708410263, "step": 1159 }, { "epoch": 0.26, "learning_rate": 9.748816715028135e-06, "logits/chosen": -0.685893714427948, "logits/rejected": -0.7208700776100159, "logps/chosen": -123.88197326660156, "logps/rejected": -109.7144775390625, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": 1.4390274286270142, "rewards/margins": 3.3510751724243164, "rewards/rejected": -1.9120476245880127, "step": 1160 }, { "epoch": 0.26, "learning_rate": 9.748255466816081e-06, "logits/chosen": -0.8416329026222229, "logits/rejected": -0.8416329026222229, "logps/chosen": -84.88032531738281, "logps/rejected": -84.88032531738281, "loss": 0.4498, "rewards/accuracies": 0.0, "rewards/chosen": 0.3258865475654602, "rewards/margins": 0.0, "rewards/rejected": 0.3258865475654602, "step": 1161 }, { "epoch": 0.26, "learning_rate": 9.747693608464614e-06, "logits/chosen": -0.7433655261993408, "logits/rejected": -0.6331396698951721, "logps/chosen": -122.83609771728516, "logps/rejected": -81.85669708251953, "loss": 0.5112, "rewards/accuracies": 1.0, "rewards/chosen": -1.875566840171814, "rewards/margins": 0.9419339895248413, "rewards/rejected": -2.8175008296966553, "step": 1162 }, { "epoch": 0.26, "learning_rate": 9.74713114004593e-06, "logits/chosen": -1.0514153242111206, "logits/rejected": -1.0883939266204834, "logps/chosen": -167.92825317382812, "logps/rejected": -188.7845916748047, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.582562208175659, "rewards/margins": 8.170631408691406, "rewards/rejected": -5.588069438934326, "step": 1163 }, { "epoch": 0.26, "learning_rate": 9.746568061632308e-06, "logits/chosen": -0.828968346118927, "logits/rejected": -0.8435471057891846, "logps/chosen": -205.9828338623047, "logps/rejected": -111.73780822753906, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": 0.2918228209018707, "rewards/margins": 2.8032586574554443, "rewards/rejected": -2.5114357471466064, "step": 1164 }, { "epoch": 0.26, "learning_rate": 9.746004373296099e-06, "logits/chosen": -0.5991516709327698, "logits/rejected": -0.590381383895874, "logps/chosen": -234.4446258544922, "logps/rejected": -56.681556701660156, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": 1.8828216791152954, "rewards/margins": 3.411787986755371, "rewards/rejected": -1.5289661884307861, "step": 1165 }, { "epoch": 0.26, "learning_rate": 9.745440075109738e-06, "logits/chosen": -0.7205771803855896, "logits/rejected": -0.7188141942024231, "logps/chosen": -187.04641723632812, "logps/rejected": -231.93557739257812, "loss": 1.1129, "rewards/accuracies": 0.0, "rewards/chosen": -0.8949920535087585, "rewards/margins": -2.110626220703125, "rewards/rejected": 1.2156342267990112, "step": 1166 }, { "epoch": 0.26, "learning_rate": 9.744875167145735e-06, "logits/chosen": -0.9715285301208496, "logits/rejected": -0.9191458225250244, "logps/chosen": -96.939208984375, "logps/rejected": -199.5213623046875, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": 0.5962730646133423, "rewards/margins": 6.59459924697876, "rewards/rejected": -5.998326301574707, "step": 1167 }, { "epoch": 0.26, "learning_rate": 9.74430964947668e-06, "logits/chosen": -0.9597029089927673, "logits/rejected": -0.9597029089927673, "logps/chosen": -103.68113708496094, "logps/rejected": -103.68113708496094, "loss": 0.4929, "rewards/accuracies": 0.0, "rewards/chosen": -0.067235566675663, "rewards/margins": 0.0, "rewards/rejected": -0.067235566675663, "step": 1168 }, { "epoch": 0.26, "learning_rate": 9.74374352217524e-06, "logits/chosen": -0.426794171333313, "logits/rejected": -0.43867093324661255, "logps/chosen": -98.91972351074219, "logps/rejected": -88.15116119384766, "loss": 0.5332, "rewards/accuracies": 0.0, "rewards/chosen": -3.3962502479553223, "rewards/margins": -0.6279683113098145, "rewards/rejected": -2.768281936645508, "step": 1169 }, { "epoch": 0.26, "learning_rate": 9.743176785314159e-06, "logits/chosen": -0.8150225281715393, "logits/rejected": -0.7914477586746216, "logps/chosen": -120.43550109863281, "logps/rejected": -127.05400085449219, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": 0.14211273193359375, "rewards/margins": 2.5796761512756348, "rewards/rejected": -2.437563419342041, "step": 1170 }, { "epoch": 0.26, "learning_rate": 9.742609438966265e-06, "logits/chosen": -0.8372529149055481, "logits/rejected": -0.8150993585586548, "logps/chosen": -134.20401000976562, "logps/rejected": -155.234130859375, "loss": 0.0432, "rewards/accuracies": 1.0, "rewards/chosen": -3.33636474609375, "rewards/margins": 2.565387725830078, "rewards/rejected": -5.901752471923828, "step": 1171 }, { "epoch": 0.26, "learning_rate": 9.74204148320446e-06, "logits/chosen": -0.6305258870124817, "logits/rejected": -0.6305258870124817, "logps/chosen": -151.36444091796875, "logps/rejected": -151.36444091796875, "loss": 0.3537, "rewards/accuracies": 0.0, "rewards/chosen": -3.6237831115722656, "rewards/margins": 0.0, "rewards/rejected": -3.6237831115722656, "step": 1172 }, { "epoch": 0.26, "learning_rate": 9.741472918101722e-06, "logits/chosen": -1.1622182130813599, "logits/rejected": -1.1066429615020752, "logps/chosen": -156.6687774658203, "logps/rejected": -208.41592407226562, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.9917876720428467, "rewards/margins": 7.258996963500977, "rewards/rejected": -10.250784873962402, "step": 1173 }, { "epoch": 0.26, "learning_rate": 9.740903743731113e-06, "logits/chosen": -0.9003723859786987, "logits/rejected": -0.850916862487793, "logps/chosen": -62.8154296875, "logps/rejected": -147.98976135253906, "loss": 0.1141, "rewards/accuracies": 1.0, "rewards/chosen": -0.8735683560371399, "rewards/margins": 4.549251079559326, "rewards/rejected": -5.4228196144104, "step": 1174 }, { "epoch": 0.26, "learning_rate": 9.74033396016577e-06, "logits/chosen": -0.8270854949951172, "logits/rejected": -0.45485153794288635, "logps/chosen": -116.82731628417969, "logps/rejected": -648.761474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.364724725484848, "rewards/margins": 39.03561019897461, "rewards/rejected": -39.400333404541016, "step": 1175 }, { "epoch": 0.26, "learning_rate": 9.739763567478908e-06, "logits/chosen": -0.9586074352264404, "logits/rejected": -0.8676226735115051, "logps/chosen": -138.00717163085938, "logps/rejected": -253.539794921875, "loss": 0.7109, "rewards/accuracies": 0.0, "rewards/chosen": -0.8165008425712585, "rewards/margins": -1.1457855701446533, "rewards/rejected": 0.32928466796875, "step": 1176 }, { "epoch": 0.26, "learning_rate": 9.739192565743822e-06, "logits/chosen": -1.0153101682662964, "logits/rejected": -0.9337356686592102, "logps/chosen": -141.7732391357422, "logps/rejected": -196.38059997558594, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.3153671324253082, "rewards/margins": 6.642145156860352, "rewards/rejected": -6.326777935028076, "step": 1177 }, { "epoch": 0.26, "learning_rate": 9.738620955033883e-06, "logits/chosen": -1.1114921569824219, "logits/rejected": -1.111946702003479, "logps/chosen": -133.98239135742188, "logps/rejected": -91.6584701538086, "loss": 0.0637, "rewards/accuracies": 1.0, "rewards/chosen": -0.8795151114463806, "rewards/margins": 1.9968674182891846, "rewards/rejected": -2.87638258934021, "step": 1178 }, { "epoch": 0.26, "learning_rate": 9.738048735422545e-06, "logits/chosen": -1.051376461982727, "logits/rejected": -0.9950390458106995, "logps/chosen": -70.74710083007812, "logps/rejected": -168.0345458984375, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.2569824159145355, "rewards/margins": 3.847958564758301, "rewards/rejected": -4.104940891265869, "step": 1179 }, { "epoch": 0.26, "learning_rate": 9.737475906983333e-06, "logits/chosen": -0.5200145840644836, "logits/rejected": -0.49385520815849304, "logps/chosen": -100.7928237915039, "logps/rejected": -149.6488494873047, "loss": 0.2484, "rewards/accuracies": 1.0, "rewards/chosen": -0.42211610078811646, "rewards/margins": 3.8337509632110596, "rewards/rejected": -4.255867004394531, "step": 1180 }, { "epoch": 0.26, "learning_rate": 9.736902469789855e-06, "logits/chosen": -0.6725670099258423, "logits/rejected": -0.6474598050117493, "logps/chosen": -66.45917510986328, "logps/rejected": -119.1898422241211, "loss": 0.0632, "rewards/accuracies": 1.0, "rewards/chosen": -1.668299913406372, "rewards/margins": 2.450242757797241, "rewards/rejected": -4.118542671203613, "step": 1181 }, { "epoch": 0.26, "learning_rate": 9.736328423915797e-06, "logits/chosen": -0.644637405872345, "logits/rejected": -0.3233669698238373, "logps/chosen": -57.23312759399414, "logps/rejected": -313.4571533203125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.232105255126953, "rewards/margins": 13.241621971130371, "rewards/rejected": -15.473727226257324, "step": 1182 }, { "epoch": 0.26, "learning_rate": 9.735753769434923e-06, "logits/chosen": -1.1154825687408447, "logits/rejected": -1.1152088642120361, "logps/chosen": -125.442138671875, "logps/rejected": -169.84962463378906, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": 0.08579864352941513, "rewards/margins": 2.172384738922119, "rewards/rejected": -2.0865859985351562, "step": 1183 }, { "epoch": 0.26, "learning_rate": 9.735178506421075e-06, "logits/chosen": -0.7638503313064575, "logits/rejected": -0.7794737815856934, "logps/chosen": -133.96865844726562, "logps/rejected": -173.8795623779297, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": 0.9320403933525085, "rewards/margins": 5.013078212738037, "rewards/rejected": -4.081037998199463, "step": 1184 }, { "epoch": 0.26, "learning_rate": 9.73460263494817e-06, "logits/chosen": -1.0418888330459595, "logits/rejected": -1.0923360586166382, "logps/chosen": -178.02389526367188, "logps/rejected": -43.87460708618164, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.479238897562027, "rewards/margins": 2.1597416400909424, "rewards/rejected": -1.6805027723312378, "step": 1185 }, { "epoch": 0.26, "learning_rate": 9.734026155090208e-06, "logits/chosen": -1.0393041372299194, "logits/rejected": -1.0050312280654907, "logps/chosen": -175.395751953125, "logps/rejected": -252.30462646484375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 1.2746765613555908, "rewards/margins": 5.96568489074707, "rewards/rejected": -4.6910080909729, "step": 1186 }, { "epoch": 0.26, "learning_rate": 9.733449066921268e-06, "logits/chosen": -0.8282255530357361, "logits/rejected": -0.8630048632621765, "logps/chosen": -173.30410766601562, "logps/rejected": -215.99276733398438, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -0.610241711139679, "rewards/margins": 6.772936820983887, "rewards/rejected": -7.3831787109375, "step": 1187 }, { "epoch": 0.26, "learning_rate": 9.7328713705155e-06, "logits/chosen": -0.8733800053596497, "logits/rejected": -0.845757246017456, "logps/chosen": -171.3619384765625, "logps/rejected": -197.15431213378906, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.050837755203247, "rewards/margins": 4.804308891296387, "rewards/rejected": -5.855146884918213, "step": 1188 }, { "epoch": 0.26, "learning_rate": 9.732293065947138e-06, "logits/chosen": -0.5240437984466553, "logits/rejected": -0.5463085174560547, "logps/chosen": -42.899391174316406, "logps/rejected": -65.632568359375, "loss": 0.2384, "rewards/accuracies": 1.0, "rewards/chosen": -1.016815185546875, "rewards/margins": 0.8902488946914673, "rewards/rejected": -1.9070640802383423, "step": 1189 }, { "epoch": 0.26, "learning_rate": 9.731714153290492e-06, "logits/chosen": -0.8904759287834167, "logits/rejected": -0.9170765280723572, "logps/chosen": -97.97943115234375, "logps/rejected": -51.129337310791016, "loss": 0.3754, "rewards/accuracies": 1.0, "rewards/chosen": 1.0031365156173706, "rewards/margins": 2.8240299224853516, "rewards/rejected": -1.8208935260772705, "step": 1190 }, { "epoch": 0.26, "learning_rate": 9.731134632619954e-06, "logits/chosen": -0.32133617997169495, "logits/rejected": -0.32133617997169495, "logps/chosen": -46.31635284423828, "logps/rejected": -46.31635284423828, "loss": 0.3641, "rewards/accuracies": 0.0, "rewards/chosen": -2.733760356903076, "rewards/margins": 0.0, "rewards/rejected": -2.733760356903076, "step": 1191 }, { "epoch": 0.26, "learning_rate": 9.73055450400999e-06, "logits/chosen": -1.0493086576461792, "logits/rejected": -0.9886299967765808, "logps/chosen": -105.38544464111328, "logps/rejected": -226.61663818359375, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": -0.27643662691116333, "rewards/margins": 2.4119057655334473, "rewards/rejected": -2.688342332839966, "step": 1192 }, { "epoch": 0.26, "learning_rate": 9.729973767535142e-06, "logits/chosen": -0.8364852666854858, "logits/rejected": -0.8021722435951233, "logps/chosen": -137.53289794921875, "logps/rejected": -201.8455810546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.5267730951309204, "rewards/margins": 8.087099075317383, "rewards/rejected": -7.560325622558594, "step": 1193 }, { "epoch": 0.26, "learning_rate": 9.729392423270036e-06, "logits/chosen": -0.5479331016540527, "logits/rejected": -0.5479331016540527, "logps/chosen": -139.12625122070312, "logps/rejected": -139.12625122070312, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.5163543224334717, "rewards/margins": 0.0, "rewards/rejected": -2.5163543224334717, "step": 1194 }, { "epoch": 0.26, "learning_rate": 9.728810471289374e-06, "logits/chosen": -0.763278603553772, "logits/rejected": -0.715974748134613, "logps/chosen": -107.42801666259766, "logps/rejected": -131.47695922851562, "loss": 0.0359, "rewards/accuracies": 1.0, "rewards/chosen": -0.9165611267089844, "rewards/margins": 2.6248443126678467, "rewards/rejected": -3.541405439376831, "step": 1195 }, { "epoch": 0.26, "learning_rate": 9.728227911667934e-06, "logits/chosen": -0.635701060295105, "logits/rejected": -0.5812175869941711, "logps/chosen": -75.3025894165039, "logps/rejected": -116.26301574707031, "loss": 0.1031, "rewards/accuracies": 1.0, "rewards/chosen": -1.399438500404358, "rewards/margins": 1.4844046831130981, "rewards/rejected": -2.883843183517456, "step": 1196 }, { "epoch": 0.26, "learning_rate": 9.727644744480571e-06, "logits/chosen": -1.0070602893829346, "logits/rejected": -1.0193819999694824, "logps/chosen": -193.60169982910156, "logps/rejected": -184.17340087890625, "loss": 0.0449, "rewards/accuracies": 1.0, "rewards/chosen": -1.6140152215957642, "rewards/margins": 2.3662428855895996, "rewards/rejected": -3.9802582263946533, "step": 1197 }, { "epoch": 0.27, "learning_rate": 9.727060969802226e-06, "logits/chosen": -0.894834578037262, "logits/rejected": -0.8754928708076477, "logps/chosen": -166.6483154296875, "logps/rejected": -151.99325561523438, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.0538864135742188, "rewards/margins": 7.27090311050415, "rewards/rejected": -6.217016696929932, "step": 1198 }, { "epoch": 0.27, "learning_rate": 9.726476587707908e-06, "logits/chosen": -0.570041835308075, "logits/rejected": -0.4709413945674896, "logps/chosen": -246.73609924316406, "logps/rejected": -217.7938232421875, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": 0.5918990969657898, "rewards/margins": 10.861903190612793, "rewards/rejected": -10.270004272460938, "step": 1199 }, { "epoch": 0.27, "learning_rate": 9.725891598272711e-06, "logits/chosen": -0.6754300594329834, "logits/rejected": -0.6823149919509888, "logps/chosen": -119.50822448730469, "logps/rejected": -180.33065795898438, "loss": 0.0414, "rewards/accuracies": 1.0, "rewards/chosen": 0.8385528922080994, "rewards/margins": 6.446146488189697, "rewards/rejected": -5.607593536376953, "step": 1200 }, { "epoch": 0.27, "learning_rate": 9.725306001571806e-06, "logits/chosen": -1.0257741212844849, "logits/rejected": -0.9922827482223511, "logps/chosen": -116.28944396972656, "logps/rejected": -98.06123352050781, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": 1.3951218128204346, "rewards/margins": 2.4560303688049316, "rewards/rejected": -1.060908555984497, "step": 1201 }, { "epoch": 0.27, "learning_rate": 9.72471979768044e-06, "logits/chosen": -0.9665670990943909, "logits/rejected": -1.032539963722229, "logps/chosen": -115.33058166503906, "logps/rejected": -123.8406982421875, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": -0.5609650015830994, "rewards/margins": 3.1652190685272217, "rewards/rejected": -3.726184129714966, "step": 1202 }, { "epoch": 0.27, "learning_rate": 9.724132986673935e-06, "logits/chosen": -0.4759719669818878, "logits/rejected": -0.4852367043495178, "logps/chosen": -127.92434692382812, "logps/rejected": -294.4868469238281, "loss": 0.0463, "rewards/accuracies": 1.0, "rewards/chosen": 0.2003021240234375, "rewards/margins": 2.707690477371216, "rewards/rejected": -2.5073883533477783, "step": 1203 }, { "epoch": 0.27, "learning_rate": 9.723545568627699e-06, "logits/chosen": -0.7556560039520264, "logits/rejected": -0.7222690582275391, "logps/chosen": -150.0054931640625, "logps/rejected": -112.47337341308594, "loss": 0.0498, "rewards/accuracies": 1.0, "rewards/chosen": 0.8330078125, "rewards/margins": 2.9674363136291504, "rewards/rejected": -2.1344285011291504, "step": 1204 }, { "epoch": 0.27, "learning_rate": 9.722957543617211e-06, "logits/chosen": -0.6687217950820923, "logits/rejected": -0.6453049778938293, "logps/chosen": -211.00595092773438, "logps/rejected": -60.493446350097656, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 1.1572341918945312, "rewards/margins": 4.483123779296875, "rewards/rejected": -3.3258893489837646, "step": 1205 }, { "epoch": 0.27, "learning_rate": 9.722368911718034e-06, "logits/chosen": -0.6425851583480835, "logits/rejected": -0.6592893600463867, "logps/chosen": -46.01156234741211, "logps/rejected": -16.25802230834961, "loss": 0.208, "rewards/accuracies": 1.0, "rewards/chosen": -0.05796203762292862, "rewards/margins": 0.7202070951461792, "rewards/rejected": -0.7781691551208496, "step": 1206 }, { "epoch": 0.27, "learning_rate": 9.721779673005805e-06, "logits/chosen": -0.74239581823349, "logits/rejected": -0.6860469579696655, "logps/chosen": -117.37855529785156, "logps/rejected": -244.16864013671875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2674522399902344, "rewards/margins": 7.352110862731934, "rewards/rejected": -8.619563102722168, "step": 1207 }, { "epoch": 0.27, "learning_rate": 9.721189827556237e-06, "logits/chosen": -0.747098982334137, "logits/rejected": -0.6915969252586365, "logps/chosen": -195.041748046875, "logps/rejected": -262.572021484375, "loss": 0.1016, "rewards/accuracies": 1.0, "rewards/chosen": -2.975943088531494, "rewards/margins": 6.713196277618408, "rewards/rejected": -9.689139366149902, "step": 1208 }, { "epoch": 0.27, "learning_rate": 9.720599375445125e-06, "logits/chosen": -0.59612637758255, "logits/rejected": -0.5555551648139954, "logps/chosen": -86.10292053222656, "logps/rejected": -233.5477294921875, "loss": 0.0628, "rewards/accuracies": 1.0, "rewards/chosen": -1.1340073347091675, "rewards/margins": 7.909205436706543, "rewards/rejected": -9.043212890625, "step": 1209 }, { "epoch": 0.27, "learning_rate": 9.720008316748344e-06, "logits/chosen": -0.7189438939094543, "logits/rejected": -0.7189438939094543, "logps/chosen": -126.32994079589844, "logps/rejected": -126.32994079589844, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -5.298717021942139, "rewards/margins": 0.0, "rewards/rejected": -5.298717021942139, "step": 1210 }, { "epoch": 0.27, "learning_rate": 9.719416651541839e-06, "logits/chosen": -0.7403898239135742, "logits/rejected": -0.7263031005859375, "logps/chosen": -73.00782775878906, "logps/rejected": -89.56942749023438, "loss": 0.3922, "rewards/accuracies": 0.0, "rewards/chosen": -0.1207733154296875, "rewards/margins": -0.15298691391944885, "rewards/rejected": 0.032213594764471054, "step": 1211 }, { "epoch": 0.27, "learning_rate": 9.718824379901639e-06, "logits/chosen": -0.7551984786987305, "logits/rejected": -0.7070293426513672, "logps/chosen": -117.48556518554688, "logps/rejected": -198.65838623046875, "loss": 0.3415, "rewards/accuracies": 1.0, "rewards/chosen": -0.9343475699424744, "rewards/margins": 3.0847272872924805, "rewards/rejected": -4.0190749168396, "step": 1212 }, { "epoch": 0.27, "learning_rate": 9.718231501903851e-06, "logits/chosen": -0.9974337220191956, "logits/rejected": -0.7212368249893188, "logps/chosen": -175.77706909179688, "logps/rejected": -495.3008117675781, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.6970398426055908, "rewards/margins": 36.813934326171875, "rewards/rejected": -38.5109748840332, "step": 1213 }, { "epoch": 0.27, "learning_rate": 9.717638017624657e-06, "logits/chosen": -0.8249350190162659, "logits/rejected": -0.7607006430625916, "logps/chosen": -130.6214141845703, "logps/rejected": -279.5281677246094, "loss": 0.4259, "rewards/accuracies": 0.0, "rewards/chosen": -2.087040662765503, "rewards/margins": -0.2273939847946167, "rewards/rejected": -1.8596466779708862, "step": 1214 }, { "epoch": 0.27, "learning_rate": 9.717043927140319e-06, "logits/chosen": -0.8668135404586792, "logits/rejected": -0.8693923354148865, "logps/chosen": -44.18636703491211, "logps/rejected": -53.05145263671875, "loss": 2.3961, "rewards/accuracies": 1.0, "rewards/chosen": -2.245699405670166, "rewards/margins": 0.8057870864868164, "rewards/rejected": -3.0514864921569824, "step": 1215 }, { "epoch": 0.27, "learning_rate": 9.716449230527175e-06, "logits/chosen": -0.8481398820877075, "logits/rejected": -0.8317636847496033, "logps/chosen": -193.28155517578125, "logps/rejected": -224.09133911132812, "loss": 0.0734, "rewards/accuracies": 1.0, "rewards/chosen": -0.4419311583042145, "rewards/margins": 6.614302158355713, "rewards/rejected": -7.0562334060668945, "step": 1216 }, { "epoch": 0.27, "learning_rate": 9.715853927861643e-06, "logits/chosen": -1.1558020114898682, "logits/rejected": -1.065000057220459, "logps/chosen": -169.78802490234375, "logps/rejected": -218.48594665527344, "loss": 1.5704, "rewards/accuracies": 0.0, "rewards/chosen": -3.664410352706909, "rewards/margins": -3.096278190612793, "rewards/rejected": -0.5681320428848267, "step": 1217 }, { "epoch": 0.27, "learning_rate": 9.71525801922022e-06, "logits/chosen": -0.7813752293586731, "logits/rejected": -0.7457324266433716, "logps/chosen": -121.80827331542969, "logps/rejected": -173.40817260742188, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": 0.8896102905273438, "rewards/margins": 5.392186164855957, "rewards/rejected": -4.502575874328613, "step": 1218 }, { "epoch": 0.27, "learning_rate": 9.714661504679474e-06, "logits/chosen": -0.8248013257980347, "logits/rejected": -0.7962629199028015, "logps/chosen": -88.44892120361328, "logps/rejected": -116.42462921142578, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": 0.5584114193916321, "rewards/margins": 2.8328704833984375, "rewards/rejected": -2.27445912361145, "step": 1219 }, { "epoch": 0.27, "learning_rate": 9.71406438431606e-06, "logits/chosen": -0.635714054107666, "logits/rejected": -0.589434802532196, "logps/chosen": -111.62535095214844, "logps/rejected": -48.87723922729492, "loss": 0.6133, "rewards/accuracies": 0.0, "rewards/chosen": -2.7418181896209717, "rewards/margins": -0.5385198593139648, "rewards/rejected": -2.203298330307007, "step": 1220 }, { "epoch": 0.27, "learning_rate": 9.713466658206703e-06, "logits/chosen": -1.3614976406097412, "logits/rejected": -1.317839503288269, "logps/chosen": -82.09514617919922, "logps/rejected": -170.9362335205078, "loss": 0.1072, "rewards/accuracies": 1.0, "rewards/chosen": -2.6200807094573975, "rewards/margins": 1.4303314685821533, "rewards/rejected": -4.050412178039551, "step": 1221 }, { "epoch": 0.27, "learning_rate": 9.712868326428213e-06, "logits/chosen": -0.8316888213157654, "logits/rejected": -0.8316888213157654, "logps/chosen": -79.28861999511719, "logps/rejected": -79.28861999511719, "loss": 0.3475, "rewards/accuracies": 0.0, "rewards/chosen": -1.288674235343933, "rewards/margins": 0.0, "rewards/rejected": -1.288674235343933, "step": 1222 }, { "epoch": 0.27, "learning_rate": 9.712269389057471e-06, "logits/chosen": -0.7539942264556885, "logits/rejected": -0.7493891716003418, "logps/chosen": -103.02949523925781, "logps/rejected": -244.2148895263672, "loss": 0.2054, "rewards/accuracies": 1.0, "rewards/chosen": -1.5065453052520752, "rewards/margins": 5.324190139770508, "rewards/rejected": -6.830735683441162, "step": 1223 }, { "epoch": 0.27, "learning_rate": 9.711669846171443e-06, "logits/chosen": -0.7944270372390747, "logits/rejected": -0.7867192029953003, "logps/chosen": -108.66648864746094, "logps/rejected": -73.12777709960938, "loss": 0.2746, "rewards/accuracies": 1.0, "rewards/chosen": -2.7241318225860596, "rewards/margins": 0.32657480239868164, "rewards/rejected": -3.050706624984741, "step": 1224 }, { "epoch": 0.27, "learning_rate": 9.711069697847165e-06, "logits/chosen": -1.0502901077270508, "logits/rejected": -1.010551929473877, "logps/chosen": -225.88209533691406, "logps/rejected": -267.51751708984375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 1.3916336297988892, "rewards/margins": 7.453500270843506, "rewards/rejected": -6.061866760253906, "step": 1225 }, { "epoch": 0.27, "learning_rate": 9.710468944161755e-06, "logits/chosen": -0.7461585402488708, "logits/rejected": -0.7237133979797363, "logps/chosen": -100.86012268066406, "logps/rejected": -159.87738037109375, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -1.2976349592208862, "rewards/margins": 3.8995957374572754, "rewards/rejected": -5.197230815887451, "step": 1226 }, { "epoch": 0.27, "learning_rate": 9.70986758519241e-06, "logits/chosen": -1.0581473112106323, "logits/rejected": -1.0149401426315308, "logps/chosen": -73.68022918701172, "logps/rejected": -140.51182556152344, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 1.2794029712677002, "rewards/margins": 6.545914649963379, "rewards/rejected": -5.2665114402771, "step": 1227 }, { "epoch": 0.27, "learning_rate": 9.709265621016401e-06, "logits/chosen": -0.6697239279747009, "logits/rejected": -0.646051824092865, "logps/chosen": -179.52099609375, "logps/rejected": -109.753173828125, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": 0.6347610354423523, "rewards/margins": 3.1377365589141846, "rewards/rejected": -2.5029754638671875, "step": 1228 }, { "epoch": 0.27, "learning_rate": 9.708663051711083e-06, "logits/chosen": -0.7408050894737244, "logits/rejected": -0.7428624629974365, "logps/chosen": -68.68318176269531, "logps/rejected": -78.34600067138672, "loss": 0.2379, "rewards/accuracies": 1.0, "rewards/chosen": -0.20428161323070526, "rewards/margins": 0.5029289126396179, "rewards/rejected": -0.7072105407714844, "step": 1229 }, { "epoch": 0.27, "learning_rate": 9.708059877353881e-06, "logits/chosen": -0.6195899248123169, "logits/rejected": -0.5207837820053101, "logps/chosen": -61.28089141845703, "logps/rejected": -203.4803466796875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.8405570983886719, "rewards/margins": 6.318613529205322, "rewards/rejected": -7.159170627593994, "step": 1230 }, { "epoch": 0.27, "learning_rate": 9.707456098022303e-06, "logits/chosen": -0.7830065488815308, "logits/rejected": -0.7555552124977112, "logps/chosen": -102.38867950439453, "logps/rejected": -214.18386840820312, "loss": 0.1883, "rewards/accuracies": 1.0, "rewards/chosen": -1.6129875183105469, "rewards/margins": 7.114089012145996, "rewards/rejected": -8.727076530456543, "step": 1231 }, { "epoch": 0.27, "learning_rate": 9.706851713793932e-06, "logits/chosen": -0.553931474685669, "logits/rejected": -0.5170985460281372, "logps/chosen": -107.87841796875, "logps/rejected": -238.3517608642578, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.5943741202354431, "rewards/margins": 8.682186126708984, "rewards/rejected": -9.276559829711914, "step": 1232 }, { "epoch": 0.27, "learning_rate": 9.706246724746433e-06, "logits/chosen": -0.8181463479995728, "logits/rejected": -0.8034703731536865, "logps/chosen": -86.47227478027344, "logps/rejected": -154.76478576660156, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -3.395892858505249, "rewards/margins": 3.866575002670288, "rewards/rejected": -7.262467861175537, "step": 1233 }, { "epoch": 0.27, "learning_rate": 9.705641130957541e-06, "logits/chosen": -0.4910708963871002, "logits/rejected": -0.47250765562057495, "logps/chosen": -108.15885925292969, "logps/rejected": -175.602783203125, "loss": 0.5338, "rewards/accuracies": 0.0, "rewards/chosen": -1.442352294921875, "rewards/margins": -0.6463592648506165, "rewards/rejected": -0.7959930300712585, "step": 1234 }, { "epoch": 0.27, "learning_rate": 9.705034932505076e-06, "logits/chosen": -0.9012542366981506, "logits/rejected": -0.8818675875663757, "logps/chosen": -101.52691650390625, "logps/rejected": -123.03388977050781, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": 0.936920166015625, "rewards/margins": 3.218315839767456, "rewards/rejected": -2.281395673751831, "step": 1235 }, { "epoch": 0.27, "learning_rate": 9.704428129466934e-06, "logits/chosen": -0.4345364570617676, "logits/rejected": -0.4195655584335327, "logps/chosen": -162.93194580078125, "logps/rejected": -163.08657836914062, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.9615539908409119, "rewards/margins": 5.087718963623047, "rewards/rejected": -6.0492730140686035, "step": 1236 }, { "epoch": 0.27, "learning_rate": 9.703820721921085e-06, "logits/chosen": -0.6539326310157776, "logits/rejected": -0.6476473212242126, "logps/chosen": -265.70721435546875, "logps/rejected": -219.25802612304688, "loss": 0.2345, "rewards/accuracies": 1.0, "rewards/chosen": 1.3312256336212158, "rewards/margins": 0.5758362412452698, "rewards/rejected": 0.755389392375946, "step": 1237 }, { "epoch": 0.27, "learning_rate": 9.703212709945583e-06, "logits/chosen": -0.6544832587242126, "logits/rejected": -0.6048038601875305, "logps/chosen": -163.0355682373047, "logps/rejected": -157.07809448242188, "loss": 0.493, "rewards/accuracies": 0.0, "rewards/chosen": 1.109089732170105, "rewards/margins": -0.5192184448242188, "rewards/rejected": 1.6283081769943237, "step": 1238 }, { "epoch": 0.27, "learning_rate": 9.70260409361855e-06, "logits/chosen": -0.7396975755691528, "logits/rejected": -0.6562955975532532, "logps/chosen": -76.2576675415039, "logps/rejected": -147.4827880859375, "loss": 0.0624, "rewards/accuracies": 1.0, "rewards/chosen": 0.4507896602153778, "rewards/margins": 6.005263328552246, "rewards/rejected": -5.554473876953125, "step": 1239 }, { "epoch": 0.27, "learning_rate": 9.701994873018198e-06, "logits/chosen": -0.7634520530700684, "logits/rejected": -0.759161114692688, "logps/chosen": -113.37626647949219, "logps/rejected": -124.47929382324219, "loss": 0.1187, "rewards/accuracies": 1.0, "rewards/chosen": -0.7702087759971619, "rewards/margins": 1.9999122619628906, "rewards/rejected": -2.7701210975646973, "step": 1240 }, { "epoch": 0.27, "learning_rate": 9.70138504822281e-06, "logits/chosen": -0.7482825517654419, "logits/rejected": -0.7517467737197876, "logps/chosen": -137.23158264160156, "logps/rejected": -54.378318786621094, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": 0.3755737245082855, "rewards/margins": 2.6723525524139404, "rewards/rejected": -2.296778917312622, "step": 1241 }, { "epoch": 0.27, "learning_rate": 9.700774619310744e-06, "logits/chosen": -0.6045926213264465, "logits/rejected": -0.595503032207489, "logps/chosen": -152.23306274414062, "logps/rejected": -127.32289123535156, "loss": 0.3898, "rewards/accuracies": 0.0, "rewards/chosen": -4.288987636566162, "rewards/margins": -0.1654205322265625, "rewards/rejected": -4.1235671043396, "step": 1242 }, { "epoch": 0.28, "learning_rate": 9.700163586360438e-06, "logits/chosen": -0.6690865755081177, "logits/rejected": -0.6455590128898621, "logps/chosen": -87.64332580566406, "logps/rejected": -110.55773162841797, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": 0.3430168330669403, "rewards/margins": 3.228933811187744, "rewards/rejected": -2.8859169483184814, "step": 1243 }, { "epoch": 0.28, "learning_rate": 9.699551949450412e-06, "logits/chosen": -0.8217800259590149, "logits/rejected": -0.8089330792427063, "logps/chosen": -81.8237533569336, "logps/rejected": -198.05377197265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.46672898530960083, "rewards/margins": 8.904205322265625, "rewards/rejected": -8.43747615814209, "step": 1244 }, { "epoch": 0.28, "learning_rate": 9.698939708659258e-06, "logits/chosen": -0.7087691426277161, "logits/rejected": -0.6835872530937195, "logps/chosen": -106.74361419677734, "logps/rejected": -158.78839111328125, "loss": 0.8685, "rewards/accuracies": 0.0, "rewards/chosen": -0.20448990166187286, "rewards/margins": -1.4733269214630127, "rewards/rejected": 1.2688369750976562, "step": 1245 }, { "epoch": 0.28, "learning_rate": 9.698326864065646e-06, "logits/chosen": -0.8287886381149292, "logits/rejected": -0.8287886381149292, "logps/chosen": -104.4774169921875, "logps/rejected": -104.4774169921875, "loss": 0.3824, "rewards/accuracies": 0.0, "rewards/chosen": -3.2630233764648438, "rewards/margins": 0.0, "rewards/rejected": -3.2630233764648438, "step": 1246 }, { "epoch": 0.28, "learning_rate": 9.697713415748327e-06, "logits/chosen": -0.7158599495887756, "logits/rejected": -0.7196062207221985, "logps/chosen": -128.32615661621094, "logps/rejected": -179.97006225585938, "loss": 0.1915, "rewards/accuracies": 1.0, "rewards/chosen": -3.262465715408325, "rewards/margins": 2.5549652576446533, "rewards/rejected": -5.8174309730529785, "step": 1247 }, { "epoch": 0.28, "learning_rate": 9.697099363786127e-06, "logits/chosen": -0.9148365259170532, "logits/rejected": -0.9693063497543335, "logps/chosen": -174.64382934570312, "logps/rejected": -181.24594116210938, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -0.8365554809570312, "rewards/margins": 2.761827230453491, "rewards/rejected": -3.5983827114105225, "step": 1248 }, { "epoch": 0.28, "learning_rate": 9.69648470825795e-06, "logits/chosen": -0.7187190651893616, "logits/rejected": -0.6496626734733582, "logps/chosen": -134.4816436767578, "logps/rejected": -271.3028564453125, "loss": 0.2128, "rewards/accuracies": 1.0, "rewards/chosen": -0.6743530631065369, "rewards/margins": 6.7262864112854, "rewards/rejected": -7.400639533996582, "step": 1249 }, { "epoch": 0.28, "learning_rate": 9.695869449242779e-06, "logits/chosen": -0.9336482882499695, "logits/rejected": -0.9470288753509521, "logps/chosen": -135.04873657226562, "logps/rejected": -81.341064453125, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -1.1938828229904175, "rewards/margins": 3.0946593284606934, "rewards/rejected": -4.2885422706604, "step": 1250 }, { "epoch": 0.28, "learning_rate": 9.695253586819672e-06, "logits/chosen": -0.7394232153892517, "logits/rejected": -0.7367052435874939, "logps/chosen": -150.91552734375, "logps/rejected": -106.65361022949219, "loss": 0.1449, "rewards/accuracies": 1.0, "rewards/chosen": -0.8703262209892273, "rewards/margins": 1.142991542816162, "rewards/rejected": -2.013317823410034, "step": 1251 }, { "epoch": 0.28, "learning_rate": 9.694637121067764e-06, "logits/chosen": -0.6945820450782776, "logits/rejected": -0.6411688327789307, "logps/chosen": -108.78723907470703, "logps/rejected": -128.52099609375, "loss": 0.0591, "rewards/accuracies": 1.0, "rewards/chosen": -0.1983482390642166, "rewards/margins": 3.6704659461975098, "rewards/rejected": -3.86881422996521, "step": 1252 }, { "epoch": 0.28, "learning_rate": 9.694020052066275e-06, "logits/chosen": -1.0995877981185913, "logits/rejected": -1.095029592514038, "logps/chosen": -73.33743286132812, "logps/rejected": -94.34967041015625, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": 0.04353027418255806, "rewards/margins": 3.4250328540802, "rewards/rejected": -3.381502628326416, "step": 1253 }, { "epoch": 0.28, "learning_rate": 9.693402379894492e-06, "logits/chosen": -0.6627225279808044, "logits/rejected": -0.625873863697052, "logps/chosen": -106.48728942871094, "logps/rejected": -137.33428955078125, "loss": 0.0752, "rewards/accuracies": 1.0, "rewards/chosen": 0.04676361009478569, "rewards/margins": 2.244227647781372, "rewards/rejected": -2.1974639892578125, "step": 1254 }, { "epoch": 0.28, "learning_rate": 9.692784104631785e-06, "logits/chosen": -0.9029390811920166, "logits/rejected": -0.8586454391479492, "logps/chosen": -83.35261535644531, "logps/rejected": -120.75198364257812, "loss": 0.317, "rewards/accuracies": 1.0, "rewards/chosen": 0.47508546710014343, "rewards/margins": 3.1057984828948975, "rewards/rejected": -2.6307129859924316, "step": 1255 }, { "epoch": 0.28, "learning_rate": 9.692165226357603e-06, "logits/chosen": -0.7912622690200806, "logits/rejected": -0.8052677512168884, "logps/chosen": -65.22272491455078, "logps/rejected": -71.3106918334961, "loss": 0.2678, "rewards/accuracies": 1.0, "rewards/chosen": -1.1098495721817017, "rewards/margins": 0.4643779993057251, "rewards/rejected": -1.5742275714874268, "step": 1256 }, { "epoch": 0.28, "learning_rate": 9.691545745151469e-06, "logits/chosen": -1.012859582901001, "logits/rejected": -1.0070006847381592, "logps/chosen": -106.06892395019531, "logps/rejected": -71.12834930419922, "loss": 0.3721, "rewards/accuracies": 0.0, "rewards/chosen": -0.3618515133857727, "rewards/margins": -0.07936477661132812, "rewards/rejected": -0.2824867367744446, "step": 1257 }, { "epoch": 0.28, "learning_rate": 9.690925661092984e-06, "logits/chosen": -0.8244881629943848, "logits/rejected": -0.7904792428016663, "logps/chosen": -162.35205078125, "logps/rejected": -115.28571319580078, "loss": 0.3419, "rewards/accuracies": 1.0, "rewards/chosen": -3.360670566558838, "rewards/margins": 0.01940464973449707, "rewards/rejected": -3.380075216293335, "step": 1258 }, { "epoch": 0.28, "learning_rate": 9.690304974261828e-06, "logits/chosen": -0.6062414050102234, "logits/rejected": -0.5512490272521973, "logps/chosen": -52.404518127441406, "logps/rejected": -192.7965087890625, "loss": 0.1866, "rewards/accuracies": 1.0, "rewards/chosen": 0.5344684720039368, "rewards/margins": 5.706450462341309, "rewards/rejected": -5.1719818115234375, "step": 1259 }, { "epoch": 0.28, "learning_rate": 9.689683684737758e-06, "logits/chosen": -0.6828327178955078, "logits/rejected": -0.611535906791687, "logps/chosen": -210.37429809570312, "logps/rejected": -343.05572509765625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.3319793939590454, "rewards/margins": 7.93964147567749, "rewards/rejected": -9.271620750427246, "step": 1260 }, { "epoch": 0.28, "learning_rate": 9.68906179260061e-06, "logits/chosen": -1.1896835565567017, "logits/rejected": -1.0772641897201538, "logps/chosen": -116.97554779052734, "logps/rejected": -216.74380493164062, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -0.076360322535038, "rewards/margins": 5.688523292541504, "rewards/rejected": -5.764883518218994, "step": 1261 }, { "epoch": 0.28, "learning_rate": 9.688439297930292e-06, "logits/chosen": -1.0207655429840088, "logits/rejected": -1.0030008554458618, "logps/chosen": -111.98284912109375, "logps/rejected": -115.62090301513672, "loss": 0.4843, "rewards/accuracies": 1.0, "rewards/chosen": 0.1855316162109375, "rewards/margins": 4.116262435913086, "rewards/rejected": -3.9307305812835693, "step": 1262 }, { "epoch": 0.28, "learning_rate": 9.687816200806795e-06, "logits/chosen": -0.7306362986564636, "logits/rejected": -0.6212120056152344, "logps/chosen": -236.13485717773438, "logps/rejected": -157.6024932861328, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.9303359985351562, "rewards/margins": 4.527005195617676, "rewards/rejected": -5.457341194152832, "step": 1263 }, { "epoch": 0.28, "learning_rate": 9.687192501310186e-06, "logits/chosen": -0.6274423003196716, "logits/rejected": -0.6167041063308716, "logps/chosen": -78.33399963378906, "logps/rejected": -143.22198486328125, "loss": 0.1428, "rewards/accuracies": 1.0, "rewards/chosen": -1.181775689125061, "rewards/margins": 1.2688645124435425, "rewards/rejected": -2.4506402015686035, "step": 1264 }, { "epoch": 0.28, "learning_rate": 9.68656819952061e-06, "logits/chosen": -0.5916273593902588, "logits/rejected": -0.5705879926681519, "logps/chosen": -80.9218978881836, "logps/rejected": -305.02996826171875, "loss": 0.3446, "rewards/accuracies": 1.0, "rewards/chosen": -1.5977340936660767, "rewards/margins": 5.646263122558594, "rewards/rejected": -7.243997097015381, "step": 1265 }, { "epoch": 0.28, "learning_rate": 9.685943295518283e-06, "logits/chosen": -0.5636699795722961, "logits/rejected": -0.6178438663482666, "logps/chosen": -171.81936645507812, "logps/rejected": -475.6151428222656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.6442886590957642, "rewards/margins": 33.44316482543945, "rewards/rejected": -34.08745193481445, "step": 1266 }, { "epoch": 0.28, "learning_rate": 9.685317789383509e-06, "logits/chosen": -0.7717736959457397, "logits/rejected": -0.816520094871521, "logps/chosen": -142.01995849609375, "logps/rejected": -81.52519989013672, "loss": 0.6351, "rewards/accuracies": 1.0, "rewards/chosen": -0.974414050579071, "rewards/margins": 0.24751824140548706, "rewards/rejected": -1.221932291984558, "step": 1267 }, { "epoch": 0.28, "learning_rate": 9.684691681196664e-06, "logits/chosen": -0.874727725982666, "logits/rejected": -0.7671366930007935, "logps/chosen": -181.78736877441406, "logps/rejected": -311.93731689453125, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": 0.31245729327201843, "rewards/margins": 3.2588746547698975, "rewards/rejected": -2.9464173316955566, "step": 1268 }, { "epoch": 0.28, "learning_rate": 9.684064971038196e-06, "logits/chosen": -0.7675402164459229, "logits/rejected": -0.7523961663246155, "logps/chosen": -97.40240478515625, "logps/rejected": -97.8079605102539, "loss": 0.071, "rewards/accuracies": 1.0, "rewards/chosen": 0.3765754699707031, "rewards/margins": 1.8979660272598267, "rewards/rejected": -1.5213905572891235, "step": 1269 }, { "epoch": 0.28, "learning_rate": 9.683437658988642e-06, "logits/chosen": -0.7635930776596069, "logits/rejected": -0.7430064082145691, "logps/chosen": -124.58316802978516, "logps/rejected": -143.8941192626953, "loss": 0.639, "rewards/accuracies": 0.0, "rewards/chosen": -2.73872447013855, "rewards/margins": -0.8286963701248169, "rewards/rejected": -1.910028100013733, "step": 1270 }, { "epoch": 0.28, "learning_rate": 9.682809745128607e-06, "logits/chosen": -0.900933563709259, "logits/rejected": -0.8910478949546814, "logps/chosen": -233.3169708251953, "logps/rejected": -205.5516815185547, "loss": 0.6252, "rewards/accuracies": 0.0, "rewards/chosen": -1.3979675769805908, "rewards/margins": -0.8336700797080994, "rewards/rejected": -0.5642974972724915, "step": 1271 }, { "epoch": 0.28, "learning_rate": 9.682181229538776e-06, "logits/chosen": -1.0285842418670654, "logits/rejected": -1.014241337776184, "logps/chosen": -101.75092315673828, "logps/rejected": -132.19166564941406, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": 0.4879753291606903, "rewards/margins": 4.307015419006348, "rewards/rejected": -3.819040060043335, "step": 1272 }, { "epoch": 0.28, "learning_rate": 9.681552112299914e-06, "logits/chosen": -1.0924524068832397, "logits/rejected": -1.1857883930206299, "logps/chosen": -216.90521240234375, "logps/rejected": -126.20620727539062, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": 2.127066135406494, "rewards/margins": 3.0815773010253906, "rewards/rejected": -0.954511284828186, "step": 1273 }, { "epoch": 0.28, "learning_rate": 9.680922393492858e-06, "logits/chosen": -0.9090495109558105, "logits/rejected": -0.9092016816139221, "logps/chosen": -264.36865234375, "logps/rejected": -117.38664245605469, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": -0.26859742403030396, "rewards/margins": 3.716047763824463, "rewards/rejected": -3.984645128250122, "step": 1274 }, { "epoch": 0.28, "learning_rate": 9.68029207319853e-06, "logits/chosen": -1.099118709564209, "logits/rejected": -1.0814985036849976, "logps/chosen": -163.33029174804688, "logps/rejected": -200.87254333496094, "loss": 0.7556, "rewards/accuracies": 1.0, "rewards/chosen": -1.759118676185608, "rewards/margins": 5.2335710525512695, "rewards/rejected": -6.992689609527588, "step": 1275 }, { "epoch": 0.28, "learning_rate": 9.679661151497919e-06, "logits/chosen": -0.730228066444397, "logits/rejected": -0.6942776441574097, "logps/chosen": -117.34761047363281, "logps/rejected": -188.7874298095703, "loss": 0.9125, "rewards/accuracies": 0.0, "rewards/chosen": 0.4545745849609375, "rewards/margins": -0.6950470209121704, "rewards/rejected": 1.149621605873108, "step": 1276 }, { "epoch": 0.28, "learning_rate": 9.6790296284721e-06, "logits/chosen": -0.8462119698524475, "logits/rejected": -0.7660096883773804, "logps/chosen": -103.11151123046875, "logps/rejected": -177.7013702392578, "loss": 0.2959, "rewards/accuracies": 1.0, "rewards/chosen": 0.03584137186408043, "rewards/margins": 5.231122970581055, "rewards/rejected": -5.195281505584717, "step": 1277 }, { "epoch": 0.28, "learning_rate": 9.678397504202222e-06, "logits/chosen": -0.9740206599235535, "logits/rejected": -0.9993152618408203, "logps/chosen": -199.8505401611328, "logps/rejected": -239.65908813476562, "loss": 0.9829, "rewards/accuracies": 0.0, "rewards/chosen": -0.3966354429721832, "rewards/margins": -1.8144241571426392, "rewards/rejected": 1.4177887439727783, "step": 1278 }, { "epoch": 0.28, "learning_rate": 9.677764778769512e-06, "logits/chosen": -0.7899139523506165, "logits/rejected": -0.8418207168579102, "logps/chosen": -103.70657348632812, "logps/rejected": -98.44880676269531, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": -0.6077728271484375, "rewards/margins": 2.6422340869903564, "rewards/rejected": -3.250006914138794, "step": 1279 }, { "epoch": 0.28, "learning_rate": 9.677131452255272e-06, "logits/chosen": -0.9554622769355774, "logits/rejected": -0.904513418674469, "logps/chosen": -58.30805587768555, "logps/rejected": -154.73928833007812, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": 0.6947010159492493, "rewards/margins": 5.043344020843506, "rewards/rejected": -4.348642826080322, "step": 1280 }, { "epoch": 0.28, "learning_rate": 9.676497524740885e-06, "logits/chosen": -0.8342201709747314, "logits/rejected": -0.8916568756103516, "logps/chosen": -198.69482421875, "logps/rejected": -110.86927032470703, "loss": 0.1232, "rewards/accuracies": 1.0, "rewards/chosen": 0.3394455015659332, "rewards/margins": 1.275429606437683, "rewards/rejected": -0.9359840750694275, "step": 1281 }, { "epoch": 0.28, "learning_rate": 9.675862996307808e-06, "logits/chosen": -1.190928339958191, "logits/rejected": -1.3083701133728027, "logps/chosen": -120.8321304321289, "logps/rejected": -124.60401916503906, "loss": 0.0474, "rewards/accuracies": 1.0, "rewards/chosen": -0.6352424621582031, "rewards/margins": 3.081310272216797, "rewards/rejected": -3.716552734375, "step": 1282 }, { "epoch": 0.28, "learning_rate": 9.675227867037576e-06, "logits/chosen": -0.8416074514389038, "logits/rejected": -0.844072163105011, "logps/chosen": -80.79334259033203, "logps/rejected": -88.21551513671875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 0.58230060338974, "rewards/margins": 4.444505214691162, "rewards/rejected": -3.8622047901153564, "step": 1283 }, { "epoch": 0.28, "learning_rate": 9.674592137011801e-06, "logits/chosen": -1.1001489162445068, "logits/rejected": -1.060451626777649, "logps/chosen": -259.0494689941406, "logps/rejected": -125.66551208496094, "loss": 1.9167, "rewards/accuracies": 1.0, "rewards/chosen": 1.4858306646347046, "rewards/margins": 6.953914165496826, "rewards/rejected": -5.468083381652832, "step": 1284 }, { "epoch": 0.28, "learning_rate": 9.673955806312175e-06, "logits/chosen": -0.8053789138793945, "logits/rejected": -0.7745118737220764, "logps/chosen": -221.6866455078125, "logps/rejected": -203.0426025390625, "loss": 0.0503, "rewards/accuracies": 1.0, "rewards/chosen": 0.6503280997276306, "rewards/margins": 2.306034803390503, "rewards/rejected": -1.655706763267517, "step": 1285 }, { "epoch": 0.28, "learning_rate": 9.673318875020463e-06, "logits/chosen": -0.6658027768135071, "logits/rejected": -0.5086146593093872, "logps/chosen": -138.70457458496094, "logps/rejected": -343.0509033203125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.146981954574585, "rewards/margins": 9.14635181427002, "rewards/rejected": -11.293334007263184, "step": 1286 }, { "epoch": 0.28, "learning_rate": 9.67268134321851e-06, "logits/chosen": -1.2045536041259766, "logits/rejected": -1.2259149551391602, "logps/chosen": -261.9312438964844, "logps/rejected": -82.54302978515625, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": 0.47623902559280396, "rewards/margins": 3.3752951622009277, "rewards/rejected": -2.8990561962127686, "step": 1287 }, { "epoch": 0.29, "learning_rate": 9.672043210988237e-06, "logits/chosen": -0.770236074924469, "logits/rejected": -0.6659870147705078, "logps/chosen": -262.95550537109375, "logps/rejected": -686.3755493164062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.4137451648712158, "rewards/margins": 38.027259826660156, "rewards/rejected": -36.6135139465332, "step": 1288 }, { "epoch": 0.29, "learning_rate": 9.671404478411645e-06, "logits/chosen": -1.2775763273239136, "logits/rejected": -1.2539763450622559, "logps/chosen": -112.16346740722656, "logps/rejected": -157.58523559570312, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": -2.7535133361816406, "rewards/margins": 3.4092049598693848, "rewards/rejected": -6.162718296051025, "step": 1289 }, { "epoch": 0.29, "learning_rate": 9.670765145570804e-06, "logits/chosen": -0.865018367767334, "logits/rejected": -0.8494002819061279, "logps/chosen": -268.3348388671875, "logps/rejected": -339.0550537109375, "loss": 0.1283, "rewards/accuracies": 1.0, "rewards/chosen": -1.925787329673767, "rewards/margins": 2.6071290969848633, "rewards/rejected": -4.53291654586792, "step": 1290 }, { "epoch": 0.29, "learning_rate": 9.670125212547872e-06, "logits/chosen": -1.0247315168380737, "logits/rejected": -1.0527504682540894, "logps/chosen": -85.7706527709961, "logps/rejected": -89.18008422851562, "loss": 0.1032, "rewards/accuracies": 1.0, "rewards/chosen": 0.2861931025981903, "rewards/margins": 1.480873942375183, "rewards/rejected": -1.1946808099746704, "step": 1291 }, { "epoch": 0.29, "learning_rate": 9.669484679425077e-06, "logits/chosen": -0.822146475315094, "logits/rejected": -0.8150419592857361, "logps/chosen": -96.70067596435547, "logps/rejected": -126.7816162109375, "loss": 0.0652, "rewards/accuracies": 1.0, "rewards/chosen": -0.6414665579795837, "rewards/margins": 1.9742774963378906, "rewards/rejected": -2.615744113922119, "step": 1292 }, { "epoch": 0.29, "learning_rate": 9.668843546284725e-06, "logits/chosen": -1.1951143741607666, "logits/rejected": -0.6928644180297852, "logps/chosen": -142.21527099609375, "logps/rejected": -439.4761962890625, "loss": 0.0997, "rewards/accuracies": 1.0, "rewards/chosen": 0.7883331179618835, "rewards/margins": 33.68220520019531, "rewards/rejected": -32.89387130737305, "step": 1293 }, { "epoch": 0.29, "learning_rate": 9.668201813209202e-06, "logits/chosen": -1.1177922487258911, "logits/rejected": -1.095211148262024, "logps/chosen": -67.96698760986328, "logps/rejected": -92.25001525878906, "loss": 0.1036, "rewards/accuracies": 1.0, "rewards/chosen": 0.33737945556640625, "rewards/margins": 1.6852432489395142, "rewards/rejected": -1.347863793373108, "step": 1294 }, { "epoch": 0.29, "learning_rate": 9.667559480280968e-06, "logits/chosen": -0.9400255680084229, "logits/rejected": -0.9706478118896484, "logps/chosen": -253.2047576904297, "logps/rejected": -44.6888313293457, "loss": 0.8281, "rewards/accuracies": 0.0, "rewards/chosen": -3.6862504482269287, "rewards/margins": -1.4443495273590088, "rewards/rejected": -2.24190092086792, "step": 1295 }, { "epoch": 0.29, "learning_rate": 9.66691654758256e-06, "logits/chosen": -1.0455321073532104, "logits/rejected": -0.9723143577575684, "logps/chosen": -160.35208129882812, "logps/rejected": -270.40069580078125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.9887603521347046, "rewards/margins": 6.81401252746582, "rewards/rejected": -4.825252056121826, "step": 1296 }, { "epoch": 0.29, "learning_rate": 9.666273015196595e-06, "logits/chosen": -0.9842867255210876, "logits/rejected": -0.8733397126197815, "logps/chosen": -83.21583557128906, "logps/rejected": -278.88580322265625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.5737167596817017, "rewards/margins": 4.2037248611450195, "rewards/rejected": -4.777441501617432, "step": 1297 }, { "epoch": 0.29, "learning_rate": 9.665628883205765e-06, "logits/chosen": -0.7673825621604919, "logits/rejected": -0.7355025410652161, "logps/chosen": -87.01692962646484, "logps/rejected": -136.96580505371094, "loss": 0.0645, "rewards/accuracies": 1.0, "rewards/chosen": 0.1737213134765625, "rewards/margins": 1.9820747375488281, "rewards/rejected": -1.8083534240722656, "step": 1298 }, { "epoch": 0.29, "learning_rate": 9.66498415169284e-06, "logits/chosen": -0.7586023211479187, "logits/rejected": -0.7586023211479187, "logps/chosen": -234.4129638671875, "logps/rejected": -234.4129638671875, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.134761333465576, "rewards/margins": 0.0, "rewards/rejected": -5.134761333465576, "step": 1299 }, { "epoch": 0.29, "learning_rate": 9.664338820740664e-06, "logits/chosen": -0.9692687392234802, "logits/rejected": -0.9596686959266663, "logps/chosen": -94.02134704589844, "logps/rejected": -95.85840606689453, "loss": 0.5247, "rewards/accuracies": 0.0, "rewards/chosen": -2.9893791675567627, "rewards/margins": -0.6185364723205566, "rewards/rejected": -2.370842695236206, "step": 1300 }, { "epoch": 0.29, "learning_rate": 9.663692890432164e-06, "logits/chosen": -0.7234551310539246, "logits/rejected": -0.6740811467170715, "logps/chosen": -96.00715637207031, "logps/rejected": -140.46466064453125, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.8885238766670227, "rewards/margins": 4.282661437988281, "rewards/rejected": -3.3941376209259033, "step": 1301 }, { "epoch": 0.29, "learning_rate": 9.663046360850338e-06, "logits/chosen": -0.9746822118759155, "logits/rejected": -0.9289334416389465, "logps/chosen": -85.32197570800781, "logps/rejected": -140.84210205078125, "loss": 0.2244, "rewards/accuracies": 1.0, "rewards/chosen": -1.5785804986953735, "rewards/margins": 0.7002884149551392, "rewards/rejected": -2.2788689136505127, "step": 1302 }, { "epoch": 0.29, "learning_rate": 9.662399232078264e-06, "logits/chosen": -0.749663770198822, "logits/rejected": -0.7313194274902344, "logps/chosen": -115.81155395507812, "logps/rejected": -38.424625396728516, "loss": 0.4924, "rewards/accuracies": 0.0, "rewards/chosen": -2.153088331222534, "rewards/margins": -0.5123776197433472, "rewards/rejected": -1.640710711479187, "step": 1303 }, { "epoch": 0.29, "learning_rate": 9.661751504199097e-06, "logits/chosen": -0.679575502872467, "logits/rejected": -0.6581838130950928, "logps/chosen": -117.2354507446289, "logps/rejected": -108.31932830810547, "loss": 0.3409, "rewards/accuracies": 1.0, "rewards/chosen": -0.33493882417678833, "rewards/margins": 0.03402632474899292, "rewards/rejected": -0.36896514892578125, "step": 1304 }, { "epoch": 0.29, "learning_rate": 9.661103177296069e-06, "logits/chosen": -0.9223424196243286, "logits/rejected": -0.9399068355560303, "logps/chosen": -283.52593994140625, "logps/rejected": -420.62738037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.752978563308716, "rewards/margins": 22.380264282226562, "rewards/rejected": -19.62728500366211, "step": 1305 }, { "epoch": 0.29, "learning_rate": 9.660454251452487e-06, "logits/chosen": -0.9715349078178406, "logits/rejected": -0.9724169969558716, "logps/chosen": -73.52072143554688, "logps/rejected": -123.52340698242188, "loss": 0.4783, "rewards/accuracies": 0.0, "rewards/chosen": -0.43423768877983093, "rewards/margins": -0.4601700007915497, "rewards/rejected": 0.02593231201171875, "step": 1306 }, { "epoch": 0.29, "learning_rate": 9.659804726751737e-06, "logits/chosen": -0.6321349740028381, "logits/rejected": -0.5795571208000183, "logps/chosen": -148.09664916992188, "logps/rejected": -152.3561248779297, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -0.18170471489429474, "rewards/margins": 2.9686484336853027, "rewards/rejected": -3.150353193283081, "step": 1307 }, { "epoch": 0.29, "learning_rate": 9.659154603277283e-06, "logits/chosen": -0.9475923776626587, "logits/rejected": -0.8973826169967651, "logps/chosen": -102.27275848388672, "logps/rejected": -138.94349670410156, "loss": 0.3075, "rewards/accuracies": 1.0, "rewards/chosen": 0.3863731324672699, "rewards/margins": 1.6308517456054688, "rewards/rejected": -1.2444785833358765, "step": 1308 }, { "epoch": 0.29, "learning_rate": 9.658503881112661e-06, "logits/chosen": -1.0015524625778198, "logits/rejected": -1.0261257886886597, "logps/chosen": -100.407470703125, "logps/rejected": -95.07499694824219, "loss": 0.0903, "rewards/accuracies": 1.0, "rewards/chosen": -0.05696868896484375, "rewards/margins": 1.6380951404571533, "rewards/rejected": -1.695063829421997, "step": 1309 }, { "epoch": 0.29, "learning_rate": 9.65785256034149e-06, "logits/chosen": -1.306328296661377, "logits/rejected": -1.269219994544983, "logps/chosen": -131.03457641601562, "logps/rejected": -193.98843383789062, "loss": 0.4808, "rewards/accuracies": 1.0, "rewards/chosen": -1.6041275262832642, "rewards/margins": 2.6094865798950195, "rewards/rejected": -4.213613986968994, "step": 1310 }, { "epoch": 0.29, "learning_rate": 9.657200641047462e-06, "logits/chosen": -0.9000043272972107, "logits/rejected": -0.8505759835243225, "logps/chosen": -76.28645324707031, "logps/rejected": -155.5133819580078, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.2316436767578125, "rewards/margins": 6.691188812255859, "rewards/rejected": -6.459545135498047, "step": 1311 }, { "epoch": 0.29, "learning_rate": 9.656548123314346e-06, "logits/chosen": -0.899788498878479, "logits/rejected": -0.797307550907135, "logps/chosen": -137.60089111328125, "logps/rejected": -232.91842651367188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 1.5485748052597046, "rewards/margins": 5.96243143081665, "rewards/rejected": -4.413856506347656, "step": 1312 }, { "epoch": 0.29, "learning_rate": 9.655895007225992e-06, "logits/chosen": -1.1164777278900146, "logits/rejected": -1.1030986309051514, "logps/chosen": -100.92127990722656, "logps/rejected": -124.77980041503906, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -0.060102082788944244, "rewards/margins": 3.294329881668091, "rewards/rejected": -3.3544318675994873, "step": 1313 }, { "epoch": 0.29, "learning_rate": 9.655241292866321e-06, "logits/chosen": -0.8927431106567383, "logits/rejected": -0.8226039409637451, "logps/chosen": -175.81390380859375, "logps/rejected": -171.87982177734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 3.475140333175659, "rewards/margins": 8.569931030273438, "rewards/rejected": -5.094790935516357, "step": 1314 }, { "epoch": 0.29, "learning_rate": 9.654586980319335e-06, "logits/chosen": -0.8418799042701721, "logits/rejected": -0.8418799042701721, "logps/chosen": -160.36761474609375, "logps/rejected": -160.36761474609375, "loss": 0.387, "rewards/accuracies": 0.0, "rewards/chosen": -6.355973720550537, "rewards/margins": 0.0, "rewards/rejected": -6.355973720550537, "step": 1315 }, { "epoch": 0.29, "learning_rate": 9.653932069669112e-06, "logits/chosen": -0.7635791897773743, "logits/rejected": -0.7635791897773743, "logps/chosen": -114.21249389648438, "logps/rejected": -114.21249389648438, "loss": 0.8547, "rewards/accuracies": 0.0, "rewards/chosen": -4.612069606781006, "rewards/margins": 0.0, "rewards/rejected": -4.612069606781006, "step": 1316 }, { "epoch": 0.29, "learning_rate": 9.653276560999805e-06, "logits/chosen": -0.9219754934310913, "logits/rejected": -0.919069766998291, "logps/chosen": -239.3302001953125, "logps/rejected": -340.35064697265625, "loss": 3.014, "rewards/accuracies": 1.0, "rewards/chosen": 0.9397262930870056, "rewards/margins": 6.918394565582275, "rewards/rejected": -5.978668212890625, "step": 1317 }, { "epoch": 0.29, "learning_rate": 9.652620454395647e-06, "logits/chosen": -0.9414596557617188, "logits/rejected": -0.9613908529281616, "logps/chosen": -115.39766693115234, "logps/rejected": -124.72850036621094, "loss": 0.271, "rewards/accuracies": 1.0, "rewards/chosen": 1.2632713317871094, "rewards/margins": 4.969875335693359, "rewards/rejected": -3.70660400390625, "step": 1318 }, { "epoch": 0.29, "learning_rate": 9.651963749940944e-06, "logits/chosen": -1.0102754831314087, "logits/rejected": -0.6758452653884888, "logps/chosen": -220.9308319091797, "logps/rejected": -553.9424438476562, "loss": 0.0317, "rewards/accuracies": 1.0, "rewards/chosen": -1.0662063360214233, "rewards/margins": 44.26744842529297, "rewards/rejected": -45.333656311035156, "step": 1319 }, { "epoch": 0.29, "learning_rate": 9.651306447720083e-06, "logits/chosen": -0.6033243536949158, "logits/rejected": -0.5563859939575195, "logps/chosen": -94.19879150390625, "logps/rejected": -232.05075073242188, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.3580818176269531, "rewards/margins": 7.516639232635498, "rewards/rejected": -7.158557415008545, "step": 1320 }, { "epoch": 0.29, "learning_rate": 9.650648547817524e-06, "logits/chosen": -0.8962364792823792, "logits/rejected": -0.8676077127456665, "logps/chosen": -86.72909545898438, "logps/rejected": -93.40963745117188, "loss": 0.138, "rewards/accuracies": 1.0, "rewards/chosen": -0.5535950064659119, "rewards/margins": 1.2357017993927002, "rewards/rejected": -1.7892967462539673, "step": 1321 }, { "epoch": 0.29, "learning_rate": 9.649990050317806e-06, "logits/chosen": -1.0756884813308716, "logits/rejected": -1.1297051906585693, "logps/chosen": -224.29086303710938, "logps/rejected": -152.0416259765625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.6120163202285767, "rewards/margins": 4.4648613929748535, "rewards/rejected": -5.076877593994141, "step": 1322 }, { "epoch": 0.29, "learning_rate": 9.649330955305547e-06, "logits/chosen": -0.7708548903465271, "logits/rejected": -0.7708548903465271, "logps/chosen": -358.1086120605469, "logps/rejected": -358.1086120605469, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.477029323577881, "rewards/margins": 0.0, "rewards/rejected": -4.477029323577881, "step": 1323 }, { "epoch": 0.29, "learning_rate": 9.648671262865434e-06, "logits/chosen": -0.9945632219314575, "logits/rejected": -0.9666488766670227, "logps/chosen": -84.98831176757812, "logps/rejected": -141.8841094970703, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": 1.3583115339279175, "rewards/margins": 4.531070232391357, "rewards/rejected": -3.1727585792541504, "step": 1324 }, { "epoch": 0.29, "learning_rate": 9.648010973082243e-06, "logits/chosen": -0.9189040660858154, "logits/rejected": -0.9417515993118286, "logps/chosen": -206.3853302001953, "logps/rejected": -157.1527099609375, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": 0.5227737426757812, "rewards/margins": 3.3684310913085938, "rewards/rejected": -2.8456573486328125, "step": 1325 }, { "epoch": 0.29, "learning_rate": 9.647350086040812e-06, "logits/chosen": -0.8730274438858032, "logits/rejected": -0.8606052398681641, "logps/chosen": -68.84210205078125, "logps/rejected": -78.01126098632812, "loss": 0.2963, "rewards/accuracies": 1.0, "rewards/chosen": 0.30597230792045593, "rewards/margins": 0.7555908560752869, "rewards/rejected": -0.44961854815483093, "step": 1326 }, { "epoch": 0.29, "learning_rate": 9.646688601826068e-06, "logits/chosen": -0.6699433326721191, "logits/rejected": -0.5915197730064392, "logps/chosen": -163.04856872558594, "logps/rejected": -118.69221496582031, "loss": 0.1392, "rewards/accuracies": 1.0, "rewards/chosen": 0.6895523071289062, "rewards/margins": 4.314178466796875, "rewards/rejected": -3.6246261596679688, "step": 1327 }, { "epoch": 0.29, "learning_rate": 9.646026520523008e-06, "logits/chosen": -0.9395834803581238, "logits/rejected": -0.951849102973938, "logps/chosen": -225.89297485351562, "logps/rejected": -142.65245056152344, "loss": 0.0979, "rewards/accuracies": 1.0, "rewards/chosen": 0.1394500732421875, "rewards/margins": 1.5307663679122925, "rewards/rejected": -1.391316294670105, "step": 1328 }, { "epoch": 0.29, "learning_rate": 9.64536384221671e-06, "logits/chosen": -0.47749418020248413, "logits/rejected": -0.4540599584579468, "logps/chosen": -139.61727905273438, "logps/rejected": -131.5063018798828, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.085302710533142, "rewards/margins": 4.432916164398193, "rewards/rejected": -5.518218994140625, "step": 1329 }, { "epoch": 0.29, "learning_rate": 9.644700566992324e-06, "logits/chosen": -1.052309274673462, "logits/rejected": -1.021437644958496, "logps/chosen": -85.33297729492188, "logps/rejected": -68.18304443359375, "loss": 0.1227, "rewards/accuracies": 1.0, "rewards/chosen": -2.4837753772735596, "rewards/margins": 1.3909251689910889, "rewards/rejected": -3.8747005462646484, "step": 1330 }, { "epoch": 0.29, "learning_rate": 9.644036694935083e-06, "logits/chosen": -1.215591549873352, "logits/rejected": -1.1763293743133545, "logps/chosen": -68.63581085205078, "logps/rejected": -117.14190673828125, "loss": 0.1915, "rewards/accuracies": 1.0, "rewards/chosen": -0.5976341366767883, "rewards/margins": 1.1368918418884277, "rewards/rejected": -1.7345260381698608, "step": 1331 }, { "epoch": 0.29, "learning_rate": 9.64337222613029e-06, "logits/chosen": -1.1052963733673096, "logits/rejected": -1.1020056009292603, "logps/chosen": -137.46694946289062, "logps/rejected": -119.04205322265625, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": 0.979504406452179, "rewards/margins": 3.226414442062378, "rewards/rejected": -2.2469100952148438, "step": 1332 }, { "epoch": 0.3, "learning_rate": 9.642707160663326e-06, "logits/chosen": -0.7111749053001404, "logits/rejected": -0.7124417424201965, "logps/chosen": -124.35000610351562, "logps/rejected": -115.92960357666016, "loss": 0.2958, "rewards/accuracies": 1.0, "rewards/chosen": -0.49732133746147156, "rewards/margins": 1.089640736579895, "rewards/rejected": -1.586962103843689, "step": 1333 }, { "epoch": 0.3, "learning_rate": 9.642041498619655e-06, "logits/chosen": -0.6789337396621704, "logits/rejected": -0.6220108866691589, "logps/chosen": -102.94615173339844, "logps/rejected": -148.33035278320312, "loss": 0.0761, "rewards/accuracies": 1.0, "rewards/chosen": -1.0280380249023438, "rewards/margins": 3.1122794151306152, "rewards/rejected": -4.140317440032959, "step": 1334 }, { "epoch": 0.3, "learning_rate": 9.64137524008481e-06, "logits/chosen": -0.6486400365829468, "logits/rejected": -0.6193879842758179, "logps/chosen": -79.37152099609375, "logps/rejected": -104.80718231201172, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": 1.0808082818984985, "rewards/margins": 3.0999560356140137, "rewards/rejected": -2.0191476345062256, "step": 1335 }, { "epoch": 0.3, "learning_rate": 9.640708385144403e-06, "logits/chosen": -0.6559804677963257, "logits/rejected": -0.6443623304367065, "logps/chosen": -154.0615234375, "logps/rejected": -212.44580078125, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": -0.7639404535293579, "rewards/margins": 2.1504058837890625, "rewards/rejected": -2.91434645652771, "step": 1336 }, { "epoch": 0.3, "learning_rate": 9.640040933884126e-06, "logits/chosen": -0.5084769129753113, "logits/rejected": -0.4494827091693878, "logps/chosen": -96.30113220214844, "logps/rejected": -60.66786193847656, "loss": 0.1147, "rewards/accuracies": 1.0, "rewards/chosen": -1.4621918201446533, "rewards/margins": 1.433556318283081, "rewards/rejected": -2.8957481384277344, "step": 1337 }, { "epoch": 0.3, "learning_rate": 9.639372886389743e-06, "logits/chosen": -1.0711661577224731, "logits/rejected": -1.0151067972183228, "logps/chosen": -174.78611755371094, "logps/rejected": -213.171142578125, "loss": 0.1638, "rewards/accuracies": 1.0, "rewards/chosen": 2.003347873687744, "rewards/margins": 2.301837205886841, "rewards/rejected": -0.29848939180374146, "step": 1338 }, { "epoch": 0.3, "learning_rate": 9.638704242747097e-06, "logits/chosen": -0.7976199388504028, "logits/rejected": -0.8653879761695862, "logps/chosen": -80.53998565673828, "logps/rejected": -95.41455078125, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": -0.041950225830078125, "rewards/margins": 2.5147805213928223, "rewards/rejected": -2.5567307472229004, "step": 1339 }, { "epoch": 0.3, "learning_rate": 9.638035003042108e-06, "logits/chosen": -0.9801295399665833, "logits/rejected": -0.9255753755569458, "logps/chosen": -261.0672607421875, "logps/rejected": -262.8541259765625, "loss": 0.1301, "rewards/accuracies": 1.0, "rewards/chosen": -1.0861374139785767, "rewards/margins": 1.2151185274124146, "rewards/rejected": -2.301255941390991, "step": 1340 }, { "epoch": 0.3, "learning_rate": 9.637365167360769e-06, "logits/chosen": -0.8124873042106628, "logits/rejected": -0.841870129108429, "logps/chosen": -48.39892578125, "logps/rejected": -28.857891082763672, "loss": 0.1203, "rewards/accuracies": 1.0, "rewards/chosen": 0.1298469603061676, "rewards/margins": 1.3289823532104492, "rewards/rejected": -1.199135422706604, "step": 1341 }, { "epoch": 0.3, "learning_rate": 9.636694735789153e-06, "logits/chosen": -0.8611322641372681, "logits/rejected": -0.8985604047775269, "logps/chosen": -154.56698608398438, "logps/rejected": -183.02679443359375, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": 0.09246521443128586, "rewards/margins": 5.612362861633301, "rewards/rejected": -5.5198974609375, "step": 1342 }, { "epoch": 0.3, "learning_rate": 9.636023708413412e-06, "logits/chosen": -0.8206358551979065, "logits/rejected": -0.7944449782371521, "logps/chosen": -171.3910369873047, "logps/rejected": -143.42201232910156, "loss": 0.1616, "rewards/accuracies": 1.0, "rewards/chosen": 0.16688385605812073, "rewards/margins": 5.94982385635376, "rewards/rejected": -5.782939910888672, "step": 1343 }, { "epoch": 0.3, "learning_rate": 9.635352085319768e-06, "logits/chosen": -1.04790198802948, "logits/rejected": -0.9952126145362854, "logps/chosen": -155.84661865234375, "logps/rejected": -170.45333862304688, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.16572876274585724, "rewards/margins": 7.738042831420898, "rewards/rejected": -7.90377140045166, "step": 1344 }, { "epoch": 0.3, "learning_rate": 9.634679866594525e-06, "logits/chosen": -1.0873125791549683, "logits/rejected": -1.0825350284576416, "logps/chosen": -64.73736572265625, "logps/rejected": -93.41401672363281, "loss": 0.1694, "rewards/accuracies": 1.0, "rewards/chosen": -0.47632789611816406, "rewards/margins": 0.9089527130126953, "rewards/rejected": -1.3852806091308594, "step": 1345 }, { "epoch": 0.3, "learning_rate": 9.63400705232406e-06, "logits/chosen": -0.8843316435813904, "logits/rejected": -0.7060477137565613, "logps/chosen": -194.1475067138672, "logps/rejected": -151.59986877441406, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.15022888779640198, "rewards/margins": 5.045811653137207, "rewards/rejected": -5.196040630340576, "step": 1346 }, { "epoch": 0.3, "learning_rate": 9.633333642594828e-06, "logits/chosen": -1.2623542547225952, "logits/rejected": -1.229154109954834, "logps/chosen": -83.21340942382812, "logps/rejected": -115.20951843261719, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": -0.9793807864189148, "rewards/margins": 2.7451202869415283, "rewards/rejected": -3.724501132965088, "step": 1347 }, { "epoch": 0.3, "learning_rate": 9.632659637493362e-06, "logits/chosen": -1.3664283752441406, "logits/rejected": -1.43026864528656, "logps/chosen": -124.68889617919922, "logps/rejected": -81.06631469726562, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": -0.42309877276420593, "rewards/margins": 3.4397988319396973, "rewards/rejected": -3.8628976345062256, "step": 1348 }, { "epoch": 0.3, "learning_rate": 9.631985037106268e-06, "logits/chosen": -1.0832123756408691, "logits/rejected": -1.0962204933166504, "logps/chosen": -113.19503784179688, "logps/rejected": -147.20628356933594, "loss": 0.1761, "rewards/accuracies": 1.0, "rewards/chosen": -1.637690782546997, "rewards/margins": 1.9129929542541504, "rewards/rejected": -3.5506837368011475, "step": 1349 }, { "epoch": 0.3, "learning_rate": 9.631309841520233e-06, "logits/chosen": -0.9786189198493958, "logits/rejected": -0.9215856194496155, "logps/chosen": -173.52545166015625, "logps/rejected": -304.9490661621094, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.4268081784248352, "rewards/margins": 11.118388175964355, "rewards/rejected": -11.545196533203125, "step": 1350 }, { "epoch": 0.3, "learning_rate": 9.630634050822016e-06, "logits/chosen": -0.6574403047561646, "logits/rejected": -0.5894061326980591, "logps/chosen": -162.56834411621094, "logps/rejected": -209.26138305664062, "loss": 0.1327, "rewards/accuracies": 1.0, "rewards/chosen": 0.3809188902378082, "rewards/margins": 1.2831283807754517, "rewards/rejected": -0.902209460735321, "step": 1351 }, { "epoch": 0.3, "learning_rate": 9.629957665098458e-06, "logits/chosen": -0.7459353804588318, "logits/rejected": -0.6996416449546814, "logps/chosen": -61.75774002075195, "logps/rejected": -53.2838249206543, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": -0.7127429842948914, "rewards/margins": 2.106649398803711, "rewards/rejected": -2.819392442703247, "step": 1352 }, { "epoch": 0.3, "learning_rate": 9.629280684436467e-06, "logits/chosen": -1.0369144678115845, "logits/rejected": -1.037829041481018, "logps/chosen": -216.28271484375, "logps/rejected": -203.61566162109375, "loss": 0.2122, "rewards/accuracies": 1.0, "rewards/chosen": 2.0113770961761475, "rewards/margins": 0.6394532918930054, "rewards/rejected": 1.371923804283142, "step": 1353 }, { "epoch": 0.3, "learning_rate": 9.628603108923037e-06, "logits/chosen": -0.8117716908454895, "logits/rejected": -0.8107477426528931, "logps/chosen": -69.28395080566406, "logps/rejected": -77.3825454711914, "loss": 0.4721, "rewards/accuracies": 0.0, "rewards/chosen": -0.11600189656019211, "rewards/margins": -0.4433037042617798, "rewards/rejected": 0.3273018002510071, "step": 1354 }, { "epoch": 0.3, "learning_rate": 9.627924938645234e-06, "logits/chosen": -0.7791777849197388, "logits/rejected": -0.7669455409049988, "logps/chosen": -116.1119613647461, "logps/rejected": -144.83108520507812, "loss": 0.5686, "rewards/accuracies": 0.0, "rewards/chosen": -0.9754287600517273, "rewards/margins": -0.7298171520233154, "rewards/rejected": -0.24561157822608948, "step": 1355 }, { "epoch": 0.3, "learning_rate": 9.627246173690202e-06, "logits/chosen": -0.918835461139679, "logits/rejected": -0.9037768244743347, "logps/chosen": -93.46342468261719, "logps/rejected": -123.7332992553711, "loss": 0.3176, "rewards/accuracies": 1.0, "rewards/chosen": 0.5949714779853821, "rewards/margins": 0.12988588213920593, "rewards/rejected": 0.46508559584617615, "step": 1356 }, { "epoch": 0.3, "learning_rate": 9.62656681414516e-06, "logits/chosen": -0.8866793513298035, "logits/rejected": -0.8520803451538086, "logps/chosen": -93.05644226074219, "logps/rejected": -124.51737976074219, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 1.1504173278808594, "rewards/margins": 5.0991082191467285, "rewards/rejected": -3.948690891265869, "step": 1357 }, { "epoch": 0.3, "learning_rate": 9.625886860097406e-06, "logits/chosen": -0.776889979839325, "logits/rejected": -0.7394204139709473, "logps/chosen": -94.19502258300781, "logps/rejected": -61.784507751464844, "loss": 1.1701, "rewards/accuracies": 1.0, "rewards/chosen": -0.01669769361615181, "rewards/margins": 2.820716142654419, "rewards/rejected": -2.837413787841797, "step": 1358 }, { "epoch": 0.3, "learning_rate": 9.62520631163431e-06, "logits/chosen": -1.076361894607544, "logits/rejected": -1.9731862545013428, "logps/chosen": -113.13030242919922, "logps/rejected": -129.98764038085938, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": 0.05753784254193306, "rewards/margins": 2.1008553504943848, "rewards/rejected": -2.0433175563812256, "step": 1359 }, { "epoch": 0.3, "learning_rate": 9.62452516884332e-06, "logits/chosen": -0.765516459941864, "logits/rejected": -0.7381364107131958, "logps/chosen": -186.65115356445312, "logps/rejected": -109.18875122070312, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": 1.4295380115509033, "rewards/margins": 3.1048951148986816, "rewards/rejected": -1.6753571033477783, "step": 1360 }, { "epoch": 0.3, "learning_rate": 9.623843431811964e-06, "logits/chosen": -0.5741367340087891, "logits/rejected": -0.5741367340087891, "logps/chosen": -182.4356689453125, "logps/rejected": -182.4356689453125, "loss": 0.7422, "rewards/accuracies": 0.0, "rewards/chosen": -2.623356580734253, "rewards/margins": 0.0, "rewards/rejected": -2.623356580734253, "step": 1361 }, { "epoch": 0.3, "learning_rate": 9.623161100627842e-06, "logits/chosen": -0.8825780153274536, "logits/rejected": -0.8303552269935608, "logps/chosen": -181.98004150390625, "logps/rejected": -303.31646728515625, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": 1.130268931388855, "rewards/margins": 3.286311626434326, "rewards/rejected": -2.1560425758361816, "step": 1362 }, { "epoch": 0.3, "learning_rate": 9.622478175378634e-06, "logits/chosen": -0.9121668338775635, "logits/rejected": -0.9121668338775635, "logps/chosen": -160.1877899169922, "logps/rejected": -160.1877899169922, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -5.488865852355957, "rewards/margins": 0.0, "rewards/rejected": -5.488865852355957, "step": 1363 }, { "epoch": 0.3, "learning_rate": 9.62179465615209e-06, "logits/chosen": -0.9790414571762085, "logits/rejected": -0.9790414571762085, "logps/chosen": -149.7374267578125, "logps/rejected": -149.7374267578125, "loss": 0.3697, "rewards/accuracies": 0.0, "rewards/chosen": -3.917048692703247, "rewards/margins": 0.0, "rewards/rejected": -3.917048692703247, "step": 1364 }, { "epoch": 0.3, "learning_rate": 9.621110543036047e-06, "logits/chosen": -0.7823068499565125, "logits/rejected": -0.7197813987731934, "logps/chosen": -142.70361328125, "logps/rejected": -183.23484802246094, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": 0.7532684206962585, "rewards/margins": 7.659013271331787, "rewards/rejected": -6.905745029449463, "step": 1365 }, { "epoch": 0.3, "learning_rate": 9.620425836118406e-06, "logits/chosen": -0.8040516376495361, "logits/rejected": -0.8181800246238708, "logps/chosen": -95.68887329101562, "logps/rejected": -191.74362182617188, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 0.5332931876182556, "rewards/margins": 4.030813694000244, "rewards/rejected": -3.4975204467773438, "step": 1366 }, { "epoch": 0.3, "learning_rate": 9.619740535487151e-06, "logits/chosen": -0.7117908000946045, "logits/rejected": -0.6711849570274353, "logps/chosen": -85.67914581298828, "logps/rejected": -113.306396484375, "loss": 0.0771, "rewards/accuracies": 1.0, "rewards/chosen": 0.25565338134765625, "rewards/margins": 4.159360885620117, "rewards/rejected": -3.903707265853882, "step": 1367 }, { "epoch": 0.3, "learning_rate": 9.619054641230343e-06, "logits/chosen": -1.0856374502182007, "logits/rejected": -1.0739879608154297, "logps/chosen": -142.437255859375, "logps/rejected": -132.17404174804688, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -0.3805603086948395, "rewards/margins": 3.3755645751953125, "rewards/rejected": -3.756124973297119, "step": 1368 }, { "epoch": 0.3, "learning_rate": 9.618368153436119e-06, "logits/chosen": -0.8835764527320862, "logits/rejected": -0.8581951260566711, "logps/chosen": -221.98733520507812, "logps/rejected": -170.69461059570312, "loss": 0.1606, "rewards/accuracies": 1.0, "rewards/chosen": 0.6034515500068665, "rewards/margins": 0.976654052734375, "rewards/rejected": -0.37320253252983093, "step": 1369 }, { "epoch": 0.3, "learning_rate": 9.617681072192688e-06, "logits/chosen": -1.1045342683792114, "logits/rejected": -1.0623403787612915, "logps/chosen": -115.74024963378906, "logps/rejected": -157.2443084716797, "loss": 0.1803, "rewards/accuracies": 1.0, "rewards/chosen": 1.837249755859375, "rewards/margins": 0.837811291217804, "rewards/rejected": 0.999438464641571, "step": 1370 }, { "epoch": 0.3, "learning_rate": 9.616993397588342e-06, "logits/chosen": -1.1821197271347046, "logits/rejected": -1.2125273942947388, "logps/chosen": -93.71214294433594, "logps/rejected": -68.3383560180664, "loss": 0.1911, "rewards/accuracies": 1.0, "rewards/chosen": -0.10876388847827911, "rewards/margins": 0.8875347375869751, "rewards/rejected": -0.996298611164093, "step": 1371 }, { "epoch": 0.3, "learning_rate": 9.61630512971144e-06, "logits/chosen": -1.0688297748565674, "logits/rejected": -0.9736710786819458, "logps/chosen": -151.18692016601562, "logps/rejected": -211.42605590820312, "loss": 0.3772, "rewards/accuracies": 1.0, "rewards/chosen": 1.9301437139511108, "rewards/margins": 8.34931755065918, "rewards/rejected": -6.4191741943359375, "step": 1372 }, { "epoch": 0.3, "learning_rate": 9.61561626865043e-06, "logits/chosen": -1.1940419673919678, "logits/rejected": -1.0823264122009277, "logps/chosen": -166.6497344970703, "logps/rejected": -195.18960571289062, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": 3.2677764892578125, "rewards/margins": 3.119511365890503, "rewards/rejected": 0.148265078663826, "step": 1373 }, { "epoch": 0.3, "learning_rate": 9.614926814493822e-06, "logits/chosen": -0.9736632704734802, "logits/rejected": -0.8982536792755127, "logps/chosen": -154.91258239746094, "logps/rejected": -62.95281219482422, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 2.030696153640747, "rewards/margins": 4.62337589263916, "rewards/rejected": -2.592679738998413, "step": 1374 }, { "epoch": 0.3, "learning_rate": 9.614236767330214e-06, "logits/chosen": -0.9976822733879089, "logits/rejected": -0.9825534224510193, "logps/chosen": -239.05484008789062, "logps/rejected": -199.76638793945312, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.9661834836006165, "rewards/margins": 6.76375150680542, "rewards/rejected": -5.797567844390869, "step": 1375 }, { "epoch": 0.3, "learning_rate": 9.613546127248272e-06, "logits/chosen": -0.7775229215621948, "logits/rejected": -0.7735077142715454, "logps/chosen": -69.42982482910156, "logps/rejected": -70.63140106201172, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": 0.7113571166992188, "rewards/margins": 2.493075370788574, "rewards/rejected": -1.781718134880066, "step": 1376 }, { "epoch": 0.3, "learning_rate": 9.612854894336746e-06, "logits/chosen": -1.2800577878952026, "logits/rejected": -1.2414467334747314, "logps/chosen": -80.63961791992188, "logps/rejected": -100.08077239990234, "loss": 0.497, "rewards/accuracies": 0.0, "rewards/chosen": 0.42671510577201843, "rewards/margins": -0.26582029461860657, "rewards/rejected": 0.692535400390625, "step": 1377 }, { "epoch": 0.31, "learning_rate": 9.612163068684453e-06, "logits/chosen": -0.96452397108078, "logits/rejected": -0.9038070440292358, "logps/chosen": -174.91900634765625, "logps/rejected": -274.1636657714844, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 2.162057638168335, "rewards/margins": 5.534704685211182, "rewards/rejected": -3.3726470470428467, "step": 1378 }, { "epoch": 0.31, "learning_rate": 9.611470650380293e-06, "logits/chosen": -0.7483649849891663, "logits/rejected": -0.7395755648612976, "logps/chosen": -186.7518310546875, "logps/rejected": -55.87097930908203, "loss": 0.7712, "rewards/accuracies": 0.0, "rewards/chosen": -3.7709929943084717, "rewards/margins": -1.3001031875610352, "rewards/rejected": -2.4708898067474365, "step": 1379 }, { "epoch": 0.31, "learning_rate": 9.61077763951324e-06, "logits/chosen": -0.868142306804657, "logits/rejected": -0.856110155582428, "logps/chosen": -78.5933609008789, "logps/rejected": -55.51403045654297, "loss": 0.4898, "rewards/accuracies": 1.0, "rewards/chosen": -2.0689685344696045, "rewards/margins": 1.1036927700042725, "rewards/rejected": -3.172661304473877, "step": 1380 }, { "epoch": 0.31, "learning_rate": 9.610084036172346e-06, "logits/chosen": -0.7868853211402893, "logits/rejected": -0.7465661764144897, "logps/chosen": -298.6216735839844, "logps/rejected": -135.1954345703125, "loss": 0.0392, "rewards/accuracies": 1.0, "rewards/chosen": 0.6900726556777954, "rewards/margins": 3.126260280609131, "rewards/rejected": -2.436187744140625, "step": 1381 }, { "epoch": 0.31, "learning_rate": 9.609389840446734e-06, "logits/chosen": -0.6160290241241455, "logits/rejected": -0.5760849714279175, "logps/chosen": -213.8111114501953, "logps/rejected": -192.04421997070312, "loss": 0.1882, "rewards/accuracies": 1.0, "rewards/chosen": 1.5505722761154175, "rewards/margins": 0.7828766703605652, "rewards/rejected": 0.7676956057548523, "step": 1382 }, { "epoch": 0.31, "learning_rate": 9.60869505242561e-06, "logits/chosen": -0.9958206415176392, "logits/rejected": -0.9786420464515686, "logps/chosen": -78.41888427734375, "logps/rejected": -109.4600830078125, "loss": 0.4954, "rewards/accuracies": 1.0, "rewards/chosen": -0.09565124660730362, "rewards/margins": 1.0589706897735596, "rewards/rejected": -1.154621958732605, "step": 1383 }, { "epoch": 0.31, "learning_rate": 9.60799967219825e-06, "logits/chosen": -0.9436690807342529, "logits/rejected": -0.8252880573272705, "logps/chosen": -155.98899841308594, "logps/rejected": -220.55618286132812, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 2.2361221313476562, "rewards/margins": 5.2870588302612305, "rewards/rejected": -3.0509369373321533, "step": 1384 }, { "epoch": 0.31, "learning_rate": 9.607303699854009e-06, "logits/chosen": -1.2909669876098633, "logits/rejected": -1.2839070558547974, "logps/chosen": -107.7350082397461, "logps/rejected": -89.45072937011719, "loss": 0.3081, "rewards/accuracies": 1.0, "rewards/chosen": -0.2760520875453949, "rewards/margins": 0.3855896294116974, "rewards/rejected": -0.6616417169570923, "step": 1385 }, { "epoch": 0.31, "learning_rate": 9.606607135482318e-06, "logits/chosen": -1.0037623643875122, "logits/rejected": -0.9930958151817322, "logps/chosen": -156.2731170654297, "logps/rejected": -180.495361328125, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": 0.4095169007778168, "rewards/margins": 3.7211227416992188, "rewards/rejected": -3.311605930328369, "step": 1386 }, { "epoch": 0.31, "learning_rate": 9.605909979172683e-06, "logits/chosen": -0.7916244268417358, "logits/rejected": -0.7916244268417358, "logps/chosen": -120.9972152709961, "logps/rejected": -120.9972152709961, "loss": 0.3472, "rewards/accuracies": 0.0, "rewards/chosen": -1.4989327192306519, "rewards/margins": 0.0, "rewards/rejected": -1.4989327192306519, "step": 1387 }, { "epoch": 0.31, "learning_rate": 9.60521223101469e-06, "logits/chosen": -0.7539266347885132, "logits/rejected": -0.3116573393344879, "logps/chosen": -211.42279052734375, "logps/rejected": -500.2718505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.03457336500287056, "rewards/margins": 33.893314361572266, "rewards/rejected": -33.927886962890625, "step": 1388 }, { "epoch": 0.31, "learning_rate": 9.604513891097995e-06, "logits/chosen": -0.5933928489685059, "logits/rejected": -0.5933928489685059, "logps/chosen": -384.0113220214844, "logps/rejected": -384.0113220214844, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -14.24510383605957, "rewards/margins": 0.0, "rewards/rejected": -14.24510383605957, "step": 1389 }, { "epoch": 0.31, "learning_rate": 9.603814959512334e-06, "logits/chosen": -0.9224846959114075, "logits/rejected": -0.9283217191696167, "logps/chosen": -76.99626159667969, "logps/rejected": -173.86001586914062, "loss": 0.4228, "rewards/accuracies": 0.0, "rewards/chosen": 0.8067825436592102, "rewards/margins": -0.2824447751045227, "rewards/rejected": 1.089227318763733, "step": 1390 }, { "epoch": 0.31, "learning_rate": 9.603115436347519e-06, "logits/chosen": -1.2504005432128906, "logits/rejected": -1.2303942441940308, "logps/chosen": -150.25645446777344, "logps/rejected": -196.61651611328125, "loss": 0.0925, "rewards/accuracies": 1.0, "rewards/chosen": 1.48265540599823, "rewards/margins": 1.623555064201355, "rewards/rejected": -0.140899658203125, "step": 1391 }, { "epoch": 0.31, "learning_rate": 9.602415321693434e-06, "logits/chosen": -0.9420344233512878, "logits/rejected": -0.7071007490158081, "logps/chosen": -236.51577758789062, "logps/rejected": -351.88568115234375, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 1.9745025634765625, "rewards/margins": 17.565460205078125, "rewards/rejected": -15.590957641601562, "step": 1392 }, { "epoch": 0.31, "learning_rate": 9.601714615640046e-06, "logits/chosen": -0.813788652420044, "logits/rejected": -0.8230071067810059, "logps/chosen": -201.11741638183594, "logps/rejected": -91.43757629394531, "loss": 0.0891, "rewards/accuracies": 1.0, "rewards/chosen": 0.5611678957939148, "rewards/margins": 1.6394028663635254, "rewards/rejected": -1.0782349109649658, "step": 1393 }, { "epoch": 0.31, "learning_rate": 9.601013318277391e-06, "logits/chosen": -1.233963131904602, "logits/rejected": -1.3088792562484741, "logps/chosen": -124.954833984375, "logps/rejected": -81.91868591308594, "loss": 0.7937, "rewards/accuracies": 1.0, "rewards/chosen": 1.0120972394943237, "rewards/margins": 5.881297588348389, "rewards/rejected": -4.869200229644775, "step": 1394 }, { "epoch": 0.31, "learning_rate": 9.600311429695586e-06, "logits/chosen": -1.0326967239379883, "logits/rejected": -1.0455389022827148, "logps/chosen": -102.50784301757812, "logps/rejected": -125.93498229980469, "loss": 0.7238, "rewards/accuracies": 0.0, "rewards/chosen": -0.6540069580078125, "rewards/margins": -1.1795716285705566, "rewards/rejected": 0.5255646109580994, "step": 1395 }, { "epoch": 0.31, "learning_rate": 9.59960894998482e-06, "logits/chosen": -1.1534423828125, "logits/rejected": -1.1282517910003662, "logps/chosen": -71.3589096069336, "logps/rejected": -118.18302154541016, "loss": 0.1379, "rewards/accuracies": 1.0, "rewards/chosen": -0.5625358819961548, "rewards/margins": 1.4329712390899658, "rewards/rejected": -1.9955071210861206, "step": 1396 }, { "epoch": 0.31, "learning_rate": 9.598905879235362e-06, "logits/chosen": -1.0114058256149292, "logits/rejected": -1.0244402885437012, "logps/chosen": -95.22393798828125, "logps/rejected": -120.5733413696289, "loss": 0.2954, "rewards/accuracies": 1.0, "rewards/chosen": 0.3330116271972656, "rewards/margins": 1.6997253894805908, "rewards/rejected": -1.3667137622833252, "step": 1397 }, { "epoch": 0.31, "learning_rate": 9.598202217537554e-06, "logits/chosen": -0.7148739695549011, "logits/rejected": -0.6730125546455383, "logps/chosen": -128.89898681640625, "logps/rejected": -65.47136688232422, "loss": 0.0957, "rewards/accuracies": 1.0, "rewards/chosen": -0.25328370928764343, "rewards/margins": 2.7188937664031982, "rewards/rejected": -2.972177505493164, "step": 1398 }, { "epoch": 0.31, "learning_rate": 9.597497964981815e-06, "logits/chosen": -0.7664007544517517, "logits/rejected": -0.7494125962257385, "logps/chosen": -91.17420959472656, "logps/rejected": -140.96807861328125, "loss": 0.0323, "rewards/accuracies": 1.0, "rewards/chosen": -0.02265319786965847, "rewards/margins": 3.2611496448516846, "rewards/rejected": -3.2838027477264404, "step": 1399 }, { "epoch": 0.31, "learning_rate": 9.59679312165864e-06, "logits/chosen": -1.2241582870483398, "logits/rejected": -1.1284297704696655, "logps/chosen": -184.71810913085938, "logps/rejected": -261.2669982910156, "loss": 0.0527, "rewards/accuracies": 1.0, "rewards/chosen": -0.9872803092002869, "rewards/margins": 2.1968185901641846, "rewards/rejected": -3.184098958969116, "step": 1400 }, { "epoch": 0.31, "learning_rate": 9.596087687658598e-06, "logits/chosen": -1.2347941398620605, "logits/rejected": -1.1743448972702026, "logps/chosen": -109.03011322021484, "logps/rejected": -124.81315612792969, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 0.307412713766098, "rewards/margins": 3.976283311843872, "rewards/rejected": -3.668870687484741, "step": 1401 }, { "epoch": 0.31, "learning_rate": 9.595381663072335e-06, "logits/chosen": -1.207615613937378, "logits/rejected": -1.2248564958572388, "logps/chosen": -229.55364990234375, "logps/rejected": -147.86598205566406, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": 2.6926026344299316, "rewards/margins": 7.204297065734863, "rewards/rejected": -4.511694431304932, "step": 1402 }, { "epoch": 0.31, "learning_rate": 9.594675047990578e-06, "logits/chosen": -1.0732975006103516, "logits/rejected": -1.100314736366272, "logps/chosen": -108.01289367675781, "logps/rejected": -78.90190887451172, "loss": 0.2216, "rewards/accuracies": 1.0, "rewards/chosen": -2.0911705493927, "rewards/margins": 1.574601650238037, "rewards/rejected": -3.6657721996307373, "step": 1403 }, { "epoch": 0.31, "learning_rate": 9.593967842504121e-06, "logits/chosen": -1.1404184103012085, "logits/rejected": -1.1724623441696167, "logps/chosen": -154.61221313476562, "logps/rejected": -106.71212768554688, "loss": 1.0742, "rewards/accuracies": 0.0, "rewards/chosen": -2.318673849105835, "rewards/margins": -1.189436435699463, "rewards/rejected": -1.129237413406372, "step": 1404 }, { "epoch": 0.31, "learning_rate": 9.593260046703842e-06, "logits/chosen": -1.037819743156433, "logits/rejected": -0.9984795451164246, "logps/chosen": -221.99667358398438, "logps/rejected": -321.2298583984375, "loss": 0.0953, "rewards/accuracies": 1.0, "rewards/chosen": 1.76251220703125, "rewards/margins": 1.9295928478240967, "rewards/rejected": -0.16708068549633026, "step": 1405 }, { "epoch": 0.31, "learning_rate": 9.592551660680687e-06, "logits/chosen": -0.9687854051589966, "logits/rejected": -0.7847989797592163, "logps/chosen": -176.5792236328125, "logps/rejected": -329.19879150390625, "loss": 0.2715, "rewards/accuracies": 1.0, "rewards/chosen": 4.837823390960693, "rewards/margins": 12.342384338378906, "rewards/rejected": -7.504560947418213, "step": 1406 }, { "epoch": 0.31, "learning_rate": 9.591842684525685e-06, "logits/chosen": -0.9905865788459778, "logits/rejected": -0.8947449922561646, "logps/chosen": -88.74256896972656, "logps/rejected": -270.0150146484375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.25397491455078125, "rewards/margins": 10.442300796508789, "rewards/rejected": -10.188325881958008, "step": 1407 }, { "epoch": 0.31, "learning_rate": 9.591133118329936e-06, "logits/chosen": -0.9802274703979492, "logits/rejected": -0.9042633175849915, "logps/chosen": -75.211669921875, "logps/rejected": -196.541259765625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.11905670166015625, "rewards/margins": 6.317737102508545, "rewards/rejected": -6.198680400848389, "step": 1408 }, { "epoch": 0.31, "learning_rate": 9.590422962184619e-06, "logits/chosen": -1.3417519330978394, "logits/rejected": -0.7458454370498657, "logps/chosen": -123.12113189697266, "logps/rejected": -325.3932189941406, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -2.93721079826355, "rewards/margins": 3.32538104057312, "rewards/rejected": -6.26259183883667, "step": 1409 }, { "epoch": 0.31, "learning_rate": 9.589712216180986e-06, "logits/chosen": -1.0066639184951782, "logits/rejected": -0.9845952987670898, "logps/chosen": -69.51809692382812, "logps/rejected": -104.03816986083984, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": 1.37871253490448, "rewards/margins": 4.321645259857178, "rewards/rejected": -2.9429328441619873, "step": 1410 }, { "epoch": 0.31, "learning_rate": 9.589000880410366e-06, "logits/chosen": -1.0660451650619507, "logits/rejected": -1.0774070024490356, "logps/chosen": -102.33506774902344, "logps/rejected": -124.67686462402344, "loss": 0.3088, "rewards/accuracies": 1.0, "rewards/chosen": -1.154577612876892, "rewards/margins": 0.3455498218536377, "rewards/rejected": -1.5001274347305298, "step": 1411 }, { "epoch": 0.31, "learning_rate": 9.588288954964164e-06, "logits/chosen": -0.8769636154174805, "logits/rejected": -0.838257372379303, "logps/chosen": -116.82194519042969, "logps/rejected": -147.62940979003906, "loss": 0.1093, "rewards/accuracies": 1.0, "rewards/chosen": -1.951106309890747, "rewards/margins": 3.923596143722534, "rewards/rejected": -5.874702453613281, "step": 1412 }, { "epoch": 0.31, "learning_rate": 9.587576439933862e-06, "logits/chosen": -1.091834545135498, "logits/rejected": -1.0403499603271484, "logps/chosen": -212.8385772705078, "logps/rejected": -211.22080993652344, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/chosen": 1.2911514043807983, "rewards/margins": 11.358159065246582, "rewards/rejected": -10.067008018493652, "step": 1413 }, { "epoch": 0.31, "learning_rate": 9.586863335411017e-06, "logits/chosen": -0.9796624779701233, "logits/rejected": -0.9227901101112366, "logps/chosen": -134.9320526123047, "logps/rejected": -237.04656982421875, "loss": 0.5162, "rewards/accuracies": 0.0, "rewards/chosen": -0.46083375811576843, "rewards/margins": -0.5338989496231079, "rewards/rejected": 0.07306518405675888, "step": 1414 }, { "epoch": 0.31, "learning_rate": 9.586149641487257e-06, "logits/chosen": -0.955275297164917, "logits/rejected": -0.8823381662368774, "logps/chosen": -89.03355407714844, "logps/rejected": -146.506591796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.27808839082717896, "rewards/margins": 5.964376926422119, "rewards/rejected": -5.686288356781006, "step": 1415 }, { "epoch": 0.31, "learning_rate": 9.585435358254295e-06, "logits/chosen": -1.0939314365386963, "logits/rejected": -0.7107706665992737, "logps/chosen": -180.05947875976562, "logps/rejected": -332.5304260253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.915326118469238, "rewards/margins": 22.161720275878906, "rewards/rejected": -27.07704734802246, "step": 1416 }, { "epoch": 0.31, "learning_rate": 9.584720485803912e-06, "logits/chosen": -1.010593295097351, "logits/rejected": -0.866197943687439, "logps/chosen": -236.09303283691406, "logps/rejected": -486.561767578125, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": 2.3163743019104004, "rewards/margins": 20.581790924072266, "rewards/rejected": -18.265417098999023, "step": 1417 }, { "epoch": 0.31, "learning_rate": 9.584005024227967e-06, "logits/chosen": -0.8802647590637207, "logits/rejected": -0.8537725806236267, "logps/chosen": -57.55653381347656, "logps/rejected": -112.54454803466797, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.16706542670726776, "rewards/margins": 4.245298862457275, "rewards/rejected": -4.412364482879639, "step": 1418 }, { "epoch": 0.31, "learning_rate": 9.583288973618398e-06, "logits/chosen": -1.1447932720184326, "logits/rejected": -0.7839075922966003, "logps/chosen": -204.68594360351562, "logps/rejected": -337.3130798339844, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.7523865103721619, "rewards/margins": 21.56846046447754, "rewards/rejected": -20.81607437133789, "step": 1419 }, { "epoch": 0.31, "learning_rate": 9.582572334067213e-06, "logits/chosen": -1.094709038734436, "logits/rejected": -1.094709038734436, "logps/chosen": -156.66848754882812, "logps/rejected": -156.66848754882812, "loss": 0.5357, "rewards/accuracies": 0.0, "rewards/chosen": -0.917034924030304, "rewards/margins": 0.0, "rewards/rejected": -0.917034924030304, "step": 1420 }, { "epoch": 0.31, "learning_rate": 9.581855105666497e-06, "logits/chosen": -1.2443889379501343, "logits/rejected": -1.2443889379501343, "logps/chosen": -143.39874267578125, "logps/rejected": -143.39874267578125, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -4.7342448234558105, "rewards/margins": 0.0, "rewards/rejected": -4.7342448234558105, "step": 1421 }, { "epoch": 0.31, "learning_rate": 9.581137288508417e-06, "logits/chosen": -1.1910698413848877, "logits/rejected": -1.1712381839752197, "logps/chosen": -116.07515716552734, "logps/rejected": -66.9998779296875, "loss": 0.0893, "rewards/accuracies": 1.0, "rewards/chosen": -2.2029716968536377, "rewards/margins": 1.6316521167755127, "rewards/rejected": -3.8346238136291504, "step": 1422 }, { "epoch": 0.31, "learning_rate": 9.580418882685208e-06, "logits/chosen": -1.1192703247070312, "logits/rejected": -1.004140019416809, "logps/chosen": -246.0431671142578, "logps/rejected": -437.7327880859375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.3781753480434418, "rewards/margins": 5.440788269042969, "rewards/rejected": -5.818963527679443, "step": 1423 }, { "epoch": 0.32, "learning_rate": 9.579699888289184e-06, "logits/chosen": -1.2887678146362305, "logits/rejected": -1.1846224069595337, "logps/chosen": -198.58006286621094, "logps/rejected": -163.68435668945312, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 1.693843126296997, "rewards/margins": 8.582794189453125, "rewards/rejected": -6.888951301574707, "step": 1424 }, { "epoch": 0.32, "learning_rate": 9.578980305412733e-06, "logits/chosen": -0.9652576446533203, "logits/rejected": -1.0846182107925415, "logps/chosen": -193.97630310058594, "logps/rejected": -73.26802062988281, "loss": 0.1455, "rewards/accuracies": 1.0, "rewards/chosen": -1.5781387090682983, "rewards/margins": 1.0936912298202515, "rewards/rejected": -2.67182993888855, "step": 1425 }, { "epoch": 0.32, "learning_rate": 9.57826013414832e-06, "logits/chosen": -1.0295155048370361, "logits/rejected": -1.0187294483184814, "logps/chosen": -69.3857421875, "logps/rejected": -71.28131103515625, "loss": 2.1941, "rewards/accuracies": 1.0, "rewards/chosen": -0.6604156494140625, "rewards/margins": 1.3394111394882202, "rewards/rejected": -1.9998267889022827, "step": 1426 }, { "epoch": 0.32, "learning_rate": 9.577539374588486e-06, "logits/chosen": -1.0136847496032715, "logits/rejected": -0.9718562364578247, "logps/chosen": -162.60694885253906, "logps/rejected": -234.3511962890625, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": 2.0658891201019287, "rewards/margins": 2.721151828765869, "rewards/rejected": -0.6552627682685852, "step": 1427 }, { "epoch": 0.32, "learning_rate": 9.576818026825846e-06, "logits/chosen": -1.3325860500335693, "logits/rejected": -1.2961701154708862, "logps/chosen": -109.3924789428711, "logps/rejected": -84.40245056152344, "loss": 0.0456, "rewards/accuracies": 1.0, "rewards/chosen": -2.3967552185058594, "rewards/margins": 2.3563156127929688, "rewards/rejected": -4.753070831298828, "step": 1428 }, { "epoch": 0.32, "learning_rate": 9.57609609095309e-06, "logits/chosen": -1.0535271167755127, "logits/rejected": -0.6640841364860535, "logps/chosen": -92.83182525634766, "logps/rejected": -538.59423828125, "loss": 1.9592, "rewards/accuracies": 1.0, "rewards/chosen": 0.8253952264785767, "rewards/margins": 43.324607849121094, "rewards/rejected": -42.49921417236328, "step": 1429 }, { "epoch": 0.32, "learning_rate": 9.57537356706299e-06, "logits/chosen": -1.4891477823257446, "logits/rejected": -1.6442205905914307, "logps/chosen": -222.96429443359375, "logps/rejected": -39.25193405151367, "loss": 0.2227, "rewards/accuracies": 1.0, "rewards/chosen": -0.931805431842804, "rewards/margins": 0.5839269757270813, "rewards/rejected": -1.5157324075698853, "step": 1430 }, { "epoch": 0.32, "learning_rate": 9.574650455248384e-06, "logits/chosen": -0.9855403304100037, "logits/rejected": -0.9855403304100037, "logps/chosen": -142.47166442871094, "logps/rejected": -142.47166442871094, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.908982276916504, "rewards/margins": 0.0, "rewards/rejected": -5.908982276916504, "step": 1431 }, { "epoch": 0.32, "learning_rate": 9.573926755602194e-06, "logits/chosen": -0.9294427633285522, "logits/rejected": -0.8584463596343994, "logps/chosen": -134.137939453125, "logps/rejected": -167.7684783935547, "loss": 1.0979, "rewards/accuracies": 0.0, "rewards/chosen": -1.3943496942520142, "rewards/margins": -2.0777831077575684, "rewards/rejected": 0.6834335327148438, "step": 1432 }, { "epoch": 0.32, "learning_rate": 9.573202468217408e-06, "logits/chosen": -1.3350828886032104, "logits/rejected": -1.3042809963226318, "logps/chosen": -123.09126281738281, "logps/rejected": -128.91024780273438, "loss": 1.3596, "rewards/accuracies": 1.0, "rewards/chosen": -1.7992477416992188, "rewards/margins": 2.1972367763519287, "rewards/rejected": -3.9964845180511475, "step": 1433 }, { "epoch": 0.32, "learning_rate": 9.572477593187101e-06, "logits/chosen": -0.9427924752235413, "logits/rejected": -0.8583434820175171, "logps/chosen": -97.27067565917969, "logps/rejected": -152.0930938720703, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.7298080325126648, "rewards/margins": 4.432287693023682, "rewards/rejected": -5.162095546722412, "step": 1434 }, { "epoch": 0.32, "learning_rate": 9.571752130604414e-06, "logits/chosen": -0.8476680517196655, "logits/rejected": -0.8023329377174377, "logps/chosen": -131.4503173828125, "logps/rejected": -143.5374755859375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.652636706829071, "rewards/margins": 6.718058109283447, "rewards/rejected": -6.0654215812683105, "step": 1435 }, { "epoch": 0.32, "learning_rate": 9.571026080562569e-06, "logits/chosen": -0.7830942869186401, "logits/rejected": -0.6417547464370728, "logps/chosen": -130.7147979736328, "logps/rejected": -255.88548278808594, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": -1.8236099481582642, "rewards/margins": 3.7511444091796875, "rewards/rejected": -5.574754238128662, "step": 1436 }, { "epoch": 0.32, "learning_rate": 9.57029944315486e-06, "logits/chosen": -0.8120819926261902, "logits/rejected": -0.7170283794403076, "logps/chosen": -73.5621337890625, "logps/rejected": -130.61361694335938, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": -0.7221725583076477, "rewards/margins": 2.7006607055664062, "rewards/rejected": -3.422833204269409, "step": 1437 }, { "epoch": 0.32, "learning_rate": 9.569572218474662e-06, "logits/chosen": -1.0935503244400024, "logits/rejected": -0.9878449440002441, "logps/chosen": -139.39175415039062, "logps/rejected": -177.053466796875, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": 0.4109817445278168, "rewards/margins": 6.233250617980957, "rewards/rejected": -5.822268962860107, "step": 1438 }, { "epoch": 0.32, "learning_rate": 9.568844406615416e-06, "logits/chosen": -0.7629790306091309, "logits/rejected": -0.7896048426628113, "logps/chosen": -133.3916778564453, "logps/rejected": -102.76324462890625, "loss": 0.8467, "rewards/accuracies": 0.0, "rewards/chosen": -3.6460137367248535, "rewards/margins": -1.4833869934082031, "rewards/rejected": -2.1626267433166504, "step": 1439 }, { "epoch": 0.32, "learning_rate": 9.568116007670647e-06, "logits/chosen": -0.8690988421440125, "logits/rejected": -0.7980567812919617, "logps/chosen": -231.97222900390625, "logps/rejected": -158.35635375976562, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 3.759014844894409, "rewards/margins": 9.028020858764648, "rewards/rejected": -5.269006252288818, "step": 1440 }, { "epoch": 0.32, "learning_rate": 9.567387021733954e-06, "logits/chosen": -0.8475185632705688, "logits/rejected": -0.8475185632705688, "logps/chosen": -101.13662719726562, "logps/rejected": -101.13662719726562, "loss": 0.5344, "rewards/accuracies": 0.0, "rewards/chosen": -2.8350021839141846, "rewards/margins": 0.0, "rewards/rejected": -2.8350021839141846, "step": 1441 }, { "epoch": 0.32, "learning_rate": 9.566657448899009e-06, "logits/chosen": -0.8957375288009644, "logits/rejected": -0.7520368099212646, "logps/chosen": -166.9318084716797, "logps/rejected": -262.0289611816406, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.3221389949321747, "rewards/margins": 4.02760648727417, "rewards/rejected": -4.349745273590088, "step": 1442 }, { "epoch": 0.32, "learning_rate": 9.565927289259558e-06, "logits/chosen": -1.0273964405059814, "logits/rejected": -1.0351073741912842, "logps/chosen": -197.03982543945312, "logps/rejected": -134.0653076171875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.7997329831123352, "rewards/margins": 7.031240463256836, "rewards/rejected": -6.231507301330566, "step": 1443 }, { "epoch": 0.32, "learning_rate": 9.565196542909425e-06, "logits/chosen": -0.9305521249771118, "logits/rejected": -0.8845061659812927, "logps/chosen": -167.27740478515625, "logps/rejected": -183.21722412109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 3.0911622047424316, "rewards/margins": 12.336494445800781, "rewards/rejected": -9.245332717895508, "step": 1444 }, { "epoch": 0.32, "learning_rate": 9.564465209942512e-06, "logits/chosen": -0.8800886869430542, "logits/rejected": -0.8218404054641724, "logps/chosen": -117.2659912109375, "logps/rejected": -216.4378662109375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.572283923625946, "rewards/margins": 4.920120716094971, "rewards/rejected": -5.492404460906982, "step": 1445 }, { "epoch": 0.32, "learning_rate": 9.563733290452795e-06, "logits/chosen": -1.0479114055633545, "logits/rejected": -1.0427563190460205, "logps/chosen": -91.48406219482422, "logps/rejected": -77.7095718383789, "loss": 0.2797, "rewards/accuracies": 1.0, "rewards/chosen": -0.032419588416814804, "rewards/margins": 0.4529830813407898, "rewards/rejected": -0.4854026734828949, "step": 1446 }, { "epoch": 0.32, "learning_rate": 9.56300078453432e-06, "logits/chosen": -0.8947790861129761, "logits/rejected": -0.8947790861129761, "logps/chosen": -146.5155029296875, "logps/rejected": -146.5155029296875, "loss": 0.3583, "rewards/accuracies": 0.0, "rewards/chosen": -5.567635536193848, "rewards/margins": 0.0, "rewards/rejected": -5.567635536193848, "step": 1447 }, { "epoch": 0.32, "learning_rate": 9.562267692281212e-06, "logits/chosen": -0.8741936683654785, "logits/rejected": -0.8641306757926941, "logps/chosen": -136.25955200195312, "logps/rejected": -141.37832641601562, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -1.6797798871994019, "rewards/margins": 3.6809043884277344, "rewards/rejected": -5.360684394836426, "step": 1448 }, { "epoch": 0.32, "learning_rate": 9.561534013787671e-06, "logits/chosen": -1.0182815790176392, "logits/rejected": -0.9383592009544373, "logps/chosen": -218.56692504882812, "logps/rejected": -97.05929565429688, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 1.0149978399276733, "rewards/margins": 5.6317338943481445, "rewards/rejected": -4.616735935211182, "step": 1449 }, { "epoch": 0.32, "learning_rate": 9.560799749147977e-06, "logits/chosen": -0.9223170280456543, "logits/rejected": -0.8804346919059753, "logps/chosen": -88.65178680419922, "logps/rejected": -139.4505615234375, "loss": 0.0531, "rewards/accuracies": 1.0, "rewards/chosen": -0.7731941342353821, "rewards/margins": 2.2503128051757812, "rewards/rejected": -3.0235068798065186, "step": 1450 }, { "epoch": 0.32, "learning_rate": 9.56006489845648e-06, "logits/chosen": -0.7514037489891052, "logits/rejected": -0.7439612746238708, "logps/chosen": -110.45671081542969, "logps/rejected": -114.2802734375, "loss": 0.3199, "rewards/accuracies": 1.0, "rewards/chosen": -0.995928943157196, "rewards/margins": 0.23545759916305542, "rewards/rejected": -1.2313865423202515, "step": 1451 }, { "epoch": 0.32, "learning_rate": 9.559329461807605e-06, "logits/chosen": -1.220391035079956, "logits/rejected": -1.1932108402252197, "logps/chosen": -69.09497833251953, "logps/rejected": -66.92276000976562, "loss": 0.5444, "rewards/accuracies": 0.0, "rewards/chosen": -1.0603569746017456, "rewards/margins": -0.6783505082130432, "rewards/rejected": -0.3820064663887024, "step": 1452 }, { "epoch": 0.32, "learning_rate": 9.558593439295853e-06, "logits/chosen": -0.7300064563751221, "logits/rejected": -0.41828837990760803, "logps/chosen": -92.10838317871094, "logps/rejected": -305.861083984375, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -1.2880394458770752, "rewards/margins": 14.411014556884766, "rewards/rejected": -15.699053764343262, "step": 1453 }, { "epoch": 0.32, "learning_rate": 9.557856831015805e-06, "logits/chosen": -0.8262705206871033, "logits/rejected": -0.8093515634536743, "logps/chosen": -198.74954223632812, "logps/rejected": -153.11769104003906, "loss": 0.0465, "rewards/accuracies": 1.0, "rewards/chosen": 0.05183868482708931, "rewards/margins": 4.33404016494751, "rewards/rejected": -4.282201290130615, "step": 1454 }, { "epoch": 0.32, "learning_rate": 9.55711963706211e-06, "logits/chosen": -0.9999414086341858, "logits/rejected": -1.0102094411849976, "logps/chosen": -126.1861572265625, "logps/rejected": -97.98750305175781, "loss": 0.634, "rewards/accuracies": 0.0, "rewards/chosen": -1.1178604364395142, "rewards/margins": -0.9005188345909119, "rewards/rejected": -0.2173416167497635, "step": 1455 }, { "epoch": 0.32, "learning_rate": 9.556381857529497e-06, "logits/chosen": -1.0271915197372437, "logits/rejected": -1.0015860795974731, "logps/chosen": -134.91104125976562, "logps/rejected": -149.8902587890625, "loss": 0.3305, "rewards/accuracies": 1.0, "rewards/chosen": -0.12167053669691086, "rewards/margins": 0.0652465745806694, "rewards/rejected": -0.18691711127758026, "step": 1456 }, { "epoch": 0.32, "learning_rate": 9.555643492512767e-06, "logits/chosen": -1.1317607164382935, "logits/rejected": -1.149983286857605, "logps/chosen": -203.91152954101562, "logps/rejected": -224.863037109375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 3.061663866043091, "rewards/margins": 4.287588596343994, "rewards/rejected": -1.2259247303009033, "step": 1457 }, { "epoch": 0.32, "learning_rate": 9.554904542106802e-06, "logits/chosen": -0.8858094215393066, "logits/rejected": -0.7412226796150208, "logps/chosen": -140.84146118164062, "logps/rejected": -408.0035400390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.8534088134765625, "rewards/margins": 8.382772445678711, "rewards/rejected": -6.529364109039307, "step": 1458 }, { "epoch": 0.32, "learning_rate": 9.55416500640655e-06, "logits/chosen": -1.092682957649231, "logits/rejected": -1.1029831171035767, "logps/chosen": -155.400634765625, "logps/rejected": -207.194580078125, "loss": 0.7852, "rewards/accuracies": 0.0, "rewards/chosen": -3.3637139797210693, "rewards/margins": -1.3370780944824219, "rewards/rejected": -2.0266358852386475, "step": 1459 }, { "epoch": 0.32, "learning_rate": 9.553424885507045e-06, "logits/chosen": -0.6045842170715332, "logits/rejected": -0.5945568084716797, "logps/chosen": -68.3244857788086, "logps/rejected": -38.815223693847656, "loss": 0.1694, "rewards/accuracies": 1.0, "rewards/chosen": -0.8695442080497742, "rewards/margins": 1.094785451889038, "rewards/rejected": -1.9643296003341675, "step": 1460 }, { "epoch": 0.32, "learning_rate": 9.552684179503389e-06, "logits/chosen": -0.457265168428421, "logits/rejected": -0.46830394864082336, "logps/chosen": -183.54830932617188, "logps/rejected": -134.28839111328125, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": 0.18508301675319672, "rewards/margins": 2.511136531829834, "rewards/rejected": -2.3260536193847656, "step": 1461 }, { "epoch": 0.32, "learning_rate": 9.551942888490759e-06, "logits/chosen": -1.0637682676315308, "logits/rejected": -0.9649878740310669, "logps/chosen": -141.88438415527344, "logps/rejected": -214.80238342285156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.6166900396347046, "rewards/margins": 9.662287712097168, "rewards/rejected": -8.045598030090332, "step": 1462 }, { "epoch": 0.32, "learning_rate": 9.55120101256441e-06, "logits/chosen": -1.2227157354354858, "logits/rejected": -1.1800627708435059, "logps/chosen": -146.38217163085938, "logps/rejected": -165.9560546875, "loss": 0.2176, "rewards/accuracies": 1.0, "rewards/chosen": 0.21725769340991974, "rewards/margins": 5.742944240570068, "rewards/rejected": -5.525686740875244, "step": 1463 }, { "epoch": 0.32, "learning_rate": 9.550458551819672e-06, "logits/chosen": -1.1930633783340454, "logits/rejected": -1.1930633783340454, "logps/chosen": -180.39849853515625, "logps/rejected": -180.39849853515625, "loss": 0.4891, "rewards/accuracies": 0.0, "rewards/chosen": -3.590101718902588, "rewards/margins": 0.0, "rewards/rejected": -3.590101718902588, "step": 1464 }, { "epoch": 0.32, "learning_rate": 9.54971550635195e-06, "logits/chosen": -0.939628005027771, "logits/rejected": -0.9412680268287659, "logps/chosen": -122.79289245605469, "logps/rejected": -106.68470001220703, "loss": 0.0882, "rewards/accuracies": 1.0, "rewards/chosen": -2.516653537750244, "rewards/margins": 1.894944190979004, "rewards/rejected": -4.411597728729248, "step": 1465 }, { "epoch": 0.32, "learning_rate": 9.548971876256721e-06, "logits/chosen": -1.253414511680603, "logits/rejected": -1.253414511680603, "logps/chosen": -143.15760803222656, "logps/rejected": -143.15760803222656, "loss": 0.4485, "rewards/accuracies": 0.0, "rewards/chosen": -2.7045586109161377, "rewards/margins": 0.0, "rewards/rejected": -2.7045586109161377, "step": 1466 }, { "epoch": 0.32, "learning_rate": 9.548227661629541e-06, "logits/chosen": -0.85029536485672, "logits/rejected": -0.8479486107826233, "logps/chosen": -84.10977172851562, "logps/rejected": -105.41028594970703, "loss": 0.1416, "rewards/accuracies": 1.0, "rewards/chosen": 0.1669876128435135, "rewards/margins": 1.1173515319824219, "rewards/rejected": -0.9503639340400696, "step": 1467 }, { "epoch": 0.32, "learning_rate": 9.547482862566043e-06, "logits/chosen": -1.445603370666504, "logits/rejected": -1.5019550323486328, "logps/chosen": -129.80233764648438, "logps/rejected": -128.71595764160156, "loss": 0.0771, "rewards/accuracies": 1.0, "rewards/chosen": 1.428181529045105, "rewards/margins": 3.5909180641174316, "rewards/rejected": -2.162736654281616, "step": 1468 }, { "epoch": 0.33, "learning_rate": 9.546737479161926e-06, "logits/chosen": -0.733610212802887, "logits/rejected": -0.7269195318222046, "logps/chosen": -102.2914810180664, "logps/rejected": -133.0010528564453, "loss": 0.1809, "rewards/accuracies": 1.0, "rewards/chosen": -0.9083046317100525, "rewards/margins": 0.87766033411026, "rewards/rejected": -1.7859649658203125, "step": 1469 }, { "epoch": 0.33, "learning_rate": 9.545991511512975e-06, "logits/chosen": -0.8330610394477844, "logits/rejected": -0.7957985997200012, "logps/chosen": -91.19976806640625, "logps/rejected": -188.47320556640625, "loss": 0.1168, "rewards/accuracies": 1.0, "rewards/chosen": -2.0369622707366943, "rewards/margins": 1.3812811374664307, "rewards/rejected": -3.418243408203125, "step": 1470 }, { "epoch": 0.33, "learning_rate": 9.545244959715041e-06, "logits/chosen": -1.2275563478469849, "logits/rejected": -1.2197130918502808, "logps/chosen": -94.67607116699219, "logps/rejected": -116.80465698242188, "loss": 0.1722, "rewards/accuracies": 1.0, "rewards/chosen": -0.38564378023147583, "rewards/margins": 1.3229308128356934, "rewards/rejected": -1.708574652671814, "step": 1471 }, { "epoch": 0.33, "learning_rate": 9.544497823864058e-06, "logits/chosen": -0.8706384301185608, "logits/rejected": -0.8506556749343872, "logps/chosen": -78.63034057617188, "logps/rejected": -54.645591735839844, "loss": 0.1439, "rewards/accuracies": 1.0, "rewards/chosen": -0.3848098814487457, "rewards/margins": 1.1091232299804688, "rewards/rejected": -1.493933081626892, "step": 1472 }, { "epoch": 0.33, "learning_rate": 9.543750104056029e-06, "logits/chosen": -0.9759712219238281, "logits/rejected": -0.9341124296188354, "logps/chosen": -132.51869201660156, "logps/rejected": -179.8668670654297, "loss": 0.0302, "rewards/accuracies": 1.0, "rewards/chosen": -0.18620453774929047, "rewards/margins": 5.572553634643555, "rewards/rejected": -5.758758068084717, "step": 1473 }, { "epoch": 0.33, "learning_rate": 9.543001800387034e-06, "logits/chosen": -0.7216985821723938, "logits/rejected": -0.7181999087333679, "logps/chosen": -14.155651092529297, "logps/rejected": -22.472625732421875, "loss": 0.2027, "rewards/accuracies": 1.0, "rewards/chosen": -0.17960987985134125, "rewards/margins": 0.6947519183158875, "rewards/rejected": -0.8743618130683899, "step": 1474 }, { "epoch": 0.33, "learning_rate": 9.54225291295323e-06, "logits/chosen": -0.854357898235321, "logits/rejected": -0.7923890948295593, "logps/chosen": -132.74476623535156, "logps/rejected": -253.4404754638672, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.9522567987442017, "rewards/margins": 7.684605598449707, "rewards/rejected": -6.732348918914795, "step": 1475 }, { "epoch": 0.33, "learning_rate": 9.541503441850844e-06, "logits/chosen": -0.9612948894500732, "logits/rejected": -0.9000799655914307, "logps/chosen": -77.57056427001953, "logps/rejected": -145.47674560546875, "loss": 0.1092, "rewards/accuracies": 1.0, "rewards/chosen": 2.0511252880096436, "rewards/margins": 1.4113578796386719, "rewards/rejected": 0.6397674679756165, "step": 1476 }, { "epoch": 0.33, "learning_rate": 9.540753387176183e-06, "logits/chosen": -0.7766066789627075, "logits/rejected": -0.7469969987869263, "logps/chosen": -83.66488647460938, "logps/rejected": -65.0684814453125, "loss": 0.0881, "rewards/accuracies": 1.0, "rewards/chosen": -1.897222876548767, "rewards/margins": 1.6854721307754517, "rewards/rejected": -3.5826950073242188, "step": 1477 }, { "epoch": 0.33, "learning_rate": 9.54000274902563e-06, "logits/chosen": -1.1326044797897339, "logits/rejected": -1.0997188091278076, "logps/chosen": -96.81497955322266, "logps/rejected": -173.31317138671875, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 0.36725541949272156, "rewards/margins": 4.335813045501709, "rewards/rejected": -3.968557834625244, "step": 1478 }, { "epoch": 0.33, "learning_rate": 9.539251527495636e-06, "logits/chosen": -1.22384512424469, "logits/rejected": -1.22384512424469, "logps/chosen": -222.66351318359375, "logps/rejected": -222.66351318359375, "loss": 0.348, "rewards/accuracies": 0.0, "rewards/chosen": -9.272911071777344, "rewards/margins": 0.0, "rewards/rejected": -9.272911071777344, "step": 1479 }, { "epoch": 0.33, "learning_rate": 9.538499722682733e-06, "logits/chosen": -1.2876609563827515, "logits/rejected": -1.2837787866592407, "logps/chosen": -128.83255004882812, "logps/rejected": -126.2373046875, "loss": 0.889, "rewards/accuracies": 0.0, "rewards/chosen": -1.320002794265747, "rewards/margins": -1.5924911499023438, "rewards/rejected": 0.27248841524124146, "step": 1480 }, { "epoch": 0.33, "learning_rate": 9.537747334683524e-06, "logits/chosen": -1.296110987663269, "logits/rejected": -1.2906423807144165, "logps/chosen": -113.08715057373047, "logps/rejected": -87.82368469238281, "loss": 0.3773, "rewards/accuracies": 1.0, "rewards/chosen": -3.3573060035705566, "rewards/margins": 0.11477541923522949, "rewards/rejected": -3.472081422805786, "step": 1481 }, { "epoch": 0.33, "learning_rate": 9.536994363594694e-06, "logits/chosen": -0.6182641983032227, "logits/rejected": -0.6357842087745667, "logps/chosen": -107.31689453125, "logps/rejected": -125.50276947021484, "loss": 0.136, "rewards/accuracies": 1.0, "rewards/chosen": -2.2557830810546875, "rewards/margins": 1.2155189514160156, "rewards/rejected": -3.471302032470703, "step": 1482 }, { "epoch": 0.33, "learning_rate": 9.536240809512994e-06, "logits/chosen": -1.1090277433395386, "logits/rejected": -1.1090277433395386, "logps/chosen": -193.18460083007812, "logps/rejected": -193.18460083007812, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -5.270947456359863, "rewards/margins": 0.0, "rewards/rejected": -5.270947456359863, "step": 1483 }, { "epoch": 0.33, "learning_rate": 9.535486672535255e-06, "logits/chosen": -0.8058130145072937, "logits/rejected": -0.8243414759635925, "logps/chosen": -99.68463134765625, "logps/rejected": -82.18284606933594, "loss": 0.6238, "rewards/accuracies": 0.0, "rewards/chosen": -2.0905601978302, "rewards/margins": -0.9089454412460327, "rewards/rejected": -1.1816147565841675, "step": 1484 }, { "epoch": 0.33, "learning_rate": 9.53473195275838e-06, "logits/chosen": -1.2421513795852661, "logits/rejected": -1.2248001098632812, "logps/chosen": -135.35153198242188, "logps/rejected": -181.61639404296875, "loss": 0.827, "rewards/accuracies": 0.0, "rewards/chosen": -2.666844129562378, "rewards/margins": -1.1892532110214233, "rewards/rejected": -1.4775909185409546, "step": 1485 }, { "epoch": 0.33, "learning_rate": 9.53397665027935e-06, "logits/chosen": -1.0195872783660889, "logits/rejected": -1.081834077835083, "logps/chosen": -242.92935180664062, "logps/rejected": -201.61135864257812, "loss": 0.0634, "rewards/accuracies": 1.0, "rewards/chosen": 1.2884048223495483, "rewards/margins": 2.0216355323791504, "rewards/rejected": -0.7332305908203125, "step": 1486 }, { "epoch": 0.33, "learning_rate": 9.533220765195223e-06, "logits/chosen": -0.8262786865234375, "logits/rejected": -0.7717193365097046, "logps/chosen": -197.96951293945312, "logps/rejected": -101.27803039550781, "loss": 0.1629, "rewards/accuracies": 1.0, "rewards/chosen": -3.844372510910034, "rewards/margins": 0.9540059566497803, "rewards/rejected": -4.7983784675598145, "step": 1487 }, { "epoch": 0.33, "learning_rate": 9.532464297603124e-06, "logits/chosen": -0.9379470944404602, "logits/rejected": -0.9014647006988525, "logps/chosen": -168.09942626953125, "logps/rejected": -150.58407592773438, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": 0.08710785210132599, "rewards/margins": 4.1455397605896, "rewards/rejected": -4.058432102203369, "step": 1488 }, { "epoch": 0.33, "learning_rate": 9.531707247600258e-06, "logits/chosen": -0.9989267587661743, "logits/rejected": -0.4223194718360901, "logps/chosen": -156.0202178955078, "logps/rejected": -423.3818359375, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": 2.659105062484741, "rewards/margins": 31.89604949951172, "rewards/rejected": -29.2369441986084, "step": 1489 }, { "epoch": 0.33, "learning_rate": 9.530949615283902e-06, "logits/chosen": -0.7781082987785339, "logits/rejected": -0.7323649525642395, "logps/chosen": -83.1884994506836, "logps/rejected": -98.45222473144531, "loss": 0.3958, "rewards/accuracies": 1.0, "rewards/chosen": -1.5873702764511108, "rewards/margins": 2.2692103385925293, "rewards/rejected": -3.8565804958343506, "step": 1490 }, { "epoch": 0.33, "learning_rate": 9.530191400751416e-06, "logits/chosen": -1.121066927909851, "logits/rejected": -1.0814684629440308, "logps/chosen": -97.42316436767578, "logps/rejected": -46.52416229248047, "loss": 0.2457, "rewards/accuracies": 1.0, "rewards/chosen": -0.9614952206611633, "rewards/margins": 0.7487127184867859, "rewards/rejected": -1.7102079391479492, "step": 1491 }, { "epoch": 0.33, "learning_rate": 9.529432604100223e-06, "logits/chosen": -1.0984766483306885, "logits/rejected": -1.0731604099273682, "logps/chosen": -98.3084945678711, "logps/rejected": -81.71446228027344, "loss": 0.1004, "rewards/accuracies": 1.0, "rewards/chosen": -1.3590980768203735, "rewards/margins": 4.2578277587890625, "rewards/rejected": -5.6169257164001465, "step": 1492 }, { "epoch": 0.33, "learning_rate": 9.528673225427831e-06, "logits/chosen": -0.806930422782898, "logits/rejected": -0.7787973880767822, "logps/chosen": -174.70077514648438, "logps/rejected": -125.37063598632812, "loss": 0.0652, "rewards/accuracies": 1.0, "rewards/chosen": -0.6204315423965454, "rewards/margins": 2.500993251800537, "rewards/rejected": -3.121424913406372, "step": 1493 }, { "epoch": 0.33, "learning_rate": 9.527913264831817e-06, "logits/chosen": -1.2857202291488647, "logits/rejected": -1.3007793426513672, "logps/chosen": -133.96615600585938, "logps/rejected": -138.5460205078125, "loss": 0.2314, "rewards/accuracies": 1.0, "rewards/chosen": 0.897003173828125, "rewards/margins": 0.5301803350448608, "rewards/rejected": 0.3668228089809418, "step": 1494 }, { "epoch": 0.33, "learning_rate": 9.52715272240983e-06, "logits/chosen": -1.151491641998291, "logits/rejected": -1.164708137512207, "logps/chosen": -190.79949951171875, "logps/rejected": -155.28741455078125, "loss": 0.0735, "rewards/accuracies": 1.0, "rewards/chosen": 2.0781219005584717, "rewards/margins": 5.1447930335998535, "rewards/rejected": -3.066671133041382, "step": 1495 }, { "epoch": 0.33, "learning_rate": 9.526391598259604e-06, "logits/chosen": -0.5888912677764893, "logits/rejected": -0.46887314319610596, "logps/chosen": -251.7169647216797, "logps/rejected": -52.41289520263672, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 2.652052402496338, "rewards/margins": 5.006302833557129, "rewards/rejected": -2.35425066947937, "step": 1496 }, { "epoch": 0.33, "learning_rate": 9.525629892478936e-06, "logits/chosen": -1.1913644075393677, "logits/rejected": -1.1701432466506958, "logps/chosen": -95.6836929321289, "logps/rejected": -104.72117614746094, "loss": 0.4908, "rewards/accuracies": 0.0, "rewards/chosen": -1.5852638483047485, "rewards/margins": -0.4966834783554077, "rewards/rejected": -1.0885803699493408, "step": 1497 }, { "epoch": 0.33, "learning_rate": 9.524867605165709e-06, "logits/chosen": -1.0935189723968506, "logits/rejected": -1.099317193031311, "logps/chosen": -154.7146453857422, "logps/rejected": -68.89292907714844, "loss": 0.5382, "rewards/accuracies": 0.0, "rewards/chosen": -5.292163848876953, "rewards/margins": -0.5516924858093262, "rewards/rejected": -4.740471363067627, "step": 1498 }, { "epoch": 0.33, "learning_rate": 9.52410473641787e-06, "logits/chosen": -0.8629369139671326, "logits/rejected": -0.8352292776107788, "logps/chosen": -103.87052917480469, "logps/rejected": -156.3438262939453, "loss": 0.2819, "rewards/accuracies": 1.0, "rewards/chosen": -0.8480087518692017, "rewards/margins": 0.35730135440826416, "rewards/rejected": -1.2053101062774658, "step": 1499 }, { "epoch": 0.33, "learning_rate": 9.523341286333448e-06, "logits/chosen": -0.6521677374839783, "logits/rejected": -0.5728943347930908, "logps/chosen": -81.6292724609375, "logps/rejected": -154.59344482421875, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": 0.5924347043037415, "rewards/margins": 5.082266330718994, "rewards/rejected": -4.489831447601318, "step": 1500 }, { "epoch": 0.33, "learning_rate": 9.522577255010546e-06, "logits/chosen": -1.3342779874801636, "logits/rejected": -1.3492639064788818, "logps/chosen": -176.34280395507812, "logps/rejected": -166.82696533203125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 2.6523163318634033, "rewards/margins": 5.8931121826171875, "rewards/rejected": -3.240795850753784, "step": 1501 }, { "epoch": 0.33, "learning_rate": 9.521812642547337e-06, "logits/chosen": -0.8913530707359314, "logits/rejected": -0.9011659026145935, "logps/chosen": -133.9514923095703, "logps/rejected": -166.85751342773438, "loss": 0.2634, "rewards/accuracies": 1.0, "rewards/chosen": -5.171366214752197, "rewards/margins": 2.2645010948181152, "rewards/rejected": -7.4358673095703125, "step": 1502 }, { "epoch": 0.33, "learning_rate": 9.521047449042075e-06, "logits/chosen": -0.747696042060852, "logits/rejected": -0.7215796709060669, "logps/chosen": -95.010009765625, "logps/rejected": -165.95973205566406, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.0103241205215454, "rewards/margins": 6.164178848266602, "rewards/rejected": -7.174502849578857, "step": 1503 }, { "epoch": 0.33, "learning_rate": 9.520281674593084e-06, "logits/chosen": -0.8321494460105896, "logits/rejected": -0.7798017859458923, "logps/chosen": -181.8812713623047, "logps/rejected": -198.39480590820312, "loss": 1.7636, "rewards/accuracies": 0.0, "rewards/chosen": -4.478021144866943, "rewards/margins": -3.492401123046875, "rewards/rejected": -0.9856201410293579, "step": 1504 }, { "epoch": 0.33, "learning_rate": 9.519515319298765e-06, "logits/chosen": -0.9556368589401245, "logits/rejected": -0.8803284764289856, "logps/chosen": -166.21328735351562, "logps/rejected": -50.61231231689453, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": 2.0580780506134033, "rewards/margins": 4.273453712463379, "rewards/rejected": -2.2153756618499756, "step": 1505 }, { "epoch": 0.33, "learning_rate": 9.51874838325759e-06, "logits/chosen": -0.9716039896011353, "logits/rejected": -0.9656436443328857, "logps/chosen": -115.61669158935547, "logps/rejected": -155.50555419921875, "loss": 0.1302, "rewards/accuracies": 1.0, "rewards/chosen": -1.7302360534667969, "rewards/margins": 1.4706552028656006, "rewards/rejected": -3.2008912563323975, "step": 1506 }, { "epoch": 0.33, "learning_rate": 9.517980866568112e-06, "logits/chosen": -1.0095704793930054, "logits/rejected": -1.0015068054199219, "logps/chosen": -148.85728454589844, "logps/rejected": -134.79412841796875, "loss": 0.069, "rewards/accuracies": 1.0, "rewards/chosen": 0.451324462890625, "rewards/margins": 1.9203437566757202, "rewards/rejected": -1.4690192937850952, "step": 1507 }, { "epoch": 0.33, "learning_rate": 9.517212769328952e-06, "logits/chosen": -0.7456738352775574, "logits/rejected": -0.7406860589981079, "logps/chosen": -141.7916717529297, "logps/rejected": -73.06564331054688, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": 0.5963058471679688, "rewards/margins": 3.6150238513946533, "rewards/rejected": -3.0187180042266846, "step": 1508 }, { "epoch": 0.33, "learning_rate": 9.516444091638812e-06, "logits/chosen": -0.8235673904418945, "logits/rejected": -0.7719396352767944, "logps/chosen": -131.22593688964844, "logps/rejected": -157.67245483398438, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -0.7495254874229431, "rewards/margins": 3.335080146789551, "rewards/rejected": -4.084605693817139, "step": 1509 }, { "epoch": 0.33, "learning_rate": 9.515674833596464e-06, "logits/chosen": -1.1612433195114136, "logits/rejected": -1.1612433195114136, "logps/chosen": -174.40496826171875, "logps/rejected": -174.40496826171875, "loss": 0.3546, "rewards/accuracies": 0.0, "rewards/chosen": -1.7926971912384033, "rewards/margins": 0.0, "rewards/rejected": -1.7926971912384033, "step": 1510 }, { "epoch": 0.33, "learning_rate": 9.514904995300754e-06, "logits/chosen": -1.0117130279541016, "logits/rejected": -1.0527732372283936, "logps/chosen": -87.10745239257812, "logps/rejected": -39.619327545166016, "loss": 0.1696, "rewards/accuracies": 1.0, "rewards/chosen": -1.0005959272384644, "rewards/margins": 1.1993004083633423, "rewards/rejected": -2.1998963356018066, "step": 1511 }, { "epoch": 0.33, "learning_rate": 9.514134576850605e-06, "logits/chosen": -0.9027466773986816, "logits/rejected": -0.9027466773986816, "logps/chosen": -164.25079345703125, "logps/rejected": -164.25079345703125, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -5.429229736328125, "rewards/margins": 0.0, "rewards/rejected": -5.429229736328125, "step": 1512 }, { "epoch": 0.33, "learning_rate": 9.513363578345014e-06, "logits/chosen": -0.9684589505195618, "logits/rejected": -1.0300483703613281, "logps/chosen": -155.7423858642578, "logps/rejected": -168.78907775878906, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 1.5083709955215454, "rewards/margins": 5.833796501159668, "rewards/rejected": -4.325425624847412, "step": 1513 }, { "epoch": 0.34, "learning_rate": 9.512591999883056e-06, "logits/chosen": -0.932152271270752, "logits/rejected": -0.8031623959541321, "logps/chosen": -200.64895629882812, "logps/rejected": -242.72998046875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.119648814201355, "rewards/margins": 5.9967942237854, "rewards/rejected": -7.116443157196045, "step": 1514 }, { "epoch": 0.34, "learning_rate": 9.511819841563872e-06, "logits/chosen": -1.3345032930374146, "logits/rejected": -1.3493443727493286, "logps/chosen": -209.3316650390625, "logps/rejected": -209.45269775390625, "loss": 0.0828, "rewards/accuracies": 1.0, "rewards/chosen": 2.928022861480713, "rewards/margins": 1.7146821022033691, "rewards/rejected": 1.2133407592773438, "step": 1515 }, { "epoch": 0.34, "learning_rate": 9.511047103486685e-06, "logits/chosen": -0.7041420936584473, "logits/rejected": -0.6402644515037537, "logps/chosen": -129.9480743408203, "logps/rejected": -157.44491577148438, "loss": 0.0893, "rewards/accuracies": 1.0, "rewards/chosen": -1.0198898315429688, "rewards/margins": 3.6546058654785156, "rewards/rejected": -4.674495697021484, "step": 1516 }, { "epoch": 0.34, "learning_rate": 9.510273785750788e-06, "logits/chosen": -1.0527853965759277, "logits/rejected": -1.0241968631744385, "logps/chosen": -157.81900024414062, "logps/rejected": -58.61931228637695, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": 2.125335693359375, "rewards/margins": 5.162448883056641, "rewards/rejected": -3.0371129512786865, "step": 1517 }, { "epoch": 0.34, "learning_rate": 9.509499888455554e-06, "logits/chosen": -1.0212873220443726, "logits/rejected": -0.9614475965499878, "logps/chosen": -71.6847152709961, "logps/rejected": -92.91737365722656, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": 0.6608497500419617, "rewards/margins": 3.2886133193969727, "rewards/rejected": -2.627763509750366, "step": 1518 }, { "epoch": 0.34, "learning_rate": 9.508725411700424e-06, "logits/chosen": -0.786553144454956, "logits/rejected": -0.7795460224151611, "logps/chosen": -199.5059814453125, "logps/rejected": -140.30136108398438, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.9537933468818665, "rewards/margins": 3.8815271854400635, "rewards/rejected": -4.835320472717285, "step": 1519 }, { "epoch": 0.34, "learning_rate": 9.507950355584917e-06, "logits/chosen": -1.1491374969482422, "logits/rejected": -0.9779396057128906, "logps/chosen": -107.46440124511719, "logps/rejected": -231.7143096923828, "loss": 0.132, "rewards/accuracies": 1.0, "rewards/chosen": -0.1409042328596115, "rewards/margins": 1.1995270252227783, "rewards/rejected": -1.3404312133789062, "step": 1520 }, { "epoch": 0.34, "learning_rate": 9.507174720208627e-06, "logits/chosen": -1.1251704692840576, "logits/rejected": -1.0934385061264038, "logps/chosen": -137.22747802734375, "logps/rejected": -202.81161499023438, "loss": 0.0588, "rewards/accuracies": 1.0, "rewards/chosen": -0.11869507282972336, "rewards/margins": 3.038421630859375, "rewards/rejected": -3.157116651535034, "step": 1521 }, { "epoch": 0.34, "learning_rate": 9.50639850567122e-06, "logits/chosen": -1.0725133419036865, "logits/rejected": -1.0683152675628662, "logps/chosen": -174.123779296875, "logps/rejected": -53.65721130371094, "loss": 0.9417, "rewards/accuracies": 0.0, "rewards/chosen": -5.208587646484375, "rewards/margins": -1.7077827453613281, "rewards/rejected": -3.500804901123047, "step": 1522 }, { "epoch": 0.34, "learning_rate": 9.505621712072437e-06, "logits/chosen": -0.9850002527236938, "logits/rejected": -0.9871804118156433, "logps/chosen": -61.353755950927734, "logps/rejected": -122.1847915649414, "loss": 0.1565, "rewards/accuracies": 1.0, "rewards/chosen": -0.9719913601875305, "rewards/margins": 1.0675458908081055, "rewards/rejected": -2.039537191390991, "step": 1523 }, { "epoch": 0.34, "learning_rate": 9.504844339512096e-06, "logits/chosen": -0.9653880000114441, "logits/rejected": -0.5691691040992737, "logps/chosen": -102.87409210205078, "logps/rejected": -428.2920837402344, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -1.8469215631484985, "rewards/margins": 30.72402000427246, "rewards/rejected": -32.57094192504883, "step": 1524 }, { "epoch": 0.34, "learning_rate": 9.504066388090088e-06, "logits/chosen": -1.2826107740402222, "logits/rejected": -1.2468082904815674, "logps/chosen": -128.77947998046875, "logps/rejected": -195.6595458984375, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 1.8924896717071533, "rewards/margins": 2.150669813156128, "rewards/rejected": -0.2581802308559418, "step": 1525 }, { "epoch": 0.34, "learning_rate": 9.503287857906374e-06, "logits/chosen": -1.055016040802002, "logits/rejected": -1.0350089073181152, "logps/chosen": -100.52155303955078, "logps/rejected": -113.42241668701172, "loss": 0.2843, "rewards/accuracies": 1.0, "rewards/chosen": -2.3929526805877686, "rewards/margins": 0.26705026626586914, "rewards/rejected": -2.6600029468536377, "step": 1526 }, { "epoch": 0.34, "learning_rate": 9.502508749060998e-06, "logits/chosen": -1.257219672203064, "logits/rejected": -1.2064924240112305, "logps/chosen": -123.05117797851562, "logps/rejected": -164.02359008789062, "loss": 0.215, "rewards/accuracies": 1.0, "rewards/chosen": -2.2584762573242188, "rewards/margins": 0.622689962387085, "rewards/rejected": -2.8811662197113037, "step": 1527 }, { "epoch": 0.34, "learning_rate": 9.50172906165407e-06, "logits/chosen": -1.153174877166748, "logits/rejected": -1.1499449014663696, "logps/chosen": -86.75424194335938, "logps/rejected": -98.63124084472656, "loss": 0.3534, "rewards/accuracies": 1.0, "rewards/chosen": -2.34197998046875, "rewards/margins": 0.040644168853759766, "rewards/rejected": -2.3826241493225098, "step": 1528 }, { "epoch": 0.34, "learning_rate": 9.50094879578578e-06, "logits/chosen": -0.93651282787323, "logits/rejected": -0.869143009185791, "logps/chosen": -277.51025390625, "logps/rejected": -326.1165466308594, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.6557861566543579, "rewards/margins": 6.282553195953369, "rewards/rejected": -5.626767158508301, "step": 1529 }, { "epoch": 0.34, "learning_rate": 9.500167951556392e-06, "logits/chosen": -0.746984601020813, "logits/rejected": -0.7168468832969666, "logps/chosen": -106.53248596191406, "logps/rejected": -140.88113403320312, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.4958740174770355, "rewards/margins": 4.4178009033203125, "rewards/rejected": -4.913674831390381, "step": 1530 }, { "epoch": 0.34, "learning_rate": 9.499386529066236e-06, "logits/chosen": -1.0594667196273804, "logits/rejected": -0.976036787033081, "logps/chosen": -89.28813171386719, "logps/rejected": -178.0847930908203, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.1079994440078735, "rewards/margins": 4.7303056716918945, "rewards/rejected": -5.8383049964904785, "step": 1531 }, { "epoch": 0.34, "learning_rate": 9.498604528415731e-06, "logits/chosen": -0.9933674335479736, "logits/rejected": -0.9933674335479736, "logps/chosen": -110.42433166503906, "logps/rejected": -110.42433166503906, "loss": 0.5612, "rewards/accuracies": 0.0, "rewards/chosen": -3.4738709926605225, "rewards/margins": 0.0, "rewards/rejected": -3.4738709926605225, "step": 1532 }, { "epoch": 0.34, "learning_rate": 9.497821949705356e-06, "logits/chosen": -1.0976982116699219, "logits/rejected": -1.0157185792922974, "logps/chosen": -83.33372497558594, "logps/rejected": -67.21827697753906, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/chosen": 0.8989914059638977, "rewards/margins": 3.905991792678833, "rewards/rejected": -3.00700044631958, "step": 1533 }, { "epoch": 0.34, "learning_rate": 9.497038793035674e-06, "logits/chosen": -1.1230558156967163, "logits/rejected": -1.1187559366226196, "logps/chosen": -225.02975463867188, "logps/rejected": -153.76905822753906, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -1.0864410400390625, "rewards/margins": 3.1779847145080566, "rewards/rejected": -4.264425754547119, "step": 1534 }, { "epoch": 0.34, "learning_rate": 9.496255058507318e-06, "logits/chosen": -0.6983203291893005, "logits/rejected": -0.6398893594741821, "logps/chosen": -149.19427490234375, "logps/rejected": -257.71746826171875, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": 1.8828048706054688, "rewards/margins": 2.444197177886963, "rewards/rejected": -0.5613922476768494, "step": 1535 }, { "epoch": 0.34, "learning_rate": 9.495470746220995e-06, "logits/chosen": -1.045157551765442, "logits/rejected": -1.0623762607574463, "logps/chosen": -145.00662231445312, "logps/rejected": -178.322998046875, "loss": 0.347, "rewards/accuracies": 1.0, "rewards/chosen": -1.0847625732421875, "rewards/margins": 7.077181816101074, "rewards/rejected": -8.161944389343262, "step": 1536 }, { "epoch": 0.34, "learning_rate": 9.494685856277488e-06, "logits/chosen": -0.8857540488243103, "logits/rejected": -0.8973532915115356, "logps/chosen": -138.7845916748047, "logps/rejected": -203.3455810546875, "loss": 0.167, "rewards/accuracies": 1.0, "rewards/chosen": -5.640191078186035, "rewards/margins": 0.9247593879699707, "rewards/rejected": -6.564950466156006, "step": 1537 }, { "epoch": 0.34, "learning_rate": 9.493900388777654e-06, "logits/chosen": -1.125348687171936, "logits/rejected": -0.8609938025474548, "logps/chosen": -78.96554565429688, "logps/rejected": -350.0848083496094, "loss": 0.197, "rewards/accuracies": 1.0, "rewards/chosen": -1.5487568378448486, "rewards/margins": 27.191394805908203, "rewards/rejected": -28.74015235900879, "step": 1538 }, { "epoch": 0.34, "learning_rate": 9.493114343822422e-06, "logits/chosen": -1.150901436805725, "logits/rejected": -1.168503999710083, "logps/chosen": -188.91720581054688, "logps/rejected": -185.83465576171875, "loss": 0.0606, "rewards/accuracies": 1.0, "rewards/chosen": 1.7767364978790283, "rewards/margins": 9.577338218688965, "rewards/rejected": -7.800601959228516, "step": 1539 }, { "epoch": 0.34, "learning_rate": 9.4923277215128e-06, "logits/chosen": -1.2438931465148926, "logits/rejected": -1.2740217447280884, "logps/chosen": -151.67144775390625, "logps/rejected": -209.5793914794922, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 2.729656934738159, "rewards/margins": 5.910672187805176, "rewards/rejected": -3.1810150146484375, "step": 1540 }, { "epoch": 0.34, "learning_rate": 9.491540521949862e-06, "logits/chosen": -0.9377045631408691, "logits/rejected": -0.9396326541900635, "logps/chosen": -159.58517456054688, "logps/rejected": -160.75534057617188, "loss": 0.3506, "rewards/accuracies": 0.0, "rewards/chosen": -2.9712586402893066, "rewards/margins": -0.01172184944152832, "rewards/rejected": -2.9595367908477783, "step": 1541 }, { "epoch": 0.34, "learning_rate": 9.490752745234767e-06, "logits/chosen": -1.2635380029678345, "logits/rejected": -1.2984355688095093, "logps/chosen": -141.3357696533203, "logps/rejected": -93.1910629272461, "loss": 1.4219, "rewards/accuracies": 0.0, "rewards/chosen": -3.5291383266448975, "rewards/margins": -2.3311119079589844, "rewards/rejected": -1.1980262994766235, "step": 1542 }, { "epoch": 0.34, "learning_rate": 9.489964391468739e-06, "logits/chosen": -1.0636522769927979, "logits/rejected": -1.0944989919662476, "logps/chosen": -67.10946655273438, "logps/rejected": -108.35881805419922, "loss": 0.2131, "rewards/accuracies": 1.0, "rewards/chosen": -0.9092899560928345, "rewards/margins": 0.6320956945419312, "rewards/rejected": -1.5413856506347656, "step": 1543 }, { "epoch": 0.34, "learning_rate": 9.48917546075308e-06, "logits/chosen": -0.7800049781799316, "logits/rejected": -0.6809399724006653, "logps/chosen": -161.23455810546875, "logps/rejected": -101.7829818725586, "loss": 0.4985, "rewards/accuracies": 0.0, "rewards/chosen": -3.919034719467163, "rewards/margins": -0.4720451831817627, "rewards/rejected": -3.4469895362854004, "step": 1544 }, { "epoch": 0.34, "learning_rate": 9.488385953189165e-06, "logits/chosen": -1.3690778017044067, "logits/rejected": -1.367830514907837, "logps/chosen": -85.51560974121094, "logps/rejected": -66.68148040771484, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 0.4721122682094574, "rewards/margins": 4.307258129119873, "rewards/rejected": -3.8351457118988037, "step": 1545 }, { "epoch": 0.34, "learning_rate": 9.487595868878447e-06, "logits/chosen": -0.9625514149665833, "logits/rejected": -0.9625514149665833, "logps/chosen": -83.928955078125, "logps/rejected": -83.928955078125, "loss": 0.3657, "rewards/accuracies": 0.0, "rewards/chosen": -0.16641007363796234, "rewards/margins": 0.0, "rewards/rejected": -0.16641007363796234, "step": 1546 }, { "epoch": 0.34, "learning_rate": 9.486805207922445e-06, "logits/chosen": -0.7175914645195007, "logits/rejected": -0.623634934425354, "logps/chosen": -69.29346466064453, "logps/rejected": -163.19134521484375, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -0.9874233603477478, "rewards/margins": 4.469708442687988, "rewards/rejected": -5.457131862640381, "step": 1547 }, { "epoch": 0.34, "learning_rate": 9.486013970422762e-06, "logits/chosen": -0.9220549464225769, "logits/rejected": -1.1655229330062866, "logps/chosen": -153.232421875, "logps/rejected": -204.81875610351562, "loss": 0.4589, "rewards/accuracies": 0.0, "rewards/chosen": -1.8842391967773438, "rewards/margins": -0.40446925163269043, "rewards/rejected": -1.4797699451446533, "step": 1548 }, { "epoch": 0.34, "learning_rate": 9.485222156481067e-06, "logits/chosen": -0.9811784029006958, "logits/rejected": -0.9929982423782349, "logps/chosen": -123.65531921386719, "logps/rejected": -145.74871826171875, "loss": 0.3587, "rewards/accuracies": 1.0, "rewards/chosen": -0.689288318157196, "rewards/margins": 0.5395340323448181, "rewards/rejected": -1.2288223505020142, "step": 1549 }, { "epoch": 0.34, "learning_rate": 9.484429766199107e-06, "logits/chosen": -1.126783013343811, "logits/rejected": -1.117297887802124, "logps/chosen": -129.5536346435547, "logps/rejected": -168.90347290039062, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": 0.103546142578125, "rewards/margins": 2.8870224952697754, "rewards/rejected": -2.7834763526916504, "step": 1550 }, { "epoch": 0.34, "learning_rate": 9.483636799678703e-06, "logits/chosen": -1.2158865928649902, "logits/rejected": -1.2102258205413818, "logps/chosen": -169.57791137695312, "logps/rejected": -184.55413818359375, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": 2.113481283187866, "rewards/margins": 7.4734697341918945, "rewards/rejected": -5.359988689422607, "step": 1551 }, { "epoch": 0.34, "learning_rate": 9.482843257021747e-06, "logits/chosen": -0.9784243106842041, "logits/rejected": -1.0323525667190552, "logps/chosen": -190.27552795410156, "logps/rejected": -241.7242889404297, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 1.848078966140747, "rewards/margins": 6.262399673461914, "rewards/rejected": -4.414320468902588, "step": 1552 }, { "epoch": 0.34, "learning_rate": 9.48204913833021e-06, "logits/chosen": -1.0566331148147583, "logits/rejected": -1.0208057165145874, "logps/chosen": -241.49855041503906, "logps/rejected": -168.5585479736328, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 2.0266952514648438, "rewards/margins": 7.4034929275512695, "rewards/rejected": -5.376797676086426, "step": 1553 }, { "epoch": 0.34, "learning_rate": 9.481254443706133e-06, "logits/chosen": -0.8668561577796936, "logits/rejected": -0.8608423471450806, "logps/chosen": -81.10000610351562, "logps/rejected": -115.1561279296875, "loss": 0.12, "rewards/accuracies": 1.0, "rewards/chosen": -1.6127808094024658, "rewards/margins": 2.031517744064331, "rewards/rejected": -3.644298553466797, "step": 1554 }, { "epoch": 0.34, "learning_rate": 9.480459173251634e-06, "logits/chosen": -1.1186907291412354, "logits/rejected": -1.1531028747558594, "logps/chosen": -148.23854064941406, "logps/rejected": -109.77940368652344, "loss": 2.1337, "rewards/accuracies": 0.0, "rewards/chosen": -3.319234609603882, "rewards/margins": -3.5457308292388916, "rewards/rejected": 0.2264961302280426, "step": 1555 }, { "epoch": 0.34, "learning_rate": 9.4796633270689e-06, "logits/chosen": -1.03429114818573, "logits/rejected": -0.6156960725784302, "logps/chosen": -74.59660339355469, "logps/rejected": -441.24371337890625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4079940915107727, "rewards/margins": 24.836292266845703, "rewards/rejected": -25.244285583496094, "step": 1556 }, { "epoch": 0.34, "learning_rate": 9.478866905260198e-06, "logits/chosen": -0.8964347243309021, "logits/rejected": -0.8954964280128479, "logps/chosen": -81.40109252929688, "logps/rejected": -30.726802825927734, "loss": 0.5474, "rewards/accuracies": 0.0, "rewards/chosen": -1.7784637212753296, "rewards/margins": -0.6872897148132324, "rewards/rejected": -1.0911740064620972, "step": 1557 }, { "epoch": 0.34, "learning_rate": 9.478069907927867e-06, "logits/chosen": -1.0424764156341553, "logits/rejected": -0.9851114153862, "logps/chosen": -98.13689422607422, "logps/rejected": -126.36162567138672, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -1.3903656005859375, "rewards/margins": 2.9613490104675293, "rewards/rejected": -4.351714611053467, "step": 1558 }, { "epoch": 0.35, "learning_rate": 9.477272335174315e-06, "logits/chosen": -0.8243443369865417, "logits/rejected": -0.8243443369865417, "logps/chosen": -125.13851928710938, "logps/rejected": -125.13851928710938, "loss": 0.3481, "rewards/accuracies": 0.0, "rewards/chosen": -3.142672061920166, "rewards/margins": 0.0, "rewards/rejected": -3.142672061920166, "step": 1559 }, { "epoch": 0.35, "learning_rate": 9.476474187102033e-06, "logits/chosen": -1.0594168901443481, "logits/rejected": -1.0518995523452759, "logps/chosen": -252.61956787109375, "logps/rejected": -351.6239013671875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 3.1564698219299316, "rewards/margins": 6.098788261413574, "rewards/rejected": -2.9423186779022217, "step": 1560 }, { "epoch": 0.35, "learning_rate": 9.475675463813578e-06, "logits/chosen": -0.8978080153465271, "logits/rejected": -0.8261663317680359, "logps/chosen": -151.29859924316406, "logps/rejected": -170.01303100585938, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": 0.4295822083950043, "rewards/margins": 7.925433158874512, "rewards/rejected": -7.495851039886475, "step": 1561 }, { "epoch": 0.35, "learning_rate": 9.474876165411586e-06, "logits/chosen": -0.8701738119125366, "logits/rejected": -0.858936071395874, "logps/chosen": -77.69174194335938, "logps/rejected": -83.24649047851562, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": 0.6726341247558594, "rewards/margins": 2.1352319717407227, "rewards/rejected": -1.4625977277755737, "step": 1562 }, { "epoch": 0.35, "learning_rate": 9.474076291998765e-06, "logits/chosen": -1.1698049306869507, "logits/rejected": -1.1668082475662231, "logps/chosen": -73.94896697998047, "logps/rejected": -121.25773620605469, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 0.7963447570800781, "rewards/margins": 4.237717628479004, "rewards/rejected": -3.4413726329803467, "step": 1563 }, { "epoch": 0.35, "learning_rate": 9.473275843677893e-06, "logits/chosen": -0.8012800216674805, "logits/rejected": -0.7805424928665161, "logps/chosen": -67.18186950683594, "logps/rejected": -151.53341674804688, "loss": 0.0716, "rewards/accuracies": 1.0, "rewards/chosen": -1.2578140497207642, "rewards/margins": 1.871200680732727, "rewards/rejected": -3.129014730453491, "step": 1564 }, { "epoch": 0.35, "learning_rate": 9.472474820551831e-06, "logits/chosen": -0.7775875926017761, "logits/rejected": -0.7849332094192505, "logps/chosen": -96.43386840820312, "logps/rejected": -69.26911926269531, "loss": 0.7157, "rewards/accuracies": 0.0, "rewards/chosen": -3.674546957015991, "rewards/margins": -1.1579036712646484, "rewards/rejected": -2.5166432857513428, "step": 1565 }, { "epoch": 0.35, "learning_rate": 9.471673222723506e-06, "logits/chosen": -1.1696343421936035, "logits/rejected": -1.1352299451828003, "logps/chosen": -77.34786224365234, "logps/rejected": -142.64968872070312, "loss": 0.4684, "rewards/accuracies": 0.0, "rewards/chosen": -1.682230830192566, "rewards/margins": -0.43936121463775635, "rewards/rejected": -1.2428696155548096, "step": 1566 }, { "epoch": 0.35, "learning_rate": 9.47087105029592e-06, "logits/chosen": -0.5832646489143372, "logits/rejected": -0.5832646489143372, "logps/chosen": -61.155731201171875, "logps/rejected": -61.155731201171875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -2.2001569271087646, "rewards/margins": 0.0, "rewards/rejected": -2.2001569271087646, "step": 1567 }, { "epoch": 0.35, "learning_rate": 9.470068303372153e-06, "logits/chosen": -0.9597194790840149, "logits/rejected": -0.9379174113273621, "logps/chosen": -82.11683654785156, "logps/rejected": -128.7804412841797, "loss": 0.0609, "rewards/accuracies": 1.0, "rewards/chosen": -1.0673630237579346, "rewards/margins": 3.6981194019317627, "rewards/rejected": -4.765482425689697, "step": 1568 }, { "epoch": 0.35, "learning_rate": 9.469264982055355e-06, "logits/chosen": -0.8050518035888672, "logits/rejected": -0.8050518035888672, "logps/chosen": -115.25560760498047, "logps/rejected": -115.25560760498047, "loss": 1.1665, "rewards/accuracies": 0.0, "rewards/chosen": -3.1780097484588623, "rewards/margins": 0.0, "rewards/rejected": -3.1780097484588623, "step": 1569 }, { "epoch": 0.35, "learning_rate": 9.46846108644875e-06, "logits/chosen": -0.7280643582344055, "logits/rejected": -0.667761504650116, "logps/chosen": -154.89096069335938, "logps/rejected": -60.75886917114258, "loss": 0.2413, "rewards/accuracies": 1.0, "rewards/chosen": -2.1864044666290283, "rewards/margins": 0.5447726249694824, "rewards/rejected": -2.7311770915985107, "step": 1570 }, { "epoch": 0.35, "learning_rate": 9.467656616655636e-06, "logits/chosen": -0.7669768333435059, "logits/rejected": -0.6581694483757019, "logps/chosen": -200.2459716796875, "logps/rejected": -291.77728271484375, "loss": 0.4286, "rewards/accuracies": 1.0, "rewards/chosen": 3.619642734527588, "rewards/margins": 6.8500566482543945, "rewards/rejected": -3.2304139137268066, "step": 1571 }, { "epoch": 0.35, "learning_rate": 9.466851572779388e-06, "logits/chosen": -0.6090992093086243, "logits/rejected": -0.47368183732032776, "logps/chosen": -161.12823486328125, "logps/rejected": -173.77178955078125, "loss": 0.0995, "rewards/accuracies": 1.0, "rewards/chosen": 4.400674343109131, "rewards/margins": 9.674281120300293, "rewards/rejected": -5.273606777191162, "step": 1572 }, { "epoch": 0.35, "learning_rate": 9.46604595492345e-06, "logits/chosen": -0.9166020154953003, "logits/rejected": -0.9490375518798828, "logps/chosen": -158.17498779296875, "logps/rejected": -65.82414245605469, "loss": 1.5526, "rewards/accuracies": 0.0, "rewards/chosen": -5.818511962890625, "rewards/margins": -2.318157196044922, "rewards/rejected": -3.500354766845703, "step": 1573 }, { "epoch": 0.35, "learning_rate": 9.465239763191345e-06, "logits/chosen": -1.046675443649292, "logits/rejected": -1.0290838479995728, "logps/chosen": -86.4678726196289, "logps/rejected": -131.8075714111328, "loss": 0.0787, "rewards/accuracies": 1.0, "rewards/chosen": -1.4617851972579956, "rewards/margins": 1.7744804620742798, "rewards/rejected": -3.2362656593322754, "step": 1574 }, { "epoch": 0.35, "learning_rate": 9.464432997686664e-06, "logits/chosen": -0.9862299561500549, "logits/rejected": -0.9972293376922607, "logps/chosen": -149.1160125732422, "logps/rejected": -162.00474548339844, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 1.3061844110488892, "rewards/margins": 6.422621250152588, "rewards/rejected": -5.116436958312988, "step": 1575 }, { "epoch": 0.35, "learning_rate": 9.463625658513073e-06, "logits/chosen": -0.9940130114555359, "logits/rejected": -1.0924160480499268, "logps/chosen": -197.77943420410156, "logps/rejected": -40.170413970947266, "loss": 0.504, "rewards/accuracies": 1.0, "rewards/chosen": -0.47055360674858093, "rewards/margins": 0.9944087266921997, "rewards/rejected": -1.464962363243103, "step": 1576 }, { "epoch": 0.35, "learning_rate": 9.462817745774316e-06, "logits/chosen": -0.9472609162330627, "logits/rejected": -0.9808236360549927, "logps/chosen": -112.07565307617188, "logps/rejected": -162.9173126220703, "loss": 0.5206, "rewards/accuracies": 0.0, "rewards/chosen": 0.250875860452652, "rewards/margins": -0.6055037975311279, "rewards/rejected": 0.8563796877861023, "step": 1577 }, { "epoch": 0.35, "learning_rate": 9.462009259574207e-06, "logits/chosen": -0.9552527070045471, "logits/rejected": -0.9489070177078247, "logps/chosen": -81.60969543457031, "logps/rejected": -63.237083435058594, "loss": 0.677, "rewards/accuracies": 1.0, "rewards/chosen": 0.2720169126987457, "rewards/margins": 1.9533172845840454, "rewards/rejected": -1.681300401687622, "step": 1578 }, { "epoch": 0.35, "learning_rate": 9.461200200016636e-06, "logits/chosen": -1.107719898223877, "logits/rejected": -1.0631623268127441, "logps/chosen": -97.77033996582031, "logps/rejected": -145.34442138671875, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": -0.42998963594436646, "rewards/margins": 2.6932473182678223, "rewards/rejected": -3.123236894607544, "step": 1579 }, { "epoch": 0.35, "learning_rate": 9.460390567205562e-06, "logits/chosen": -0.5800267457962036, "logits/rejected": -0.5800267457962036, "logps/chosen": -166.2445831298828, "logps/rejected": -166.2445831298828, "loss": 0.347, "rewards/accuracies": 0.0, "rewards/chosen": -4.468116283416748, "rewards/margins": 0.0, "rewards/rejected": -4.468116283416748, "step": 1580 }, { "epoch": 0.35, "learning_rate": 9.459580361245024e-06, "logits/chosen": -1.136610984802246, "logits/rejected": -1.1210325956344604, "logps/chosen": -189.10836791992188, "logps/rejected": -226.00230407714844, "loss": 0.1945, "rewards/accuracies": 1.0, "rewards/chosen": -5.323220729827881, "rewards/margins": 0.7445087432861328, "rewards/rejected": -6.067729473114014, "step": 1581 }, { "epoch": 0.35, "learning_rate": 9.458769582239128e-06, "logits/chosen": -1.142569899559021, "logits/rejected": -1.0120081901550293, "logps/chosen": -235.57545471191406, "logps/rejected": -257.57000732421875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.6792526245117188, "rewards/margins": 6.716716289520264, "rewards/rejected": -6.037463665008545, "step": 1582 }, { "epoch": 0.35, "learning_rate": 9.457958230292061e-06, "logits/chosen": -1.3526475429534912, "logits/rejected": -1.364588975906372, "logps/chosen": -95.18114471435547, "logps/rejected": -57.847015380859375, "loss": 0.0604, "rewards/accuracies": 1.0, "rewards/chosen": -1.4416801929473877, "rewards/margins": 2.066575288772583, "rewards/rejected": -3.5082554817199707, "step": 1583 }, { "epoch": 0.35, "learning_rate": 9.457146305508078e-06, "logits/chosen": -0.9882819056510925, "logits/rejected": -0.9086421728134155, "logps/chosen": -188.079345703125, "logps/rejected": -345.8687744140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.7883574962615967, "rewards/margins": 8.498562812805176, "rewards/rejected": -5.710205078125, "step": 1584 }, { "epoch": 0.35, "learning_rate": 9.45633380799151e-06, "logits/chosen": -0.7981892824172974, "logits/rejected": -0.7956005334854126, "logps/chosen": -87.56404113769531, "logps/rejected": -168.75732421875, "loss": 0.5921, "rewards/accuracies": 1.0, "rewards/chosen": -0.3737655580043793, "rewards/margins": 1.481632947921753, "rewards/rejected": -1.8553985357284546, "step": 1585 }, { "epoch": 0.35, "learning_rate": 9.455520737846757e-06, "logits/chosen": -1.1130802631378174, "logits/rejected": -1.0928806066513062, "logps/chosen": -86.05783081054688, "logps/rejected": -44.734039306640625, "loss": 0.1253, "rewards/accuracies": 1.0, "rewards/chosen": 0.18095551431179047, "rewards/margins": 2.4233908653259277, "rewards/rejected": -2.2424354553222656, "step": 1586 }, { "epoch": 0.35, "learning_rate": 9.454707095178304e-06, "logits/chosen": -0.8332147598266602, "logits/rejected": -0.836115300655365, "logps/chosen": -51.20918273925781, "logps/rejected": -51.824546813964844, "loss": 0.3154, "rewards/accuracies": 1.0, "rewards/chosen": -2.7003002166748047, "rewards/margins": 0.2650315761566162, "rewards/rejected": -2.965331792831421, "step": 1587 }, { "epoch": 0.35, "learning_rate": 9.453892880090696e-06, "logits/chosen": -0.8010882139205933, "logits/rejected": -0.8030517101287842, "logps/chosen": -55.3057746887207, "logps/rejected": -52.20793914794922, "loss": 0.1061, "rewards/accuracies": 1.0, "rewards/chosen": -1.0408257246017456, "rewards/margins": 1.529105305671692, "rewards/rejected": -2.5699310302734375, "step": 1588 }, { "epoch": 0.35, "learning_rate": 9.45307809268856e-06, "logits/chosen": -0.9024093747138977, "logits/rejected": -0.8855472803115845, "logps/chosen": -104.69705200195312, "logps/rejected": -93.18437194824219, "loss": 0.4326, "rewards/accuracies": 0.0, "rewards/chosen": -0.90865558385849, "rewards/margins": -0.31872713565826416, "rewards/rejected": -0.5899284482002258, "step": 1589 }, { "epoch": 0.35, "learning_rate": 9.452262733076594e-06, "logits/chosen": -0.8235868215560913, "logits/rejected": -0.7776681780815125, "logps/chosen": -105.55935668945312, "logps/rejected": -100.94298553466797, "loss": 0.1185, "rewards/accuracies": 1.0, "rewards/chosen": -1.6012871265411377, "rewards/margins": 1.702277421951294, "rewards/rejected": -3.3035645484924316, "step": 1590 }, { "epoch": 0.35, "learning_rate": 9.45144680135957e-06, "logits/chosen": -1.1145085096359253, "logits/rejected": -1.1205297708511353, "logps/chosen": -81.96046447753906, "logps/rejected": -62.05401611328125, "loss": 0.0858, "rewards/accuracies": 1.0, "rewards/chosen": 1.1400917768478394, "rewards/margins": 3.969604015350342, "rewards/rejected": -2.829512119293213, "step": 1591 }, { "epoch": 0.35, "learning_rate": 9.450630297642334e-06, "logits/chosen": -0.849032998085022, "logits/rejected": -0.7508974671363831, "logps/chosen": -106.17665100097656, "logps/rejected": -351.21820068359375, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -2.504795789718628, "rewards/margins": 6.44685173034668, "rewards/rejected": -8.951647758483887, "step": 1592 }, { "epoch": 0.35, "learning_rate": 9.449813222029802e-06, "logits/chosen": -0.6082505583763123, "logits/rejected": -0.6061916351318359, "logps/chosen": -127.85711669921875, "logps/rejected": -131.6591796875, "loss": 1.0838, "rewards/accuracies": 0.0, "rewards/chosen": -3.861677646636963, "rewards/margins": -1.0687310695648193, "rewards/rejected": -2.7929465770721436, "step": 1593 }, { "epoch": 0.35, "learning_rate": 9.448995574626969e-06, "logits/chosen": -0.9187115430831909, "logits/rejected": -0.9249910116195679, "logps/chosen": -227.0554656982422, "logps/rejected": -74.7441635131836, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": 2.9795303344726562, "rewards/margins": 3.2801856994628906, "rewards/rejected": -0.3006553649902344, "step": 1594 }, { "epoch": 0.35, "learning_rate": 9.448177355538899e-06, "logits/chosen": -0.7090333700180054, "logits/rejected": -0.7001578211784363, "logps/chosen": -113.05879974365234, "logps/rejected": -195.7285919189453, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": 1.1854164600372314, "rewards/margins": 3.406301259994507, "rewards/rejected": -2.2208847999572754, "step": 1595 }, { "epoch": 0.35, "learning_rate": 9.447358564870732e-06, "logits/chosen": -1.0122463703155518, "logits/rejected": -0.9541857838630676, "logps/chosen": -72.74954986572266, "logps/rejected": -216.88739013671875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.6039230227470398, "rewards/margins": 6.945253372192383, "rewards/rejected": -7.549176216125488, "step": 1596 }, { "epoch": 0.35, "learning_rate": 9.446539202727683e-06, "logits/chosen": -0.8061317801475525, "logits/rejected": -0.7997662425041199, "logps/chosen": -65.3779296875, "logps/rejected": -86.75387573242188, "loss": 0.3887, "rewards/accuracies": 1.0, "rewards/chosen": -1.5022248029708862, "rewards/margins": 0.24908208847045898, "rewards/rejected": -1.7513068914413452, "step": 1597 }, { "epoch": 0.35, "learning_rate": 9.445719269215032e-06, "logits/chosen": -0.7705632448196411, "logits/rejected": -0.7963252663612366, "logps/chosen": -78.2232437133789, "logps/rejected": -75.86505126953125, "loss": 0.1735, "rewards/accuracies": 1.0, "rewards/chosen": 0.5936309695243835, "rewards/margins": 0.8809211254119873, "rewards/rejected": -0.28729018568992615, "step": 1598 }, { "epoch": 0.35, "learning_rate": 9.444898764438144e-06, "logits/chosen": -0.7776590585708618, "logits/rejected": -0.790346622467041, "logps/chosen": -36.31366729736328, "logps/rejected": -58.76380920410156, "loss": 0.1635, "rewards/accuracies": 1.0, "rewards/chosen": -2.2975404262542725, "rewards/margins": 0.978757381439209, "rewards/rejected": -3.2762978076934814, "step": 1599 }, { "epoch": 0.35, "learning_rate": 9.444077688502451e-06, "logits/chosen": -1.0768226385116577, "logits/rejected": -1.0647083520889282, "logps/chosen": -150.1111602783203, "logps/rejected": -168.8863067626953, "loss": 0.9892, "rewards/accuracies": 0.0, "rewards/chosen": -0.6931869387626648, "rewards/margins": -1.0034821033477783, "rewards/rejected": 0.31029510498046875, "step": 1600 }, { "epoch": 0.35, "learning_rate": 9.443256041513457e-06, "logits/chosen": -1.1425769329071045, "logits/rejected": -1.083570122718811, "logps/chosen": -88.53317260742188, "logps/rejected": -219.5684814453125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.713513195514679, "rewards/margins": 5.822256565093994, "rewards/rejected": -6.535769939422607, "step": 1601 }, { "epoch": 0.35, "learning_rate": 9.442433823576741e-06, "logits/chosen": -0.8754118084907532, "logits/rejected": -0.8633473515510559, "logps/chosen": -117.11976623535156, "logps/rejected": -194.6464080810547, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 1.1421951055526733, "rewards/margins": 6.487829685211182, "rewards/rejected": -5.345634460449219, "step": 1602 }, { "epoch": 0.35, "learning_rate": 9.441611034797961e-06, "logits/chosen": -1.0010002851486206, "logits/rejected": -0.9920565485954285, "logps/chosen": -111.69525146484375, "logps/rejected": -171.29202270507812, "loss": 0.0479, "rewards/accuracies": 1.0, "rewards/chosen": 0.9242607355117798, "rewards/margins": 2.946343421936035, "rewards/rejected": -2.022082567214966, "step": 1603 }, { "epoch": 0.36, "learning_rate": 9.44078767528284e-06, "logits/chosen": -1.3931456804275513, "logits/rejected": -1.411421298980713, "logps/chosen": -205.83447265625, "logps/rejected": -303.994140625, "loss": 0.7079, "rewards/accuracies": 1.0, "rewards/chosen": 0.6026825308799744, "rewards/margins": 2.9591400623321533, "rewards/rejected": -2.356457471847534, "step": 1604 }, { "epoch": 0.36, "learning_rate": 9.439963745137177e-06, "logits/chosen": -1.0708450078964233, "logits/rejected": -1.035612940788269, "logps/chosen": -155.35092163085938, "logps/rejected": -138.49362182617188, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": 0.3656570613384247, "rewards/margins": 4.954433917999268, "rewards/rejected": -4.5887770652771, "step": 1605 }, { "epoch": 0.36, "learning_rate": 9.439139244466847e-06, "logits/chosen": -0.8125637769699097, "logits/rejected": -0.8718882203102112, "logps/chosen": -194.4771728515625, "logps/rejected": -174.2197265625, "loss": 0.4915, "rewards/accuracies": 1.0, "rewards/chosen": 2.1253249645233154, "rewards/margins": 2.235072374343872, "rewards/rejected": -0.10974731296300888, "step": 1606 }, { "epoch": 0.36, "learning_rate": 9.438314173377796e-06, "logits/chosen": -0.8734812140464783, "logits/rejected": -0.8162124752998352, "logps/chosen": -117.12493133544922, "logps/rejected": -167.08151245117188, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": 0.3148964047431946, "rewards/margins": 5.433282852172852, "rewards/rejected": -5.118386268615723, "step": 1607 }, { "epoch": 0.36, "learning_rate": 9.437488531976042e-06, "logits/chosen": -0.7945663332939148, "logits/rejected": -0.8273602724075317, "logps/chosen": -222.75439453125, "logps/rejected": -291.6790771484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 2.3656938076019287, "rewards/margins": 10.340295791625977, "rewards/rejected": -7.974601745605469, "step": 1608 }, { "epoch": 0.36, "learning_rate": 9.43666232036768e-06, "logits/chosen": -1.0669585466384888, "logits/rejected": -1.0702241659164429, "logps/chosen": -101.8243179321289, "logps/rejected": -47.679725646972656, "loss": 0.0603, "rewards/accuracies": 1.0, "rewards/chosen": -0.6455757021903992, "rewards/margins": 2.0946273803710938, "rewards/rejected": -2.7402031421661377, "step": 1609 }, { "epoch": 0.36, "learning_rate": 9.435835538658873e-06, "logits/chosen": -0.814821183681488, "logits/rejected": -0.7556841969490051, "logps/chosen": -241.4263153076172, "logps/rejected": -318.69879150390625, "loss": 0.7775, "rewards/accuracies": 0.0, "rewards/chosen": -1.7141830921173096, "rewards/margins": -1.317195177078247, "rewards/rejected": -0.3969879150390625, "step": 1610 }, { "epoch": 0.36, "learning_rate": 9.435008186955866e-06, "logits/chosen": -0.8496444821357727, "logits/rejected": -0.5268576741218567, "logps/chosen": -136.6253662109375, "logps/rejected": -480.5871276855469, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.03844909742474556, "rewards/margins": 36.18730926513672, "rewards/rejected": -36.22575759887695, "step": 1611 }, { "epoch": 0.36, "learning_rate": 9.434180265364965e-06, "logits/chosen": -1.038144826889038, "logits/rejected": -1.0126855373382568, "logps/chosen": -233.96817016601562, "logps/rejected": -201.4745330810547, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 2.8068573474884033, "rewards/margins": 9.525291442871094, "rewards/rejected": -6.7184343338012695, "step": 1612 }, { "epoch": 0.36, "learning_rate": 9.43335177399256e-06, "logits/chosen": -0.9147763848304749, "logits/rejected": -0.917668342590332, "logps/chosen": -68.55668640136719, "logps/rejected": -81.73932647705078, "loss": 0.0711, "rewards/accuracies": 1.0, "rewards/chosen": 1.4929672479629517, "rewards/margins": 2.025331974029541, "rewards/rejected": -0.5323646664619446, "step": 1613 }, { "epoch": 0.36, "learning_rate": 9.432522712945111e-06, "logits/chosen": -0.7960394024848938, "logits/rejected": -0.7228984832763672, "logps/chosen": -165.69403076171875, "logps/rejected": -421.4211730957031, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": 0.3336441218852997, "rewards/margins": 30.23619270324707, "rewards/rejected": -29.90254783630371, "step": 1614 }, { "epoch": 0.36, "learning_rate": 9.43169308232915e-06, "logits/chosen": -1.2863789796829224, "logits/rejected": -1.1709882020950317, "logps/chosen": -178.16751098632812, "logps/rejected": -225.64715576171875, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -0.15523071587085724, "rewards/margins": 2.7582504749298096, "rewards/rejected": -2.9134812355041504, "step": 1615 }, { "epoch": 0.36, "learning_rate": 9.430862882251279e-06, "logits/chosen": -1.0796269178390503, "logits/rejected": -1.1354966163635254, "logps/chosen": -176.651123046875, "logps/rejected": -145.86781311035156, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 1.0715789794921875, "rewards/margins": 5.869020938873291, "rewards/rejected": -4.7974419593811035, "step": 1616 }, { "epoch": 0.36, "learning_rate": 9.430032112818182e-06, "logits/chosen": -0.9206587672233582, "logits/rejected": -0.9139670729637146, "logps/chosen": -128.99002075195312, "logps/rejected": -138.95692443847656, "loss": 0.4501, "rewards/accuracies": 0.0, "rewards/chosen": -0.6516342163085938, "rewards/margins": -0.3783676028251648, "rewards/rejected": -0.27326661348342896, "step": 1617 }, { "epoch": 0.36, "learning_rate": 9.429200774136603e-06, "logits/chosen": -0.7633030414581299, "logits/rejected": -0.6724876165390015, "logps/chosen": -214.31610107421875, "logps/rejected": -302.31243896484375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 0.06323852390050888, "rewards/margins": 8.563868522644043, "rewards/rejected": -8.500630378723145, "step": 1618 }, { "epoch": 0.36, "learning_rate": 9.428368866313377e-06, "logits/chosen": -0.9837390184402466, "logits/rejected": -0.9679548144340515, "logps/chosen": -179.2528533935547, "logps/rejected": -257.1058044433594, "loss": 0.104, "rewards/accuracies": 1.0, "rewards/chosen": 1.4617751836776733, "rewards/margins": 1.4666732549667358, "rewards/rejected": -0.0048980712890625, "step": 1619 }, { "epoch": 0.36, "learning_rate": 9.427536389455394e-06, "logits/chosen": -1.0525832176208496, "logits/rejected": -1.0507385730743408, "logps/chosen": -154.63258361816406, "logps/rejected": -132.14443969726562, "loss": 0.7423, "rewards/accuracies": 1.0, "rewards/chosen": 0.6466049551963806, "rewards/margins": 2.40289306640625, "rewards/rejected": -1.7562881708145142, "step": 1620 }, { "epoch": 0.36, "learning_rate": 9.426703343669631e-06, "logits/chosen": -0.9406639933586121, "logits/rejected": -0.9352217316627502, "logps/chosen": -67.25804138183594, "logps/rejected": -35.64798355102539, "loss": 1.0827, "rewards/accuracies": 0.0, "rewards/chosen": -3.0093834400177, "rewards/margins": -0.47285032272338867, "rewards/rejected": -2.5365331172943115, "step": 1621 }, { "epoch": 0.36, "learning_rate": 9.425869729063129e-06, "logits/chosen": -0.8280796408653259, "logits/rejected": -0.8280796408653259, "logps/chosen": -167.84861755371094, "logps/rejected": -167.84861755371094, "loss": 0.3489, "rewards/accuracies": 0.0, "rewards/chosen": -5.3082194328308105, "rewards/margins": 0.0, "rewards/rejected": -5.3082194328308105, "step": 1622 }, { "epoch": 0.36, "learning_rate": 9.425035545743005e-06, "logits/chosen": -0.836402177810669, "logits/rejected": -0.8724904656410217, "logps/chosen": -154.61154174804688, "logps/rejected": -167.7050018310547, "loss": 0.2112, "rewards/accuracies": 1.0, "rewards/chosen": -5.540928840637207, "rewards/margins": 1.2866902351379395, "rewards/rejected": -6.8276190757751465, "step": 1623 }, { "epoch": 0.36, "learning_rate": 9.424200793816451e-06, "logits/chosen": -1.1076658964157104, "logits/rejected": -1.1733226776123047, "logps/chosen": -222.21334838867188, "logps/rejected": -71.1966323852539, "loss": 0.1434, "rewards/accuracies": 1.0, "rewards/chosen": 0.16478577256202698, "rewards/margins": 3.041300058364868, "rewards/rejected": -2.876514196395874, "step": 1624 }, { "epoch": 0.36, "learning_rate": 9.423365473390734e-06, "logits/chosen": -0.9911726117134094, "logits/rejected": -1.006150484085083, "logps/chosen": -72.6797866821289, "logps/rejected": -119.59651184082031, "loss": 0.145, "rewards/accuracies": 1.0, "rewards/chosen": -0.971308171749115, "rewards/margins": 1.0923941135406494, "rewards/rejected": -2.063702344894409, "step": 1625 }, { "epoch": 0.36, "learning_rate": 9.422529584573183e-06, "logits/chosen": -0.8974795341491699, "logits/rejected": -0.8974795341491699, "logps/chosen": -77.58773803710938, "logps/rejected": -77.58773803710938, "loss": 0.3565, "rewards/accuracies": 0.0, "rewards/chosen": -4.861292362213135, "rewards/margins": 0.0, "rewards/rejected": -4.861292362213135, "step": 1626 }, { "epoch": 0.36, "learning_rate": 9.421693127471214e-06, "logits/chosen": -0.729788601398468, "logits/rejected": -0.700085461139679, "logps/chosen": -91.99110412597656, "logps/rejected": -113.76085662841797, "loss": 0.0335, "rewards/accuracies": 1.0, "rewards/chosen": -0.31217193603515625, "rewards/margins": 2.674058675765991, "rewards/rejected": -2.9862306118011475, "step": 1627 }, { "epoch": 0.36, "learning_rate": 9.420856102192305e-06, "logits/chosen": -1.2230366468429565, "logits/rejected": -1.228567123413086, "logps/chosen": -108.32380676269531, "logps/rejected": -139.87698364257812, "loss": 0.2847, "rewards/accuracies": 1.0, "rewards/chosen": -1.5053497552871704, "rewards/margins": 0.2651550769805908, "rewards/rejected": -1.7705048322677612, "step": 1628 }, { "epoch": 0.36, "learning_rate": 9.420018508844017e-06, "logits/chosen": -0.7007690668106079, "logits/rejected": -0.7007690668106079, "logps/chosen": -104.51997375488281, "logps/rejected": -104.51997375488281, "loss": 0.3723, "rewards/accuracies": 0.0, "rewards/chosen": -4.319996356964111, "rewards/margins": 0.0, "rewards/rejected": -4.319996356964111, "step": 1629 }, { "epoch": 0.36, "learning_rate": 9.419180347533976e-06, "logits/chosen": -0.9974069595336914, "logits/rejected": -0.8505651950836182, "logps/chosen": -205.6595916748047, "logps/rejected": -283.26904296875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 1.4538040161132812, "rewards/margins": 5.7428879737854, "rewards/rejected": -4.289083957672119, "step": 1630 }, { "epoch": 0.36, "learning_rate": 9.418341618369882e-06, "logits/chosen": -0.7554024457931519, "logits/rejected": -0.7401206493377686, "logps/chosen": -218.7010498046875, "logps/rejected": -243.136962890625, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": 0.6971511840820312, "rewards/margins": 8.811059951782227, "rewards/rejected": -8.113908767700195, "step": 1631 }, { "epoch": 0.36, "learning_rate": 9.417502321459513e-06, "logits/chosen": -1.1095244884490967, "logits/rejected": -1.0795602798461914, "logps/chosen": -83.41191101074219, "logps/rejected": -129.30038452148438, "loss": 0.0947, "rewards/accuracies": 1.0, "rewards/chosen": -1.8002967834472656, "rewards/margins": 1.6791298389434814, "rewards/rejected": -3.479426622390747, "step": 1632 }, { "epoch": 0.36, "learning_rate": 9.416662456910714e-06, "logits/chosen": -1.061968445777893, "logits/rejected": -1.0449833869934082, "logps/chosen": -101.16267395019531, "logps/rejected": -173.14752197265625, "loss": 0.3173, "rewards/accuracies": 1.0, "rewards/chosen": 0.8217933773994446, "rewards/margins": 0.40875932574272156, "rewards/rejected": 0.413034051656723, "step": 1633 }, { "epoch": 0.36, "learning_rate": 9.415822024831407e-06, "logits/chosen": -0.8969511389732361, "logits/rejected": -0.8582216501235962, "logps/chosen": -78.82904052734375, "logps/rejected": -58.589054107666016, "loss": 0.0672, "rewards/accuracies": 1.0, "rewards/chosen": -0.599273681640625, "rewards/margins": 2.2546489238739014, "rewards/rejected": -2.8539226055145264, "step": 1634 }, { "epoch": 0.36, "learning_rate": 9.414981025329585e-06, "logits/chosen": -0.9085407257080078, "logits/rejected": -0.7914305329322815, "logps/chosen": -165.3707275390625, "logps/rejected": -226.45851135253906, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": 1.5386199951171875, "rewards/margins": 6.522088527679443, "rewards/rejected": -4.983468532562256, "step": 1635 }, { "epoch": 0.36, "learning_rate": 9.414139458513316e-06, "logits/chosen": -0.905001163482666, "logits/rejected": -0.8512397408485413, "logps/chosen": -128.7450408935547, "logps/rejected": -106.96314239501953, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": 0.7886185050010681, "rewards/margins": 4.139845848083496, "rewards/rejected": -3.3512275218963623, "step": 1636 }, { "epoch": 0.36, "learning_rate": 9.413297324490736e-06, "logits/chosen": -1.0952250957489014, "logits/rejected": -1.0264228582382202, "logps/chosen": -127.06834411621094, "logps/rejected": -208.76060485839844, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 1.0070008039474487, "rewards/margins": 6.662442207336426, "rewards/rejected": -5.6554412841796875, "step": 1637 }, { "epoch": 0.36, "learning_rate": 9.41245462337006e-06, "logits/chosen": -0.7331345677375793, "logits/rejected": -0.7658737897872925, "logps/chosen": -212.04403686523438, "logps/rejected": -222.57366943359375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.4234069883823395, "rewards/margins": 5.523536682128906, "rewards/rejected": -5.1001296043396, "step": 1638 }, { "epoch": 0.36, "learning_rate": 9.41161135525957e-06, "logits/chosen": -0.7440598011016846, "logits/rejected": -0.6606031060218811, "logps/chosen": -283.4642639160156, "logps/rejected": -157.54417419433594, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.47050172090530396, "rewards/margins": 8.488236427307129, "rewards/rejected": -8.01773452758789, "step": 1639 }, { "epoch": 0.36, "learning_rate": 9.410767520267629e-06, "logits/chosen": -1.023013949394226, "logits/rejected": -1.0052359104156494, "logps/chosen": -170.30947875976562, "logps/rejected": -140.1873779296875, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": 0.05066375806927681, "rewards/margins": 3.4134202003479004, "rewards/rejected": -3.3627564907073975, "step": 1640 }, { "epoch": 0.36, "learning_rate": 9.409923118502665e-06, "logits/chosen": -1.206532597541809, "logits/rejected": -1.199689269065857, "logps/chosen": -111.16773223876953, "logps/rejected": -208.18106079101562, "loss": 0.2108, "rewards/accuracies": 1.0, "rewards/chosen": -1.1181541681289673, "rewards/margins": 0.6524330377578735, "rewards/rejected": -1.7705872058868408, "step": 1641 }, { "epoch": 0.36, "learning_rate": 9.40907815007318e-06, "logits/chosen": -1.137265682220459, "logits/rejected": -1.194231390953064, "logps/chosen": -151.84719848632812, "logps/rejected": -144.6543731689453, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -2.4644813537597656, "rewards/margins": 2.923430919647217, "rewards/rejected": -5.387912273406982, "step": 1642 }, { "epoch": 0.36, "learning_rate": 9.408232615087752e-06, "logits/chosen": -0.9102188944816589, "logits/rejected": -0.8897987604141235, "logps/chosen": -135.96957397460938, "logps/rejected": -197.38894653320312, "loss": 0.8037, "rewards/accuracies": 0.0, "rewards/chosen": -1.763179063796997, "rewards/margins": -1.331129550933838, "rewards/rejected": -0.43204957246780396, "step": 1643 }, { "epoch": 0.36, "learning_rate": 9.40738651365503e-06, "logits/chosen": -0.8448360562324524, "logits/rejected": -0.809270977973938, "logps/chosen": -86.57752990722656, "logps/rejected": -194.4115447998047, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -2.532217502593994, "rewards/margins": 7.647593975067139, "rewards/rejected": -10.179811477661133, "step": 1644 }, { "epoch": 0.36, "learning_rate": 9.406539845883736e-06, "logits/chosen": -0.6053679585456848, "logits/rejected": -0.6284503936767578, "logps/chosen": -63.08110046386719, "logps/rejected": -94.99034118652344, "loss": 0.5131, "rewards/accuracies": 0.0, "rewards/chosen": -2.3156814575195312, "rewards/margins": -0.5539458990097046, "rewards/rejected": -1.7617355585098267, "step": 1645 }, { "epoch": 0.36, "learning_rate": 9.405692611882666e-06, "logits/chosen": -1.0087072849273682, "logits/rejected": -0.8957441449165344, "logps/chosen": -125.19500732421875, "logps/rejected": -63.31413269042969, "loss": 0.8925, "rewards/accuracies": 1.0, "rewards/chosen": 0.3217971920967102, "rewards/margins": 3.144458055496216, "rewards/rejected": -2.8226609230041504, "step": 1646 }, { "epoch": 0.36, "learning_rate": 9.404844811760685e-06, "logits/chosen": -1.1149297952651978, "logits/rejected": -1.0852363109588623, "logps/chosen": -155.61358642578125, "logps/rejected": -202.76144409179688, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.3746658265590668, "rewards/margins": 3.5347001552581787, "rewards/rejected": -3.9093658924102783, "step": 1647 }, { "epoch": 0.36, "learning_rate": 9.403996445626735e-06, "logits/chosen": -0.8557932376861572, "logits/rejected": -0.8448167443275452, "logps/chosen": -211.98147583007812, "logps/rejected": -140.23422241210938, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.2100127935409546, "rewards/margins": 4.29439640045166, "rewards/rejected": -5.504409313201904, "step": 1648 }, { "epoch": 0.36, "learning_rate": 9.403147513589829e-06, "logits/chosen": -1.243240475654602, "logits/rejected": -1.2234735488891602, "logps/chosen": -93.97794342041016, "logps/rejected": -194.38592529296875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.4525917172431946, "rewards/margins": 6.35454797744751, "rewards/rejected": -6.807139873504639, "step": 1649 }, { "epoch": 0.37, "learning_rate": 9.402298015759052e-06, "logits/chosen": -0.9474706053733826, "logits/rejected": -0.9714497923851013, "logps/chosen": -60.958648681640625, "logps/rejected": -48.97309112548828, "loss": 0.4659, "rewards/accuracies": 1.0, "rewards/chosen": 0.625872790813446, "rewards/margins": 0.9650192260742188, "rewards/rejected": -0.3391464352607727, "step": 1650 }, { "epoch": 0.37, "learning_rate": 9.401447952243563e-06, "logits/chosen": -0.8117954134941101, "logits/rejected": -0.7701706290245056, "logps/chosen": -107.14724731445312, "logps/rejected": -121.12577819824219, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -0.5420471429824829, "rewards/margins": 3.3836755752563477, "rewards/rejected": -3.925722599029541, "step": 1651 }, { "epoch": 0.37, "learning_rate": 9.400597323152591e-06, "logits/chosen": -1.0177546739578247, "logits/rejected": -1.0181918144226074, "logps/chosen": -251.0790252685547, "logps/rejected": -121.69107055664062, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": 0.4041290283203125, "rewards/margins": 2.281684160232544, "rewards/rejected": -1.8775551319122314, "step": 1652 }, { "epoch": 0.37, "learning_rate": 9.399746128595444e-06, "logits/chosen": -0.6681589484214783, "logits/rejected": -0.6528656482696533, "logps/chosen": -263.1535339355469, "logps/rejected": -246.11126708984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.3584564924240112, "rewards/margins": 12.825833320617676, "rewards/rejected": -11.467376708984375, "step": 1653 }, { "epoch": 0.37, "learning_rate": 9.398894368681496e-06, "logits/chosen": -0.5587798953056335, "logits/rejected": -0.6142194867134094, "logps/chosen": -199.61935424804688, "logps/rejected": -267.2706298828125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.009674072265625, "rewards/margins": 5.0262908935546875, "rewards/rejected": -6.0359649658203125, "step": 1654 }, { "epoch": 0.37, "learning_rate": 9.398042043520197e-06, "logits/chosen": -0.8356319665908813, "logits/rejected": -0.8073352575302124, "logps/chosen": -213.2391357421875, "logps/rejected": -164.4429473876953, "loss": 0.3019, "rewards/accuracies": 1.0, "rewards/chosen": 2.0712661743164062, "rewards/margins": 6.459342956542969, "rewards/rejected": -4.3880767822265625, "step": 1655 }, { "epoch": 0.37, "learning_rate": 9.397189153221067e-06, "logits/chosen": -1.269364595413208, "logits/rejected": -1.2436707019805908, "logps/chosen": -113.555419921875, "logps/rejected": -195.570068359375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.4430946409702301, "rewards/margins": 5.7678093910217285, "rewards/rejected": -6.210904121398926, "step": 1656 }, { "epoch": 0.37, "learning_rate": 9.396335697893702e-06, "logits/chosen": -0.8776854872703552, "logits/rejected": -0.8272547125816345, "logps/chosen": -114.39056396484375, "logps/rejected": -118.5941390991211, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": 0.8073440790176392, "rewards/margins": 4.630326271057129, "rewards/rejected": -3.8229820728302, "step": 1657 }, { "epoch": 0.37, "learning_rate": 9.395481677647767e-06, "logits/chosen": -1.1874933242797852, "logits/rejected": -1.1874878406524658, "logps/chosen": -193.27508544921875, "logps/rejected": -207.0060272216797, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.19824524223804474, "rewards/margins": 6.850955009460449, "rewards/rejected": -6.6527099609375, "step": 1658 }, { "epoch": 0.37, "learning_rate": 9.394627092593002e-06, "logits/chosen": -0.700410008430481, "logits/rejected": -0.694466769695282, "logps/chosen": -98.89105224609375, "logps/rejected": -113.27942657470703, "loss": 0.0809, "rewards/accuracies": 1.0, "rewards/chosen": -0.2574203610420227, "rewards/margins": 1.808600664138794, "rewards/rejected": -2.066020965576172, "step": 1659 }, { "epoch": 0.37, "learning_rate": 9.393771942839223e-06, "logits/chosen": -0.8601545691490173, "logits/rejected": -0.8020706176757812, "logps/chosen": -188.01809692382812, "logps/rejected": -281.09503173828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.6137451529502869, "rewards/margins": 9.515668869018555, "rewards/rejected": -8.901924133300781, "step": 1660 }, { "epoch": 0.37, "learning_rate": 9.392916228496309e-06, "logits/chosen": -0.9053030610084534, "logits/rejected": -0.8633906841278076, "logps/chosen": -81.34750366210938, "logps/rejected": -182.5376434326172, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.7655059695243835, "rewards/margins": 5.47006368637085, "rewards/rejected": -4.7045578956604, "step": 1661 }, { "epoch": 0.37, "learning_rate": 9.392059949674222e-06, "logits/chosen": -0.7693155407905579, "logits/rejected": -0.6658833026885986, "logps/chosen": -95.4415054321289, "logps/rejected": -118.39009857177734, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.040810465812683, "rewards/margins": 4.415513038635254, "rewards/rejected": -5.456323623657227, "step": 1662 }, { "epoch": 0.37, "learning_rate": 9.39120310648299e-06, "logits/chosen": -1.0256479978561401, "logits/rejected": -1.0422134399414062, "logps/chosen": -197.57281494140625, "logps/rejected": -86.67529296875, "loss": 0.216, "rewards/accuracies": 1.0, "rewards/chosen": 0.3570907711982727, "rewards/margins": 2.627676486968994, "rewards/rejected": -2.270585775375366, "step": 1663 }, { "epoch": 0.37, "learning_rate": 9.390345699032712e-06, "logits/chosen": -0.8861031532287598, "logits/rejected": -0.9044811129570007, "logps/chosen": -191.33383178710938, "logps/rejected": -277.78076171875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.7221618890762329, "rewards/margins": 7.566427707672119, "rewards/rejected": -6.844265937805176, "step": 1664 }, { "epoch": 0.37, "learning_rate": 9.389487727433569e-06, "logits/chosen": -0.8110182285308838, "logits/rejected": -0.8110182285308838, "logps/chosen": -83.22254180908203, "logps/rejected": -83.22254180908203, "loss": 0.4109, "rewards/accuracies": 0.0, "rewards/chosen": -5.032341957092285, "rewards/margins": 0.0, "rewards/rejected": -5.032341957092285, "step": 1665 }, { "epoch": 0.37, "learning_rate": 9.388629191795804e-06, "logits/chosen": -0.7476668953895569, "logits/rejected": -0.727425754070282, "logps/chosen": -77.7039794921875, "logps/rejected": -117.85440063476562, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": 0.5518341064453125, "rewards/margins": 2.9167115688323975, "rewards/rejected": -2.364877462387085, "step": 1666 }, { "epoch": 0.37, "learning_rate": 9.387770092229736e-06, "logits/chosen": -0.8764597177505493, "logits/rejected": -0.7789046168327332, "logps/chosen": -285.85089111328125, "logps/rejected": -190.86856079101562, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.6524017453193665, "rewards/margins": 7.688198566436768, "rewards/rejected": -7.035796642303467, "step": 1667 }, { "epoch": 0.37, "learning_rate": 9.386910428845762e-06, "logits/chosen": -1.0857057571411133, "logits/rejected": -1.0573070049285889, "logps/chosen": -97.3279800415039, "logps/rejected": -133.81301879882812, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": 1.3776848316192627, "rewards/margins": 2.686387538909912, "rewards/rejected": -1.308702826499939, "step": 1668 }, { "epoch": 0.37, "learning_rate": 9.386050201754342e-06, "logits/chosen": -0.7609682083129883, "logits/rejected": -0.7609682083129883, "logps/chosen": -120.90440368652344, "logps/rejected": -120.90440368652344, "loss": 1.2025, "rewards/accuracies": 0.0, "rewards/chosen": -0.18471527099609375, "rewards/margins": 0.0, "rewards/rejected": -0.18471527099609375, "step": 1669 }, { "epoch": 0.37, "learning_rate": 9.385189411066014e-06, "logits/chosen": -0.9443947672843933, "logits/rejected": -0.9104987382888794, "logps/chosen": -155.56158447265625, "logps/rejected": -223.740478515625, "loss": 0.1075, "rewards/accuracies": 1.0, "rewards/chosen": 0.4653945863246918, "rewards/margins": 1.706568956375122, "rewards/rejected": -1.241174340248108, "step": 1670 }, { "epoch": 0.37, "learning_rate": 9.384328056891389e-06, "logits/chosen": -0.9934135675430298, "logits/rejected": -0.9386518001556396, "logps/chosen": -72.09064483642578, "logps/rejected": -118.53590393066406, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -1.1487464904785156, "rewards/margins": 3.071103096008301, "rewards/rejected": -4.219849586486816, "step": 1671 }, { "epoch": 0.37, "learning_rate": 9.38346613934115e-06, "logits/chosen": -0.6111868023872375, "logits/rejected": -0.6111868023872375, "logps/chosen": -116.36126708984375, "logps/rejected": -116.36126708984375, "loss": 0.3473, "rewards/accuracies": 0.0, "rewards/chosen": -3.786673069000244, "rewards/margins": 0.0, "rewards/rejected": -3.786673069000244, "step": 1672 }, { "epoch": 0.37, "learning_rate": 9.382603658526048e-06, "logits/chosen": -1.0595513582229614, "logits/rejected": -1.0595513582229614, "logps/chosen": -246.23828125, "logps/rejected": -246.23828125, "loss": 0.3643, "rewards/accuracies": 0.0, "rewards/chosen": -6.811978340148926, "rewards/margins": 0.0, "rewards/rejected": -6.811978340148926, "step": 1673 }, { "epoch": 0.37, "learning_rate": 9.381740614556911e-06, "logits/chosen": -0.9078683853149414, "logits/rejected": -0.9078683853149414, "logps/chosen": -57.511863708496094, "logps/rejected": -57.511863708496094, "loss": 0.7368, "rewards/accuracies": 0.0, "rewards/chosen": -1.1479625701904297, "rewards/margins": 0.0, "rewards/rejected": -1.1479625701904297, "step": 1674 }, { "epoch": 0.37, "learning_rate": 9.38087700754464e-06, "logits/chosen": -1.1412017345428467, "logits/rejected": -1.1822178363800049, "logps/chosen": -178.8680877685547, "logps/rejected": -140.57705688476562, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 1.4901946783065796, "rewards/margins": 4.1180877685546875, "rewards/rejected": -2.6278932094573975, "step": 1675 }, { "epoch": 0.37, "learning_rate": 9.380012837600205e-06, "logits/chosen": -0.8862724900245667, "logits/rejected": -0.8862724900245667, "logps/chosen": -117.11766052246094, "logps/rejected": -117.11766052246094, "loss": 0.3956, "rewards/accuracies": 0.0, "rewards/chosen": -2.9918365478515625, "rewards/margins": 0.0, "rewards/rejected": -2.9918365478515625, "step": 1676 }, { "epoch": 0.37, "learning_rate": 9.379148104834648e-06, "logits/chosen": -0.9632222652435303, "logits/rejected": -0.9632222652435303, "logps/chosen": -125.62611389160156, "logps/rejected": -125.62611389160156, "loss": 1.2478, "rewards/accuracies": 0.0, "rewards/chosen": -1.3717864751815796, "rewards/margins": 0.0, "rewards/rejected": -1.3717864751815796, "step": 1677 }, { "epoch": 0.37, "learning_rate": 9.378282809359087e-06, "logits/chosen": -0.658644437789917, "logits/rejected": -0.42586463689804077, "logps/chosen": -138.79714965820312, "logps/rejected": -675.4638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.755366563796997, "rewards/margins": 50.38329315185547, "rewards/rejected": -53.1386604309082, "step": 1678 }, { "epoch": 0.37, "learning_rate": 9.377416951284712e-06, "logits/chosen": -0.6957770586013794, "logits/rejected": -0.684751570224762, "logps/chosen": -89.40574645996094, "logps/rejected": -175.1638946533203, "loss": 0.1635, "rewards/accuracies": 1.0, "rewards/chosen": -0.29217836260795593, "rewards/margins": 4.004512310028076, "rewards/rejected": -4.296690464019775, "step": 1679 }, { "epoch": 0.37, "learning_rate": 9.376550530722778e-06, "logits/chosen": -0.9537294507026672, "logits/rejected": -0.9434003829956055, "logps/chosen": -218.61785888671875, "logps/rejected": -226.57923889160156, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": 3.5308518409729004, "rewards/margins": 11.566932678222656, "rewards/rejected": -8.036081314086914, "step": 1680 }, { "epoch": 0.37, "learning_rate": 9.375683547784626e-06, "logits/chosen": -0.8586399555206299, "logits/rejected": -0.8009023070335388, "logps/chosen": -205.74459838867188, "logps/rejected": -137.00575256347656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.542428731918335, "rewards/margins": 9.28613567352295, "rewards/rejected": -6.743707180023193, "step": 1681 }, { "epoch": 0.37, "learning_rate": 9.374816002581654e-06, "logits/chosen": -0.9770479798316956, "logits/rejected": -0.8584839701652527, "logps/chosen": -95.14366149902344, "logps/rejected": -108.7681655883789, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -0.08884888142347336, "rewards/margins": 4.9951348304748535, "rewards/rejected": -5.083983898162842, "step": 1682 }, { "epoch": 0.37, "learning_rate": 9.373947895225345e-06, "logits/chosen": -1.0823841094970703, "logits/rejected": -1.082883596420288, "logps/chosen": -155.77352905273438, "logps/rejected": -209.52381896972656, "loss": 0.5846, "rewards/accuracies": 0.0, "rewards/chosen": 0.21842193603515625, "rewards/margins": -0.7654953002929688, "rewards/rejected": 0.983917236328125, "step": 1683 }, { "epoch": 0.37, "learning_rate": 9.373079225827243e-06, "logits/chosen": -0.9115181565284729, "logits/rejected": -0.8952867388725281, "logps/chosen": -151.8499755859375, "logps/rejected": -253.26980590820312, "loss": 1.0308, "rewards/accuracies": 0.0, "rewards/chosen": -4.472535610198975, "rewards/margins": -1.9172422885894775, "rewards/rejected": -2.555293321609497, "step": 1684 }, { "epoch": 0.37, "learning_rate": 9.372209994498976e-06, "logits/chosen": -0.9308846592903137, "logits/rejected": -0.8665019273757935, "logps/chosen": -117.07575225830078, "logps/rejected": -43.01017379760742, "loss": 0.1724, "rewards/accuracies": 1.0, "rewards/chosen": -0.37529221177101135, "rewards/margins": 0.9052647352218628, "rewards/rejected": -1.2805569171905518, "step": 1685 }, { "epoch": 0.37, "learning_rate": 9.371340201352234e-06, "logits/chosen": -0.7942445874214172, "logits/rejected": -0.807293176651001, "logps/chosen": -71.6649169921875, "logps/rejected": -59.022274017333984, "loss": 0.5535, "rewards/accuracies": 0.0, "rewards/chosen": -0.2985641658306122, "rewards/margins": -0.4671558737754822, "rewards/rejected": 0.1685916930437088, "step": 1686 }, { "epoch": 0.37, "learning_rate": 9.370469846498784e-06, "logits/chosen": -1.0848801136016846, "logits/rejected": -1.1294986009597778, "logps/chosen": -114.03207397460938, "logps/rejected": -161.3760986328125, "loss": 0.1188, "rewards/accuracies": 1.0, "rewards/chosen": -2.2511062622070312, "rewards/margins": 1.5095245838165283, "rewards/rejected": -3.7606308460235596, "step": 1687 }, { "epoch": 0.37, "learning_rate": 9.369598930050466e-06, "logits/chosen": -0.8943277597427368, "logits/rejected": -0.8199960589408875, "logps/chosen": -142.49728393554688, "logps/rejected": -215.06991577148438, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.7445602416992188, "rewards/margins": 3.872767925262451, "rewards/rejected": -4.61732816696167, "step": 1688 }, { "epoch": 0.37, "learning_rate": 9.368727452119188e-06, "logits/chosen": -1.038782000541687, "logits/rejected": -0.9658190011978149, "logps/chosen": -105.0589828491211, "logps/rejected": -189.27215576171875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.22110596299171448, "rewards/margins": 4.700085639953613, "rewards/rejected": -4.921191692352295, "step": 1689 }, { "epoch": 0.37, "learning_rate": 9.367855412816935e-06, "logits/chosen": -0.897619903087616, "logits/rejected": -0.8705994486808777, "logps/chosen": -122.94821166992188, "logps/rejected": -132.69723510742188, "loss": 1.0381, "rewards/accuracies": 1.0, "rewards/chosen": 1.4161666631698608, "rewards/margins": 6.457247257232666, "rewards/rejected": -5.041080474853516, "step": 1690 }, { "epoch": 0.37, "learning_rate": 9.366982812255764e-06, "logits/chosen": -1.0682659149169922, "logits/rejected": -1.0324862003326416, "logps/chosen": -91.30325317382812, "logps/rejected": -177.4668731689453, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -1.7367454767227173, "rewards/margins": 5.321151256561279, "rewards/rejected": -7.057896614074707, "step": 1691 }, { "epoch": 0.37, "learning_rate": 9.366109650547798e-06, "logits/chosen": -0.9686817526817322, "logits/rejected": -0.9540907144546509, "logps/chosen": -83.01544952392578, "logps/rejected": -106.96504211425781, "loss": 0.3693, "rewards/accuracies": 1.0, "rewards/chosen": -1.9586209058761597, "rewards/margins": 0.32072627544403076, "rewards/rejected": -2.2793471813201904, "step": 1692 }, { "epoch": 0.37, "learning_rate": 9.365235927805237e-06, "logits/chosen": -0.8916820883750916, "logits/rejected": -0.8632431626319885, "logps/chosen": -107.17304992675781, "logps/rejected": -64.45893096923828, "loss": 0.4183, "rewards/accuracies": 1.0, "rewards/chosen": -0.6023483276367188, "rewards/margins": 1.8692269325256348, "rewards/rejected": -2.4715752601623535, "step": 1693 }, { "epoch": 0.37, "learning_rate": 9.364361644140353e-06, "logits/chosen": -0.9941256046295166, "logits/rejected": -0.9815834760665894, "logps/chosen": -135.90597534179688, "logps/rejected": -209.6269989013672, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.41314697265625, "rewards/margins": 7.581920146942139, "rewards/rejected": -7.168773174285889, "step": 1694 }, { "epoch": 0.38, "learning_rate": 9.36348679966549e-06, "logits/chosen": -1.045677661895752, "logits/rejected": -1.008522629737854, "logps/chosen": -118.85933685302734, "logps/rejected": -163.1275634765625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.24177169799804688, "rewards/margins": 4.974589824676514, "rewards/rejected": -5.2163615226745605, "step": 1695 }, { "epoch": 0.38, "learning_rate": 9.362611394493063e-06, "logits/chosen": -0.8345662355422974, "logits/rejected": -0.880132794380188, "logps/chosen": -178.1083221435547, "logps/rejected": -95.80130004882812, "loss": 0.2552, "rewards/accuracies": 1.0, "rewards/chosen": 0.2341568022966385, "rewards/margins": 0.40664902329444885, "rewards/rejected": -0.17249222099781036, "step": 1696 }, { "epoch": 0.38, "learning_rate": 9.361735428735558e-06, "logits/chosen": -0.9153962135314941, "logits/rejected": -0.8856169581413269, "logps/chosen": -147.14608764648438, "logps/rejected": -167.97698974609375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.040277123451233, "rewards/margins": 5.9083757400512695, "rewards/rejected": -6.948652744293213, "step": 1697 }, { "epoch": 0.38, "learning_rate": 9.360858902505539e-06, "logits/chosen": -0.8439429998397827, "logits/rejected": -0.8760847449302673, "logps/chosen": -207.934814453125, "logps/rejected": -65.31209564208984, "loss": 0.1262, "rewards/accuracies": 1.0, "rewards/chosen": -1.2489136457443237, "rewards/margins": 1.7084773778915405, "rewards/rejected": -2.9573910236358643, "step": 1698 }, { "epoch": 0.38, "learning_rate": 9.359981815915632e-06, "logits/chosen": -0.7778258323669434, "logits/rejected": -0.7461804747581482, "logps/chosen": -147.90892028808594, "logps/rejected": -124.88426971435547, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -0.3612228333950043, "rewards/margins": 3.871184825897217, "rewards/rejected": -4.232407569885254, "step": 1699 }, { "epoch": 0.38, "learning_rate": 9.359104169078541e-06, "logits/chosen": -0.9153595566749573, "logits/rejected": -0.8812369704246521, "logps/chosen": -175.01962280273438, "logps/rejected": -200.82656860351562, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 1.07537841796875, "rewards/margins": 8.442359924316406, "rewards/rejected": -7.366981029510498, "step": 1700 }, { "epoch": 0.38, "learning_rate": 9.358225962107047e-06, "logits/chosen": -1.1881893873214722, "logits/rejected": -1.1956932544708252, "logps/chosen": -128.59515380859375, "logps/rejected": -73.6177749633789, "loss": 0.0771, "rewards/accuracies": 1.0, "rewards/chosen": -2.220740556716919, "rewards/margins": 1.8239195346832275, "rewards/rejected": -4.0446600914001465, "step": 1701 }, { "epoch": 0.38, "learning_rate": 9.35734719511399e-06, "logits/chosen": -1.1695261001586914, "logits/rejected": -1.100846290588379, "logps/chosen": -124.01348114013672, "logps/rejected": -219.09130859375, "loss": 0.0766, "rewards/accuracies": 1.0, "rewards/chosen": -2.6099915504455566, "rewards/margins": 2.3375043869018555, "rewards/rejected": -4.947495937347412, "step": 1702 }, { "epoch": 0.38, "learning_rate": 9.356467868212295e-06, "logits/chosen": -0.8803505897521973, "logits/rejected": -0.9212077856063843, "logps/chosen": -192.2685089111328, "logps/rejected": -209.403564453125, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": -6.3696608543396, "rewards/margins": 2.8286185264587402, "rewards/rejected": -9.19827938079834, "step": 1703 }, { "epoch": 0.38, "learning_rate": 9.35558798151495e-06, "logits/chosen": -1.0577678680419922, "logits/rejected": -1.0385795831680298, "logps/chosen": -92.71577453613281, "logps/rejected": -148.72406005859375, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": 0.6573944091796875, "rewards/margins": 4.987729072570801, "rewards/rejected": -4.330334663391113, "step": 1704 }, { "epoch": 0.38, "learning_rate": 9.354707535135022e-06, "logits/chosen": -0.8085958957672119, "logits/rejected": -0.7050086855888367, "logps/chosen": -139.89984130859375, "logps/rejected": -284.5604248046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.2984146177768707, "rewards/margins": 7.34381103515625, "rewards/rejected": -7.045396327972412, "step": 1705 }, { "epoch": 0.38, "learning_rate": 9.353826529185644e-06, "logits/chosen": -0.8553171753883362, "logits/rejected": -0.8553171753883362, "logps/chosen": -100.2443618774414, "logps/rejected": -100.2443618774414, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -3.8752388954162598, "rewards/margins": 0.0, "rewards/rejected": -3.8752388954162598, "step": 1706 }, { "epoch": 0.38, "learning_rate": 9.352944963780024e-06, "logits/chosen": -1.1189395189285278, "logits/rejected": -1.1189395189285278, "logps/chosen": -191.26889038085938, "logps/rejected": -191.26889038085938, "loss": 0.3476, "rewards/accuracies": 0.0, "rewards/chosen": -1.954193115234375, "rewards/margins": 0.0, "rewards/rejected": -1.954193115234375, "step": 1707 }, { "epoch": 0.38, "learning_rate": 9.352062839031438e-06, "logits/chosen": -1.1021873950958252, "logits/rejected": -1.0927437543869019, "logps/chosen": -211.1580810546875, "logps/rejected": -213.8718719482422, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.539105236530304, "rewards/margins": 6.057185173034668, "rewards/rejected": -6.596290588378906, "step": 1708 }, { "epoch": 0.38, "learning_rate": 9.351180155053242e-06, "logits/chosen": -1.207879900932312, "logits/rejected": -1.207879900932312, "logps/chosen": -71.15869140625, "logps/rejected": -71.15869140625, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -1.3776839971542358, "rewards/margins": 0.0, "rewards/rejected": -1.3776839971542358, "step": 1709 }, { "epoch": 0.38, "learning_rate": 9.350296911958854e-06, "logits/chosen": -1.0810884237289429, "logits/rejected": -1.1434470415115356, "logps/chosen": -205.6533203125, "logps/rejected": -116.58667755126953, "loss": 0.0351, "rewards/accuracies": 1.0, "rewards/chosen": 0.3478988707065582, "rewards/margins": 2.696530342102051, "rewards/rejected": -2.3486313819885254, "step": 1710 }, { "epoch": 0.38, "learning_rate": 9.34941310986177e-06, "logits/chosen": -1.1978063583374023, "logits/rejected": -1.1583503484725952, "logps/chosen": -109.80913543701172, "logps/rejected": -175.1727752685547, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.8775581121444702, "rewards/margins": 8.384444236755371, "rewards/rejected": -6.5068864822387695, "step": 1711 }, { "epoch": 0.38, "learning_rate": 9.348528748875558e-06, "logits/chosen": -0.910020112991333, "logits/rejected": -0.864739179611206, "logps/chosen": -159.38868713378906, "logps/rejected": -57.09661865234375, "loss": 0.658, "rewards/accuracies": 0.0, "rewards/chosen": -3.143864393234253, "rewards/margins": -1.003758192062378, "rewards/rejected": -2.140106201171875, "step": 1712 }, { "epoch": 0.38, "learning_rate": 9.347643829113856e-06, "logits/chosen": -1.041820764541626, "logits/rejected": -1.0408923625946045, "logps/chosen": -83.33382415771484, "logps/rejected": -142.8637237548828, "loss": 0.1139, "rewards/accuracies": 1.0, "rewards/chosen": -0.07141800224781036, "rewards/margins": 1.5714064836502075, "rewards/rejected": -1.6428245306015015, "step": 1713 }, { "epoch": 0.38, "learning_rate": 9.346758350690373e-06, "logits/chosen": -1.135786533355713, "logits/rejected": -1.1336218118667603, "logps/chosen": -158.36309814453125, "logps/rejected": -84.38634490966797, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": 2.049273729324341, "rewards/margins": 5.534411430358887, "rewards/rejected": -3.485137939453125, "step": 1714 }, { "epoch": 0.38, "learning_rate": 9.34587231371889e-06, "logits/chosen": -0.9747374057769775, "logits/rejected": -0.9464428424835205, "logps/chosen": -193.37185668945312, "logps/rejected": -119.34039306640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.22401276230812073, "rewards/margins": 6.91463565826416, "rewards/rejected": -7.138648509979248, "step": 1715 }, { "epoch": 0.38, "learning_rate": 9.344985718313264e-06, "logits/chosen": -0.6782841682434082, "logits/rejected": -0.5989472270011902, "logps/chosen": -90.26423645019531, "logps/rejected": -77.18990325927734, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.248016357421875, "rewards/margins": 4.559408664703369, "rewards/rejected": -4.807425022125244, "step": 1716 }, { "epoch": 0.38, "learning_rate": 9.344098564587418e-06, "logits/chosen": -1.1017944812774658, "logits/rejected": -1.1167393922805786, "logps/chosen": -119.5687255859375, "logps/rejected": -129.2660675048828, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 0.07543182373046875, "rewards/margins": 4.348095893859863, "rewards/rejected": -4.2726640701293945, "step": 1717 }, { "epoch": 0.38, "learning_rate": 9.343210852655348e-06, "logits/chosen": -1.0722565650939941, "logits/rejected": -0.9657111167907715, "logps/chosen": -172.30526733398438, "logps/rejected": -247.30157470703125, "loss": 0.0487, "rewards/accuracies": 1.0, "rewards/chosen": -2.2131593227386475, "rewards/margins": 5.140846252441406, "rewards/rejected": -7.354005336761475, "step": 1718 }, { "epoch": 0.38, "learning_rate": 9.342322582631125e-06, "logits/chosen": -0.8897711038589478, "logits/rejected": -0.8488049507141113, "logps/chosen": -125.74771118164062, "logps/rejected": -138.45962524414062, "loss": 0.1058, "rewards/accuracies": 1.0, "rewards/chosen": 0.25714111328125, "rewards/margins": 3.218376874923706, "rewards/rejected": -2.961235761642456, "step": 1719 }, { "epoch": 0.38, "learning_rate": 9.341433754628888e-06, "logits/chosen": -1.1032167673110962, "logits/rejected": -1.1032167673110962, "logps/chosen": -159.2425537109375, "logps/rejected": -159.2425537109375, "loss": 0.4947, "rewards/accuracies": 0.0, "rewards/chosen": 0.07251586765050888, "rewards/margins": 0.0, "rewards/rejected": 0.07251586765050888, "step": 1720 }, { "epoch": 0.38, "learning_rate": 9.340544368762851e-06, "logits/chosen": -0.8380519151687622, "logits/rejected": -0.8562614321708679, "logps/chosen": -181.939453125, "logps/rejected": -264.0439758300781, "loss": 0.0546, "rewards/accuracies": 1.0, "rewards/chosen": 0.8102157711982727, "rewards/margins": 2.1728012561798096, "rewards/rejected": -1.362585425376892, "step": 1721 }, { "epoch": 0.38, "learning_rate": 9.339654425147297e-06, "logits/chosen": -0.6383607387542725, "logits/rejected": -0.6362934112548828, "logps/chosen": -21.813642501831055, "logps/rejected": -11.765741348266602, "loss": 0.4932, "rewards/accuracies": 0.0, "rewards/chosen": -0.7408818602561951, "rewards/margins": -0.2678360342979431, "rewards/rejected": -0.47304582595825195, "step": 1722 }, { "epoch": 0.38, "learning_rate": 9.338763923896583e-06, "logits/chosen": -1.0169038772583008, "logits/rejected": -1.0114291906356812, "logps/chosen": -80.48886108398438, "logps/rejected": -81.3401870727539, "loss": 0.1553, "rewards/accuracies": 1.0, "rewards/chosen": -0.8219230771064758, "rewards/margins": 1.3058736324310303, "rewards/rejected": -2.1277966499328613, "step": 1723 }, { "epoch": 0.38, "learning_rate": 9.337872865125133e-06, "logits/chosen": -0.8579022884368896, "logits/rejected": -0.8015550374984741, "logps/chosen": -87.04085540771484, "logps/rejected": -175.83450317382812, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.4369972348213196, "rewards/margins": 5.143876075744629, "rewards/rejected": -4.706878662109375, "step": 1724 }, { "epoch": 0.38, "learning_rate": 9.336981248947447e-06, "logits/chosen": -0.9000326991081238, "logits/rejected": -0.8657695651054382, "logps/chosen": -281.2234802246094, "logps/rejected": -200.74496459960938, "loss": 0.9514, "rewards/accuracies": 0.0, "rewards/chosen": -5.3404083251953125, "rewards/margins": -0.7422547340393066, "rewards/rejected": -4.598153591156006, "step": 1725 }, { "epoch": 0.38, "learning_rate": 9.336089075478098e-06, "logits/chosen": -0.7494918704032898, "logits/rejected": -0.6632353067398071, "logps/chosen": -99.19159698486328, "logps/rejected": -52.79047393798828, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": -0.03406219556927681, "rewards/margins": 2.362198829650879, "rewards/rejected": -2.396260976791382, "step": 1726 }, { "epoch": 0.38, "learning_rate": 9.335196344831727e-06, "logits/chosen": -1.2203218936920166, "logits/rejected": -1.2018646001815796, "logps/chosen": -98.95533752441406, "logps/rejected": -127.88713073730469, "loss": 0.1416, "rewards/accuracies": 1.0, "rewards/chosen": -0.1663261502981186, "rewards/margins": 1.1171966791152954, "rewards/rejected": -1.2835228443145752, "step": 1727 }, { "epoch": 0.38, "learning_rate": 9.334303057123044e-06, "logits/chosen": -1.2701719999313354, "logits/rejected": -1.2173479795455933, "logps/chosen": -117.21208953857422, "logps/rejected": -291.750732421875, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": 0.578687310218811, "rewards/margins": 3.451941967010498, "rewards/rejected": -2.8732545375823975, "step": 1728 }, { "epoch": 0.38, "learning_rate": 9.33340921246684e-06, "logits/chosen": -1.1211601495742798, "logits/rejected": -1.2039333581924438, "logps/chosen": -198.12887573242188, "logps/rejected": -53.175682067871094, "loss": 0.2664, "rewards/accuracies": 1.0, "rewards/chosen": -1.9667510986328125, "rewards/margins": 0.42913293838500977, "rewards/rejected": -2.3958840370178223, "step": 1729 }, { "epoch": 0.38, "learning_rate": 9.332514810977969e-06, "logits/chosen": -1.2043770551681519, "logits/rejected": -1.1990915536880493, "logps/chosen": -99.5674819946289, "logps/rejected": -118.85441589355469, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": 0.4053505063056946, "rewards/margins": 3.5926597118377686, "rewards/rejected": -3.1873092651367188, "step": 1730 }, { "epoch": 0.38, "learning_rate": 9.331619852771361e-06, "logits/chosen": -1.3394464254379272, "logits/rejected": -1.3394464254379272, "logps/chosen": -35.747657775878906, "logps/rejected": -35.747657775878906, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -1.5449345111846924, "rewards/margins": 0.0, "rewards/rejected": -1.5449345111846924, "step": 1731 }, { "epoch": 0.38, "learning_rate": 9.330724337962013e-06, "logits/chosen": -1.3541669845581055, "logits/rejected": -1.4044290781021118, "logps/chosen": -201.40841674804688, "logps/rejected": -209.4649200439453, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.9710968136787415, "rewards/margins": 6.863765239715576, "rewards/rejected": -5.8926682472229, "step": 1732 }, { "epoch": 0.38, "learning_rate": 9.329828266665e-06, "logits/chosen": -0.9625391364097595, "logits/rejected": -0.9208576679229736, "logps/chosen": -69.64970397949219, "logps/rejected": -137.86236572265625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.3453308045864105, "rewards/margins": 4.484504222869873, "rewards/rejected": -4.829834938049316, "step": 1733 }, { "epoch": 0.38, "learning_rate": 9.328931638995461e-06, "logits/chosen": -0.7414447665214539, "logits/rejected": -0.7482356429100037, "logps/chosen": -139.5421905517578, "logps/rejected": -88.5674819946289, "loss": 1.565, "rewards/accuracies": 0.0, "rewards/chosen": -6.203184604644775, "rewards/margins": -3.0852973461151123, "rewards/rejected": -3.117887258529663, "step": 1734 }, { "epoch": 0.38, "learning_rate": 9.328034455068616e-06, "logits/chosen": -0.8328591585159302, "logits/rejected": -0.8328591585159302, "logps/chosen": -101.71595764160156, "logps/rejected": -101.71595764160156, "loss": 0.4694, "rewards/accuracies": 0.0, "rewards/chosen": -2.1430840492248535, "rewards/margins": 0.0, "rewards/rejected": -2.1430840492248535, "step": 1735 }, { "epoch": 0.38, "learning_rate": 9.327136714999745e-06, "logits/chosen": -0.7786930203437805, "logits/rejected": -0.38876157999038696, "logps/chosen": -85.44448852539062, "logps/rejected": -519.4511108398438, "loss": 0.3681, "rewards/accuracies": 1.0, "rewards/chosen": -0.4684310853481293, "rewards/margins": 37.97276306152344, "rewards/rejected": -38.441192626953125, "step": 1736 }, { "epoch": 0.38, "learning_rate": 9.32623841890421e-06, "logits/chosen": -0.8373913168907166, "logits/rejected": -0.8193231821060181, "logps/chosen": -203.92349243164062, "logps/rejected": -240.7482147216797, "loss": 0.2218, "rewards/accuracies": 1.0, "rewards/chosen": -6.207602024078369, "rewards/margins": 0.5951108932495117, "rewards/rejected": -6.802712917327881, "step": 1737 }, { "epoch": 0.38, "learning_rate": 9.325339566897437e-06, "logits/chosen": -1.3577847480773926, "logits/rejected": -1.31387197971344, "logps/chosen": -93.55754089355469, "logps/rejected": -155.1607666015625, "loss": 0.0496, "rewards/accuracies": 1.0, "rewards/chosen": -0.7449310421943665, "rewards/margins": 3.2795703411102295, "rewards/rejected": -4.024501323699951, "step": 1738 }, { "epoch": 0.38, "learning_rate": 9.324440159094927e-06, "logits/chosen": -1.1089057922363281, "logits/rejected": -0.9874996542930603, "logps/chosen": -173.03749084472656, "logps/rejected": -237.69921875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.09488677978515625, "rewards/margins": 5.505226135253906, "rewards/rejected": -5.41033935546875, "step": 1739 }, { "epoch": 0.39, "learning_rate": 9.323540195612255e-06, "logits/chosen": -1.113304615020752, "logits/rejected": -1.0634303092956543, "logps/chosen": -62.6029052734375, "logps/rejected": -190.8599853515625, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.3604053556919098, "rewards/margins": 6.720231056213379, "rewards/rejected": -7.080636501312256, "step": 1740 }, { "epoch": 0.39, "learning_rate": 9.322639676565059e-06, "logits/chosen": -0.801504909992218, "logits/rejected": -0.7960590720176697, "logps/chosen": -133.377197265625, "logps/rejected": -120.01551818847656, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 0.06063690409064293, "rewards/margins": 4.587709426879883, "rewards/rejected": -4.527072429656982, "step": 1741 }, { "epoch": 0.39, "learning_rate": 9.321738602069057e-06, "logits/chosen": -0.8878350257873535, "logits/rejected": -0.8657400608062744, "logps/chosen": -107.9534912109375, "logps/rejected": -146.63475036621094, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.0047409534454346, "rewards/margins": 4.213846206665039, "rewards/rejected": -5.218587398529053, "step": 1742 }, { "epoch": 0.39, "learning_rate": 9.320836972240034e-06, "logits/chosen": -1.0288015604019165, "logits/rejected": -1.045539140701294, "logps/chosen": -176.8387451171875, "logps/rejected": -132.163330078125, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -0.9320007562637329, "rewards/margins": 3.5114426612854004, "rewards/rejected": -4.443443298339844, "step": 1743 }, { "epoch": 0.39, "learning_rate": 9.319934787193846e-06, "logits/chosen": -1.416951298713684, "logits/rejected": -1.4802742004394531, "logps/chosen": -142.40863037109375, "logps/rejected": -144.30685424804688, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.539581298828125, "rewards/margins": 5.225902080535889, "rewards/rejected": -5.765483379364014, "step": 1744 }, { "epoch": 0.39, "learning_rate": 9.319032047046422e-06, "logits/chosen": -1.3085073232650757, "logits/rejected": -1.243116021156311, "logps/chosen": -193.12986755371094, "logps/rejected": -332.30267333984375, "loss": 0.0539, "rewards/accuracies": 1.0, "rewards/chosen": 2.565599203109741, "rewards/margins": 10.397761344909668, "rewards/rejected": -7.832162380218506, "step": 1745 }, { "epoch": 0.39, "learning_rate": 9.318128751913764e-06, "logits/chosen": -1.1208291053771973, "logits/rejected": -1.0411938428878784, "logps/chosen": -186.03347778320312, "logps/rejected": -343.078125, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -5.823089599609375, "rewards/margins": 4.103022575378418, "rewards/rejected": -9.926112174987793, "step": 1746 }, { "epoch": 0.39, "learning_rate": 9.317224901911941e-06, "logits/chosen": -0.741446316242218, "logits/rejected": -0.4615715444087982, "logps/chosen": -263.57220458984375, "logps/rejected": -484.31573486328125, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.7627761960029602, "rewards/margins": 17.918895721435547, "rewards/rejected": -18.681671142578125, "step": 1747 }, { "epoch": 0.39, "learning_rate": 9.316320497157097e-06, "logits/chosen": -0.9489016532897949, "logits/rejected": -0.9489016532897949, "logps/chosen": -37.91913986206055, "logps/rejected": -37.91913986206055, "loss": 0.4451, "rewards/accuracies": 0.0, "rewards/chosen": -2.2613203525543213, "rewards/margins": 0.0, "rewards/rejected": -2.2613203525543213, "step": 1748 }, { "epoch": 0.39, "learning_rate": 9.315415537765446e-06, "logits/chosen": -0.7918497920036316, "logits/rejected": -0.5740693807601929, "logps/chosen": -197.3500518798828, "logps/rejected": -335.903076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7315048575401306, "rewards/margins": 15.392924308776855, "rewards/rejected": -14.661419868469238, "step": 1749 }, { "epoch": 0.39, "learning_rate": 9.314510023853272e-06, "logits/chosen": -1.0283092260360718, "logits/rejected": -1.0005496740341187, "logps/chosen": -107.2694320678711, "logps/rejected": -203.19497680664062, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.195465087890625, "rewards/margins": 5.346493721008301, "rewards/rejected": -7.541958808898926, "step": 1750 }, { "epoch": 0.39, "learning_rate": 9.313603955536931e-06, "logits/chosen": -0.7980421185493469, "logits/rejected": -0.7586056590080261, "logps/chosen": -78.74410247802734, "logps/rejected": -35.082332611083984, "loss": 0.1531, "rewards/accuracies": 1.0, "rewards/chosen": -0.7010551691055298, "rewards/margins": 1.1671333312988281, "rewards/rejected": -1.868188500404358, "step": 1751 }, { "epoch": 0.39, "learning_rate": 9.312697332932852e-06, "logits/chosen": -0.8349556922912598, "logits/rejected": -0.8204799890518188, "logps/chosen": -199.8553466796875, "logps/rejected": -162.86114501953125, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -0.16090241074562073, "rewards/margins": 2.9445831775665283, "rewards/rejected": -3.105485677719116, "step": 1752 }, { "epoch": 0.39, "learning_rate": 9.311790156157533e-06, "logits/chosen": -1.1874583959579468, "logits/rejected": -1.2335412502288818, "logps/chosen": -126.89810180664062, "logps/rejected": -108.98257446289062, "loss": 0.9276, "rewards/accuracies": 0.0, "rewards/chosen": -1.3305939435958862, "rewards/margins": -0.08939826488494873, "rewards/rejected": -1.2411956787109375, "step": 1753 }, { "epoch": 0.39, "learning_rate": 9.310882425327544e-06, "logits/chosen": -0.9502772688865662, "logits/rejected": -0.9405902624130249, "logps/chosen": -151.36070251464844, "logps/rejected": -101.2577133178711, "loss": 0.784, "rewards/accuracies": 0.0, "rewards/chosen": -3.360516309738159, "rewards/margins": -1.317885398864746, "rewards/rejected": -2.042630910873413, "step": 1754 }, { "epoch": 0.39, "learning_rate": 9.309974140559525e-06, "logits/chosen": -1.2780094146728516, "logits/rejected": -1.2629939317703247, "logps/chosen": -79.70658874511719, "logps/rejected": -50.29153823852539, "loss": 0.0348, "rewards/accuracies": 1.0, "rewards/chosen": 1.725293755531311, "rewards/margins": 3.384171724319458, "rewards/rejected": -1.658877968788147, "step": 1755 }, { "epoch": 0.39, "learning_rate": 9.309065301970193e-06, "logits/chosen": -1.0996716022491455, "logits/rejected": -1.090517282485962, "logps/chosen": -123.96772003173828, "logps/rejected": -153.49639892578125, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.087122343480587, "rewards/margins": 4.249798774719238, "rewards/rejected": -4.336921215057373, "step": 1756 }, { "epoch": 0.39, "learning_rate": 9.308155909676326e-06, "logits/chosen": -1.102404236793518, "logits/rejected": -1.0946788787841797, "logps/chosen": -61.228153228759766, "logps/rejected": -70.04013061523438, "loss": 0.3284, "rewards/accuracies": 1.0, "rewards/chosen": -2.920698642730713, "rewards/margins": 0.08757328987121582, "rewards/rejected": -3.0082719326019287, "step": 1757 }, { "epoch": 0.39, "learning_rate": 9.307245963794782e-06, "logits/chosen": -0.6730895638465881, "logits/rejected": -0.6461323499679565, "logps/chosen": -75.94676208496094, "logps/rejected": -71.423583984375, "loss": 0.9353, "rewards/accuracies": 1.0, "rewards/chosen": 0.053171541541814804, "rewards/margins": 1.3727058172225952, "rewards/rejected": -1.3195343017578125, "step": 1758 }, { "epoch": 0.39, "learning_rate": 9.306335464442485e-06, "logits/chosen": -1.0234010219573975, "logits/rejected": -1.0132030248641968, "logps/chosen": -133.4084014892578, "logps/rejected": -118.72291564941406, "loss": 0.0735, "rewards/accuracies": 1.0, "rewards/chosen": -2.6999497413635254, "rewards/margins": 2.1867895126342773, "rewards/rejected": -4.886739253997803, "step": 1759 }, { "epoch": 0.39, "learning_rate": 9.305424411736434e-06, "logits/chosen": -0.5435364246368408, "logits/rejected": -0.5435364246368408, "logps/chosen": -240.9064178466797, "logps/rejected": -240.9064178466797, "loss": 0.3513, "rewards/accuracies": 0.0, "rewards/chosen": -0.5229843258857727, "rewards/margins": 0.0, "rewards/rejected": -0.5229843258857727, "step": 1760 }, { "epoch": 0.39, "learning_rate": 9.304512805793696e-06, "logits/chosen": -0.8977506756782532, "logits/rejected": -0.9284707307815552, "logps/chosen": -181.2604522705078, "logps/rejected": -188.6062774658203, "loss": 0.2192, "rewards/accuracies": 1.0, "rewards/chosen": -1.4684768915176392, "rewards/margins": 0.6285706758499146, "rewards/rejected": -2.0970475673675537, "step": 1761 }, { "epoch": 0.39, "learning_rate": 9.30360064673141e-06, "logits/chosen": -0.9640070199966431, "logits/rejected": -0.9667142629623413, "logps/chosen": -71.86841583251953, "logps/rejected": -109.80606079101562, "loss": 0.968, "rewards/accuracies": 1.0, "rewards/chosen": -0.4648880064487457, "rewards/margins": 0.3815162479877472, "rewards/rejected": -0.8464042544364929, "step": 1762 }, { "epoch": 0.39, "learning_rate": 9.302687934666787e-06, "logits/chosen": -0.8188190460205078, "logits/rejected": -0.8188190460205078, "logps/chosen": -104.85577392578125, "logps/rejected": -104.85577392578125, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -2.0889344215393066, "rewards/margins": 0.0, "rewards/rejected": -2.0889344215393066, "step": 1763 }, { "epoch": 0.39, "learning_rate": 9.301774669717108e-06, "logits/chosen": -1.1266080141067505, "logits/rejected": -1.1025185585021973, "logps/chosen": -122.8846435546875, "logps/rejected": -106.96072387695312, "loss": 0.4842, "rewards/accuracies": 0.0, "rewards/chosen": -0.63671875, "rewards/margins": -0.48820340633392334, "rewards/rejected": -0.14851532876491547, "step": 1764 }, { "epoch": 0.39, "learning_rate": 9.300860851999723e-06, "logits/chosen": -1.2808645963668823, "logits/rejected": -1.2519147396087646, "logps/chosen": -122.60807800292969, "logps/rejected": -171.72364807128906, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": 0.33411940932273865, "rewards/margins": 3.270688533782959, "rewards/rejected": -2.9365692138671875, "step": 1765 }, { "epoch": 0.39, "learning_rate": 9.299946481632058e-06, "logits/chosen": -0.7423545718193054, "logits/rejected": -0.7354355454444885, "logps/chosen": -162.44276428222656, "logps/rejected": -180.2876739501953, "loss": 0.0825, "rewards/accuracies": 1.0, "rewards/chosen": -0.2519241273403168, "rewards/margins": 1.7301146984100342, "rewards/rejected": -1.9820388555526733, "step": 1766 }, { "epoch": 0.39, "learning_rate": 9.299031558731608e-06, "logits/chosen": -0.9116267561912537, "logits/rejected": -0.9271325469017029, "logps/chosen": -101.1871337890625, "logps/rejected": -133.2227020263672, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": -0.37736818194389343, "rewards/margins": 2.4039931297302246, "rewards/rejected": -2.7813613414764404, "step": 1767 }, { "epoch": 0.39, "learning_rate": 9.298116083415937e-06, "logits/chosen": -1.0334280729293823, "logits/rejected": -1.0334280729293823, "logps/chosen": -79.71000671386719, "logps/rejected": -79.71000671386719, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.2326576709747314, "rewards/margins": 0.0, "rewards/rejected": -2.2326576709747314, "step": 1768 }, { "epoch": 0.39, "learning_rate": 9.297200055802683e-06, "logits/chosen": -0.9218345880508423, "logits/rejected": -0.9301475882530212, "logps/chosen": -116.51058959960938, "logps/rejected": -254.22702026367188, "loss": 0.4299, "rewards/accuracies": 1.0, "rewards/chosen": -0.592175304889679, "rewards/margins": 1.7077865600585938, "rewards/rejected": -2.299961805343628, "step": 1769 }, { "epoch": 0.39, "learning_rate": 9.296283476009551e-06, "logits/chosen": -0.9033560752868652, "logits/rejected": -0.9118377566337585, "logps/chosen": -77.566650390625, "logps/rejected": -87.48494720458984, "loss": 0.2091, "rewards/accuracies": 1.0, "rewards/chosen": 0.2741653621196747, "rewards/margins": 0.6932258605957031, "rewards/rejected": -0.41906052827835083, "step": 1770 }, { "epoch": 0.39, "learning_rate": 9.295366344154319e-06, "logits/chosen": -1.0650875568389893, "logits/rejected": -1.022742509841919, "logps/chosen": -185.69384765625, "logps/rejected": -350.10723876953125, "loss": 0.0865, "rewards/accuracies": 1.0, "rewards/chosen": 0.8552963137626648, "rewards/margins": 10.274955749511719, "rewards/rejected": -9.419659614562988, "step": 1771 }, { "epoch": 0.39, "learning_rate": 9.29444866035484e-06, "logits/chosen": -1.1242419481277466, "logits/rejected": -1.1154265403747559, "logps/chosen": -177.00820922851562, "logps/rejected": -154.62139892578125, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -0.04635009914636612, "rewards/margins": 2.72324538230896, "rewards/rejected": -2.7695953845977783, "step": 1772 }, { "epoch": 0.39, "learning_rate": 9.293530424729029e-06, "logits/chosen": -0.801966667175293, "logits/rejected": -0.732858419418335, "logps/chosen": -130.1348876953125, "logps/rejected": -165.511474609375, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": -0.12895965576171875, "rewards/margins": 6.635841369628906, "rewards/rejected": -6.764801025390625, "step": 1773 }, { "epoch": 0.39, "learning_rate": 9.292611637394881e-06, "logits/chosen": -1.0272363424301147, "logits/rejected": -1.0121110677719116, "logps/chosen": -227.82049560546875, "logps/rejected": -211.58740234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.4374313354492188, "rewards/margins": 10.188581466674805, "rewards/rejected": -7.751150608062744, "step": 1774 }, { "epoch": 0.39, "learning_rate": 9.291692298470457e-06, "logits/chosen": -1.0933990478515625, "logits/rejected": -1.0068705081939697, "logps/chosen": -65.78585815429688, "logps/rejected": -356.3687744140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.09245147556066513, "rewards/margins": 9.627571105957031, "rewards/rejected": -9.535120010375977, "step": 1775 }, { "epoch": 0.39, "learning_rate": 9.29077240807389e-06, "logits/chosen": -0.9713777899742126, "logits/rejected": -0.9350612759590149, "logps/chosen": -267.27301025390625, "logps/rejected": -174.01893615722656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.7871460318565369, "rewards/margins": 8.577345848083496, "rewards/rejected": -7.790200233459473, "step": 1776 }, { "epoch": 0.39, "learning_rate": 9.289851966323382e-06, "logits/chosen": -1.0772287845611572, "logits/rejected": -1.166714072227478, "logps/chosen": -188.7679901123047, "logps/rejected": -70.09586334228516, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -0.3838790953159332, "rewards/margins": 3.593228816986084, "rewards/rejected": -3.9771080017089844, "step": 1777 }, { "epoch": 0.39, "learning_rate": 9.288930973337212e-06, "logits/chosen": -0.720417857170105, "logits/rejected": -0.6888987421989441, "logps/chosen": -109.22027587890625, "logps/rejected": -94.8987808227539, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -2.0565261840820312, "rewards/margins": 3.1565332412719727, "rewards/rejected": -5.213059425354004, "step": 1778 }, { "epoch": 0.39, "learning_rate": 9.288009429233717e-06, "logits/chosen": -0.7550575733184814, "logits/rejected": -0.7550575733184814, "logps/chosen": -159.70401000976562, "logps/rejected": -159.70401000976562, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.27485728263855, "rewards/margins": 0.0, "rewards/rejected": -3.27485728263855, "step": 1779 }, { "epoch": 0.39, "learning_rate": 9.287087334131322e-06, "logits/chosen": -1.0068937540054321, "logits/rejected": -1.081451416015625, "logps/chosen": -206.62222290039062, "logps/rejected": -101.24722290039062, "loss": 0.2892, "rewards/accuracies": 1.0, "rewards/chosen": -0.9615020751953125, "rewards/margins": 0.24838721752166748, "rewards/rejected": -1.20988929271698, "step": 1780 }, { "epoch": 0.39, "learning_rate": 9.28616468814851e-06, "logits/chosen": -0.8885388970375061, "logits/rejected": -0.8787174820899963, "logps/chosen": -200.71826171875, "logps/rejected": -131.3917999267578, "loss": 0.141, "rewards/accuracies": 1.0, "rewards/chosen": -2.156790256500244, "rewards/margins": 1.30702805519104, "rewards/rejected": -3.463818311691284, "step": 1781 }, { "epoch": 0.39, "learning_rate": 9.28524149140384e-06, "logits/chosen": -1.0709072351455688, "logits/rejected": -1.06058931350708, "logps/chosen": -212.83340454101562, "logps/rejected": -260.75848388671875, "loss": 0.06, "rewards/accuracies": 1.0, "rewards/chosen": -4.631213665008545, "rewards/margins": 2.059420585632324, "rewards/rejected": -6.690634250640869, "step": 1782 }, { "epoch": 0.39, "learning_rate": 9.284317744015938e-06, "logits/chosen": -1.0063058137893677, "logits/rejected": -0.8763988614082336, "logps/chosen": -212.2509765625, "logps/rejected": -285.0426940917969, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": 2.857560873031616, "rewards/margins": 8.991734504699707, "rewards/rejected": -6.13417387008667, "step": 1783 }, { "epoch": 0.39, "learning_rate": 9.283393446103506e-06, "logits/chosen": -0.7375516295433044, "logits/rejected": -0.7208096981048584, "logps/chosen": -172.69517517089844, "logps/rejected": -162.07630920410156, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": -1.5031341314315796, "rewards/margins": 2.594180107116699, "rewards/rejected": -4.097314357757568, "step": 1784 }, { "epoch": 0.4, "learning_rate": 9.282468597785312e-06, "logits/chosen": -1.0152838230133057, "logits/rejected": -0.9734241962432861, "logps/chosen": -111.10283660888672, "logps/rejected": -128.685546875, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.10989761352539062, "rewards/margins": 3.2282416820526123, "rewards/rejected": -3.338139295578003, "step": 1785 }, { "epoch": 0.4, "learning_rate": 9.2815431991802e-06, "logits/chosen": -0.9590309262275696, "logits/rejected": -0.9481092095375061, "logps/chosen": -210.7629852294922, "logps/rejected": -217.63156127929688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.49200439453125, "rewards/margins": 5.876585483551025, "rewards/rejected": -6.368589878082275, "step": 1786 }, { "epoch": 0.4, "learning_rate": 9.280617250407078e-06, "logits/chosen": -0.8151285648345947, "logits/rejected": -0.7722629308700562, "logps/chosen": -79.96044158935547, "logps/rejected": -115.08149719238281, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": 1.6894524097442627, "rewards/margins": 3.344357967376709, "rewards/rejected": -1.6549056768417358, "step": 1787 }, { "epoch": 0.4, "learning_rate": 9.27969075158493e-06, "logits/chosen": -0.6016903519630432, "logits/rejected": -0.5794713497161865, "logps/chosen": -215.74998474121094, "logps/rejected": -153.08926391601562, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.29904937744140625, "rewards/margins": 6.3012590408325195, "rewards/rejected": -6.002209663391113, "step": 1788 }, { "epoch": 0.4, "learning_rate": 9.278763702832809e-06, "logits/chosen": -1.1159679889678955, "logits/rejected": -1.1118587255477905, "logps/chosen": -86.06767272949219, "logps/rejected": -36.823368072509766, "loss": 0.4087, "rewards/accuracies": 0.0, "rewards/chosen": -1.9225739240646362, "rewards/margins": -0.21813762187957764, "rewards/rejected": -1.7044363021850586, "step": 1789 }, { "epoch": 0.4, "learning_rate": 9.277836104269837e-06, "logits/chosen": -0.9125413298606873, "logits/rejected": -0.9523801803588867, "logps/chosen": -172.24111938476562, "logps/rejected": -392.8079833984375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.7722976207733154, "rewards/margins": 10.397850036621094, "rewards/rejected": -13.170147895812988, "step": 1790 }, { "epoch": 0.4, "learning_rate": 9.276907956015212e-06, "logits/chosen": -0.9972127079963684, "logits/rejected": -1.0074491500854492, "logps/chosen": -161.21685791015625, "logps/rejected": -235.08035278320312, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 0.3865219056606293, "rewards/margins": 6.203671455383301, "rewards/rejected": -5.817149639129639, "step": 1791 }, { "epoch": 0.4, "learning_rate": 9.275979258188192e-06, "logits/chosen": -0.7748920321464539, "logits/rejected": -0.23740701377391815, "logps/chosen": -115.70758819580078, "logps/rejected": -223.7756805419922, "loss": 0.8545, "rewards/accuracies": 1.0, "rewards/chosen": -2.5245919227600098, "rewards/margins": 1.6589698791503906, "rewards/rejected": -4.1835618019104, "step": 1792 }, { "epoch": 0.4, "learning_rate": 9.275050010908118e-06, "logits/chosen": -0.8588098287582397, "logits/rejected": -0.8068738579750061, "logps/chosen": -168.59716796875, "logps/rejected": -151.56536865234375, "loss": 0.1466, "rewards/accuracies": 1.0, "rewards/chosen": -2.956063985824585, "rewards/margins": 1.076972246170044, "rewards/rejected": -4.033036231994629, "step": 1793 }, { "epoch": 0.4, "learning_rate": 9.274120214294395e-06, "logits/chosen": -1.161794662475586, "logits/rejected": -1.0889239311218262, "logps/chosen": -129.23565673828125, "logps/rejected": -281.62506103515625, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": -2.5560073852539062, "rewards/margins": 3.2074294090270996, "rewards/rejected": -5.763436794281006, "step": 1794 }, { "epoch": 0.4, "learning_rate": 9.273189868466499e-06, "logits/chosen": -0.9458959102630615, "logits/rejected": -0.9458959102630615, "logps/chosen": -200.57586669921875, "logps/rejected": -200.57586669921875, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.980749607086182, "rewards/margins": 0.0, "rewards/rejected": -4.980749607086182, "step": 1795 }, { "epoch": 0.4, "learning_rate": 9.272258973543977e-06, "logits/chosen": -0.5212450623512268, "logits/rejected": -0.49847128987312317, "logps/chosen": -191.95059204101562, "logps/rejected": -196.16729736328125, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -6.090638637542725, "rewards/margins": 4.510560512542725, "rewards/rejected": -10.60119915008545, "step": 1796 }, { "epoch": 0.4, "learning_rate": 9.271327529646447e-06, "logits/chosen": -1.097788691520691, "logits/rejected": -1.1378337144851685, "logps/chosen": -245.81130981445312, "logps/rejected": -172.46914672851562, "loss": 0.0676, "rewards/accuracies": 1.0, "rewards/chosen": -0.7022796869277954, "rewards/margins": 1.933796763420105, "rewards/rejected": -2.6360764503479004, "step": 1797 }, { "epoch": 0.4, "learning_rate": 9.270395536893599e-06, "logits/chosen": -0.8941379189491272, "logits/rejected": -0.8875806927680969, "logps/chosen": -97.5412368774414, "logps/rejected": -165.65866088867188, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -2.268561601638794, "rewards/margins": 5.150568008422852, "rewards/rejected": -7.419129371643066, "step": 1798 }, { "epoch": 0.4, "learning_rate": 9.269462995405189e-06, "logits/chosen": -0.7554720044136047, "logits/rejected": -0.8890108466148376, "logps/chosen": -136.09017944335938, "logps/rejected": -478.53375244140625, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -4.614284038543701, "rewards/margins": 22.631330490112305, "rewards/rejected": -27.245615005493164, "step": 1799 }, { "epoch": 0.4, "learning_rate": 9.268529905301049e-06, "logits/chosen": -0.8830046653747559, "logits/rejected": -0.8927912712097168, "logps/chosen": -208.44345092773438, "logps/rejected": -176.39923095703125, "loss": 0.6588, "rewards/accuracies": 0.0, "rewards/chosen": -0.08017425984144211, "rewards/margins": -0.9760971069335938, "rewards/rejected": 0.895922839641571, "step": 1800 }, { "epoch": 0.4, "learning_rate": 9.267596266701076e-06, "logits/chosen": -0.7180566787719727, "logits/rejected": -0.6820715069770813, "logps/chosen": -151.6920928955078, "logps/rejected": -256.3563232421875, "loss": 2.5973, "rewards/accuracies": 0.0, "rewards/chosen": -3.2926690578460693, "rewards/margins": -4.803557872772217, "rewards/rejected": 1.510888695716858, "step": 1801 }, { "epoch": 0.4, "learning_rate": 9.266662079725241e-06, "logits/chosen": -0.8653208017349243, "logits/rejected": -0.8031449317932129, "logps/chosen": -116.48211669921875, "logps/rejected": -132.4453125, "loss": 0.076, "rewards/accuracies": 1.0, "rewards/chosen": -1.0874786376953125, "rewards/margins": 3.0461997985839844, "rewards/rejected": -4.133678436279297, "step": 1802 }, { "epoch": 0.4, "learning_rate": 9.265727344493587e-06, "logits/chosen": -0.8972950577735901, "logits/rejected": -0.9016608595848083, "logps/chosen": -88.67938232421875, "logps/rejected": -132.86509704589844, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -1.46844482421875, "rewards/margins": 3.3926215171813965, "rewards/rejected": -4.8610663414001465, "step": 1803 }, { "epoch": 0.4, "learning_rate": 9.264792061126224e-06, "logits/chosen": -0.850034773349762, "logits/rejected": -0.7908529043197632, "logps/chosen": -222.0225830078125, "logps/rejected": -178.7041015625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.46490174531936646, "rewards/margins": 6.027012348175049, "rewards/rejected": -6.49191427230835, "step": 1804 }, { "epoch": 0.4, "learning_rate": 9.263856229743334e-06, "logits/chosen": -0.744169294834137, "logits/rejected": -0.6929659843444824, "logps/chosen": -96.97982788085938, "logps/rejected": -170.70709228515625, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 0.2575721740722656, "rewards/margins": 4.8928351402282715, "rewards/rejected": -4.635262966156006, "step": 1805 }, { "epoch": 0.4, "learning_rate": 9.262919850465166e-06, "logits/chosen": -1.0439679622650146, "logits/rejected": -0.8310955762863159, "logps/chosen": -200.21310424804688, "logps/rejected": -935.9332275390625, "loss": 0.1076, "rewards/accuracies": 1.0, "rewards/chosen": -0.5574173331260681, "rewards/margins": 80.3272476196289, "rewards/rejected": -80.8846664428711, "step": 1806 }, { "epoch": 0.4, "learning_rate": 9.261982923412046e-06, "logits/chosen": -0.665070652961731, "logits/rejected": -0.34871944785118103, "logps/chosen": -96.94155883789062, "logps/rejected": -552.8894653320312, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -0.6881653070449829, "rewards/margins": 44.93680953979492, "rewards/rejected": -45.62497329711914, "step": 1807 }, { "epoch": 0.4, "learning_rate": 9.261045448704367e-06, "logits/chosen": -0.9152595400810242, "logits/rejected": -0.8908374905586243, "logps/chosen": -56.0573844909668, "logps/rejected": -102.54867553710938, "loss": 0.0648, "rewards/accuracies": 1.0, "rewards/chosen": -0.829511284828186, "rewards/margins": 2.019026756286621, "rewards/rejected": -2.8485381603240967, "step": 1808 }, { "epoch": 0.4, "learning_rate": 9.26010742646259e-06, "logits/chosen": -0.8207032084465027, "logits/rejected": -0.8052092790603638, "logps/chosen": -176.93515014648438, "logps/rejected": -250.55160522460938, "loss": 1.4704, "rewards/accuracies": 1.0, "rewards/chosen": -4.94210958480835, "rewards/margins": 1.2543091773986816, "rewards/rejected": -6.196418762207031, "step": 1809 }, { "epoch": 0.4, "learning_rate": 9.259168856807249e-06, "logits/chosen": -0.8945441842079163, "logits/rejected": -0.8382334113121033, "logps/chosen": -79.13662719726562, "logps/rejected": -146.59237670898438, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -1.3188599348068237, "rewards/margins": 3.6407485008239746, "rewards/rejected": -4.959608554840088, "step": 1810 }, { "epoch": 0.4, "learning_rate": 9.25822973985895e-06, "logits/chosen": -0.7721077799797058, "logits/rejected": -0.7513676285743713, "logps/chosen": -145.13330078125, "logps/rejected": -146.0163116455078, "loss": 0.1137, "rewards/accuracies": 1.0, "rewards/chosen": -1.4341232776641846, "rewards/margins": 2.2079834938049316, "rewards/rejected": -3.642106771469116, "step": 1811 }, { "epoch": 0.4, "learning_rate": 9.257290075738365e-06, "logits/chosen": -1.175022840499878, "logits/rejected": -1.175022840499878, "logps/chosen": -143.23226928710938, "logps/rejected": -143.23226928710938, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -1.8173125982284546, "rewards/margins": 0.0, "rewards/rejected": -1.8173125982284546, "step": 1812 }, { "epoch": 0.4, "learning_rate": 9.25634986456624e-06, "logits/chosen": -1.1120439767837524, "logits/rejected": -1.1019068956375122, "logps/chosen": -180.16213989257812, "logps/rejected": -139.55471801757812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.8163284659385681, "rewards/margins": 7.101150035858154, "rewards/rejected": -6.284821510314941, "step": 1813 }, { "epoch": 0.4, "learning_rate": 9.25540910646339e-06, "logits/chosen": -0.8474788069725037, "logits/rejected": -0.8847348093986511, "logps/chosen": -241.22947692871094, "logps/rejected": -173.98008728027344, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/chosen": 0.7057205438613892, "rewards/margins": 4.94207763671875, "rewards/rejected": -4.23635721206665, "step": 1814 }, { "epoch": 0.4, "learning_rate": 9.254467801550699e-06, "logits/chosen": -0.9224382042884827, "logits/rejected": -0.8943818211555481, "logps/chosen": -94.23167419433594, "logps/rejected": -128.55487060546875, "loss": 1.6006, "rewards/accuracies": 1.0, "rewards/chosen": 1.0693588256835938, "rewards/margins": 3.5487160682678223, "rewards/rejected": -2.4793572425842285, "step": 1815 }, { "epoch": 0.4, "learning_rate": 9.253525949949123e-06, "logits/chosen": -0.7602874040603638, "logits/rejected": -0.7602874040603638, "logps/chosen": -81.74368286132812, "logps/rejected": -81.74368286132812, "loss": 0.3565, "rewards/accuracies": 0.0, "rewards/chosen": -1.943311333656311, "rewards/margins": 0.0, "rewards/rejected": -1.943311333656311, "step": 1816 }, { "epoch": 0.4, "learning_rate": 9.252583551779687e-06, "logits/chosen": -0.8624078631401062, "logits/rejected": -0.8376554846763611, "logps/chosen": -164.30026245117188, "logps/rejected": -155.45465087890625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 0.6548004150390625, "rewards/margins": 6.108541965484619, "rewards/rejected": -5.453741550445557, "step": 1817 }, { "epoch": 0.4, "learning_rate": 9.251640607163488e-06, "logits/chosen": -0.6430142521858215, "logits/rejected": -0.6729797720909119, "logps/chosen": -186.6893310546875, "logps/rejected": -219.592041015625, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -5.374060153961182, "rewards/margins": 4.330320835113525, "rewards/rejected": -9.704380989074707, "step": 1818 }, { "epoch": 0.4, "learning_rate": 9.250697116221692e-06, "logits/chosen": -0.8120507597923279, "logits/rejected": -0.8268885016441345, "logps/chosen": -287.8125305175781, "logps/rejected": -145.82376098632812, "loss": 3.6912, "rewards/accuracies": 0.0, "rewards/chosen": -13.173754692077637, "rewards/margins": -7.381722927093506, "rewards/rejected": -5.792031764984131, "step": 1819 }, { "epoch": 0.4, "learning_rate": 9.249753079075534e-06, "logits/chosen": -0.9309261441230774, "logits/rejected": -0.935964047908783, "logps/chosen": -93.12333679199219, "logps/rejected": -158.255615234375, "loss": 0.0473, "rewards/accuracies": 1.0, "rewards/chosen": 0.31635361909866333, "rewards/margins": 2.337186336517334, "rewards/rejected": -2.0208327770233154, "step": 1820 }, { "epoch": 0.4, "learning_rate": 9.248808495846322e-06, "logits/chosen": -0.7739257216453552, "logits/rejected": -0.784515380859375, "logps/chosen": -89.88954162597656, "logps/rejected": -107.10177612304688, "loss": 0.1021, "rewards/accuracies": 1.0, "rewards/chosen": 0.6614479422569275, "rewards/margins": 1.5838418006896973, "rewards/rejected": -0.922393798828125, "step": 1821 }, { "epoch": 0.4, "learning_rate": 9.247863366655434e-06, "logits/chosen": -1.1573985815048218, "logits/rejected": -1.1290737390518188, "logps/chosen": -160.48245239257812, "logps/rejected": -125.70886993408203, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": 0.314126580953598, "rewards/margins": 4.932518005371094, "rewards/rejected": -4.618391513824463, "step": 1822 }, { "epoch": 0.4, "learning_rate": 9.246917691624314e-06, "logits/chosen": -0.9678341746330261, "logits/rejected": -0.9341678023338318, "logps/chosen": -94.38996887207031, "logps/rejected": -196.18362426757812, "loss": 0.429, "rewards/accuracies": 1.0, "rewards/chosen": -1.3698867559432983, "rewards/margins": 3.667497158050537, "rewards/rejected": -5.037384033203125, "step": 1823 }, { "epoch": 0.4, "learning_rate": 9.245971470874477e-06, "logits/chosen": -0.6445565819740295, "logits/rejected": -0.6233219504356384, "logps/chosen": -101.97549438476562, "logps/rejected": -95.58258056640625, "loss": 0.1521, "rewards/accuracies": 1.0, "rewards/chosen": -0.34721070528030396, "rewards/margins": 1.0340392589569092, "rewards/rejected": -1.381250023841858, "step": 1824 }, { "epoch": 0.4, "learning_rate": 9.245024704527517e-06, "logits/chosen": -0.7643442749977112, "logits/rejected": -0.6442616581916809, "logps/chosen": -83.79464721679688, "logps/rejected": -50.61845397949219, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": 1.0605987310409546, "rewards/margins": 3.649566173553467, "rewards/rejected": -2.5889673233032227, "step": 1825 }, { "epoch": 0.4, "learning_rate": 9.244077392705085e-06, "logits/chosen": -0.9783565998077393, "logits/rejected": -0.9986261129379272, "logps/chosen": -167.18544006347656, "logps/rejected": -151.25588989257812, "loss": 0.4424, "rewards/accuracies": 0.0, "rewards/chosen": -0.2255508452653885, "rewards/margins": -0.3460189700126648, "rewards/rejected": 0.1204681396484375, "step": 1826 }, { "epoch": 0.4, "learning_rate": 9.243129535528909e-06, "logits/chosen": -1.349820613861084, "logits/rejected": -1.3758676052093506, "logps/chosen": -146.63723754882812, "logps/rejected": -124.44024658203125, "loss": 0.194, "rewards/accuracies": 1.0, "rewards/chosen": -2.48075795173645, "rewards/margins": 0.8354697227478027, "rewards/rejected": -3.316227674484253, "step": 1827 }, { "epoch": 0.4, "learning_rate": 9.242181133120791e-06, "logits/chosen": -1.2179741859436035, "logits/rejected": -1.28529953956604, "logps/chosen": -136.1138916015625, "logps/rejected": -66.9916000366211, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": 0.19637452065944672, "rewards/margins": 3.613067865371704, "rewards/rejected": -3.4166934490203857, "step": 1828 }, { "epoch": 0.4, "learning_rate": 9.241232185602594e-06, "logits/chosen": -0.8755495548248291, "logits/rejected": -0.9004972577095032, "logps/chosen": -247.66415405273438, "logps/rejected": -205.06771850585938, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": 2.1402862071990967, "rewards/margins": 8.954666137695312, "rewards/rejected": -6.814380168914795, "step": 1829 }, { "epoch": 0.41, "learning_rate": 9.240282693096257e-06, "logits/chosen": -0.5291736721992493, "logits/rejected": -0.5291736721992493, "logps/chosen": -107.82281494140625, "logps/rejected": -107.82281494140625, "loss": 0.3604, "rewards/accuracies": 0.0, "rewards/chosen": -1.6386123895645142, "rewards/margins": 0.0, "rewards/rejected": -1.6386123895645142, "step": 1830 }, { "epoch": 0.41, "learning_rate": 9.239332655723787e-06, "logits/chosen": -1.1342592239379883, "logits/rejected": -1.1762760877609253, "logps/chosen": -199.47982788085938, "logps/rejected": -113.03970336914062, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.9828826785087585, "rewards/margins": 6.207005977630615, "rewards/rejected": -5.224123477935791, "step": 1831 }, { "epoch": 0.41, "learning_rate": 9.238382073607262e-06, "logits/chosen": -0.9034227132797241, "logits/rejected": -0.8998690843582153, "logps/chosen": -139.37762451171875, "logps/rejected": -105.36298370361328, "loss": 0.0614, "rewards/accuracies": 1.0, "rewards/chosen": 1.0466965436935425, "rewards/margins": 2.1982719898223877, "rewards/rejected": -1.1515754461288452, "step": 1832 }, { "epoch": 0.41, "learning_rate": 9.237430946868829e-06, "logits/chosen": -0.7331317067146301, "logits/rejected": -0.6994927525520325, "logps/chosen": -86.21139526367188, "logps/rejected": -132.84481811523438, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.2914322018623352, "rewards/margins": 5.198742866516113, "rewards/rejected": -4.907310485839844, "step": 1833 }, { "epoch": 0.41, "learning_rate": 9.236479275630707e-06, "logits/chosen": -1.0968719720840454, "logits/rejected": -1.0754003524780273, "logps/chosen": -89.052001953125, "logps/rejected": -134.37570190429688, "loss": 0.2896, "rewards/accuracies": 1.0, "rewards/chosen": -0.009790039621293545, "rewards/margins": 1.7784919738769531, "rewards/rejected": -1.788282036781311, "step": 1834 }, { "epoch": 0.41, "learning_rate": 9.235527060015182e-06, "logits/chosen": -0.9099655151367188, "logits/rejected": -0.8871170282363892, "logps/chosen": -84.2685546875, "logps/rejected": -112.8718032836914, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -0.13545837998390198, "rewards/margins": 4.184090614318848, "rewards/rejected": -4.319549083709717, "step": 1835 }, { "epoch": 0.41, "learning_rate": 9.23457430014461e-06, "logits/chosen": -0.9977234601974487, "logits/rejected": -0.9004911184310913, "logps/chosen": -170.3296661376953, "logps/rejected": -263.38262939453125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 2.1759140491485596, "rewards/margins": 5.330116271972656, "rewards/rejected": -3.1542022228240967, "step": 1836 }, { "epoch": 0.41, "learning_rate": 9.233620996141421e-06, "logits/chosen": -1.1889539957046509, "logits/rejected": -1.187712550163269, "logps/chosen": -103.8575439453125, "logps/rejected": -200.89666748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2657928466796875, "rewards/margins": 9.927742004394531, "rewards/rejected": -9.661949157714844, "step": 1837 }, { "epoch": 0.41, "learning_rate": 9.232667148128112e-06, "logits/chosen": -1.34012770652771, "logits/rejected": -1.3192219734191895, "logps/chosen": -81.09016418457031, "logps/rejected": -145.26742553710938, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 1.3658615350723267, "rewards/margins": 4.760712623596191, "rewards/rejected": -3.394850969314575, "step": 1838 }, { "epoch": 0.41, "learning_rate": 9.231712756227249e-06, "logits/chosen": -1.1111743450164795, "logits/rejected": -1.1022859811782837, "logps/chosen": -113.34722900390625, "logps/rejected": -111.2147216796875, "loss": 0.7454, "rewards/accuracies": 1.0, "rewards/chosen": -3.123511552810669, "rewards/margins": 0.12243032455444336, "rewards/rejected": -3.2459418773651123, "step": 1839 }, { "epoch": 0.41, "learning_rate": 9.23075782056147e-06, "logits/chosen": -1.0873712301254272, "logits/rejected": -1.0724432468414307, "logps/chosen": -83.09808349609375, "logps/rejected": -219.41192626953125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 0.4093994200229645, "rewards/margins": 4.8102569580078125, "rewards/rejected": -4.400857448577881, "step": 1840 }, { "epoch": 0.41, "learning_rate": 9.229802341253482e-06, "logits/chosen": -0.9356270432472229, "logits/rejected": -0.8782628178596497, "logps/chosen": -118.27175903320312, "logps/rejected": -154.22950744628906, "loss": 0.0767, "rewards/accuracies": 1.0, "rewards/chosen": -1.6867507696151733, "rewards/margins": 1.805890679359436, "rewards/rejected": -3.4926414489746094, "step": 1841 }, { "epoch": 0.41, "learning_rate": 9.22884631842606e-06, "logits/chosen": -1.190733551979065, "logits/rejected": -1.1486490964889526, "logps/chosen": -117.48274230957031, "logps/rejected": -235.86207580566406, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 0.7221023440361023, "rewards/margins": 5.5369367599487305, "rewards/rejected": -4.8148345947265625, "step": 1842 }, { "epoch": 0.41, "learning_rate": 9.227889752202052e-06, "logits/chosen": -0.9342259764671326, "logits/rejected": -0.9253163933753967, "logps/chosen": -65.63477325439453, "logps/rejected": -71.13090515136719, "loss": 0.4482, "rewards/accuracies": 1.0, "rewards/chosen": 0.040009308606386185, "rewards/margins": 1.4898719787597656, "rewards/rejected": -1.4498627185821533, "step": 1843 }, { "epoch": 0.41, "learning_rate": 9.226932642704376e-06, "logits/chosen": -1.1354166269302368, "logits/rejected": -1.1228972673416138, "logps/chosen": -190.26290893554688, "logps/rejected": -253.9263458251953, "loss": 0.1776, "rewards/accuracies": 1.0, "rewards/chosen": -1.327215552330017, "rewards/margins": 9.621939659118652, "rewards/rejected": -10.9491548538208, "step": 1844 }, { "epoch": 0.41, "learning_rate": 9.225974990056016e-06, "logits/chosen": -1.0531198978424072, "logits/rejected": -1.0821449756622314, "logps/chosen": -97.71073150634766, "logps/rejected": -101.3007583618164, "loss": 0.2332, "rewards/accuracies": 1.0, "rewards/chosen": -1.3155158758163452, "rewards/margins": 0.5235702991485596, "rewards/rejected": -1.8390861749649048, "step": 1845 }, { "epoch": 0.41, "learning_rate": 9.225016794380027e-06, "logits/chosen": -1.167294979095459, "logits/rejected": -1.1625698804855347, "logps/chosen": -82.89601135253906, "logps/rejected": -122.4671859741211, "loss": 0.1002, "rewards/accuracies": 1.0, "rewards/chosen": -0.48031386733055115, "rewards/margins": 2.130781650543213, "rewards/rejected": -2.611095428466797, "step": 1846 }, { "epoch": 0.41, "learning_rate": 9.22405805579954e-06, "logits/chosen": -1.0733423233032227, "logits/rejected": -1.0937135219573975, "logps/chosen": -180.58383178710938, "logps/rejected": -206.91168212890625, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": 0.6915497183799744, "rewards/margins": 4.488650321960449, "rewards/rejected": -3.797100782394409, "step": 1847 }, { "epoch": 0.41, "learning_rate": 9.223098774437744e-06, "logits/chosen": -1.1444743871688843, "logits/rejected": -0.9720880389213562, "logps/chosen": -80.38906860351562, "logps/rejected": -404.5350341796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.3765830993652344, "rewards/margins": 29.629390716552734, "rewards/rejected": -28.2528076171875, "step": 1848 }, { "epoch": 0.41, "learning_rate": 9.222138950417908e-06, "logits/chosen": -0.9211219549179077, "logits/rejected": -0.9726548194885254, "logps/chosen": -181.47262573242188, "logps/rejected": -107.39315795898438, "loss": 0.0866, "rewards/accuracies": 1.0, "rewards/chosen": 0.3132827877998352, "rewards/margins": 1.6671600341796875, "rewards/rejected": -1.353877305984497, "step": 1849 }, { "epoch": 0.41, "learning_rate": 9.221178583863367e-06, "logits/chosen": -1.1061840057373047, "logits/rejected": -1.1222929954528809, "logps/chosen": -152.81568908691406, "logps/rejected": -157.30575561523438, "loss": 3.0482, "rewards/accuracies": 0.0, "rewards/chosen": -5.680037021636963, "rewards/margins": -3.831040859222412, "rewards/rejected": -1.8489960432052612, "step": 1850 }, { "epoch": 0.41, "learning_rate": 9.220217674897524e-06, "logits/chosen": -1.1016920804977417, "logits/rejected": -1.2269415855407715, "logps/chosen": -320.3564147949219, "logps/rejected": -145.31634521484375, "loss": 0.107, "rewards/accuracies": 1.0, "rewards/chosen": -3.718863010406494, "rewards/margins": 1.4331984519958496, "rewards/rejected": -5.152061462402344, "step": 1851 }, { "epoch": 0.41, "learning_rate": 9.219256223643857e-06, "logits/chosen": -0.9611075520515442, "logits/rejected": -0.909906268119812, "logps/chosen": -157.49484252929688, "logps/rejected": -262.98870849609375, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 2.6058290004730225, "rewards/margins": 7.644744873046875, "rewards/rejected": -5.038916110992432, "step": 1852 }, { "epoch": 0.41, "learning_rate": 9.218294230225908e-06, "logits/chosen": -0.6115972399711609, "logits/rejected": 0.38260674476623535, "logps/chosen": -83.65055847167969, "logps/rejected": -126.8190689086914, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.281771183013916, "rewards/margins": 5.384761810302734, "rewards/rejected": -7.66653299331665, "step": 1853 }, { "epoch": 0.41, "learning_rate": 9.217331694767291e-06, "logits/chosen": -1.2613404989242554, "logits/rejected": -1.1928430795669556, "logps/chosen": -133.7415008544922, "logps/rejected": -205.52755737304688, "loss": 0.0667, "rewards/accuracies": 1.0, "rewards/chosen": 0.2800125181674957, "rewards/margins": 3.13712477684021, "rewards/rejected": -2.857112169265747, "step": 1854 }, { "epoch": 0.41, "learning_rate": 9.21636861739169e-06, "logits/chosen": -1.0327098369598389, "logits/rejected": -0.9191134572029114, "logps/chosen": -155.504150390625, "logps/rejected": -236.83029174804688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.9683640003204346, "rewards/margins": 10.326563835144043, "rewards/rejected": -8.358200073242188, "step": 1855 }, { "epoch": 0.41, "learning_rate": 9.215404998222856e-06, "logits/chosen": -0.8943882584571838, "logits/rejected": -0.9007670283317566, "logps/chosen": -244.18060302734375, "logps/rejected": -103.33568572998047, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 1.9525054693222046, "rewards/margins": 6.1147541999816895, "rewards/rejected": -4.162248611450195, "step": 1856 }, { "epoch": 0.41, "learning_rate": 9.214440837384612e-06, "logits/chosen": -1.046234369277954, "logits/rejected": -0.9823353290557861, "logps/chosen": -124.7820053100586, "logps/rejected": -140.86883544921875, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": -0.6870079040527344, "rewards/margins": 2.610488176345825, "rewards/rejected": -3.2974960803985596, "step": 1857 }, { "epoch": 0.41, "learning_rate": 9.213476135000853e-06, "logits/chosen": -1.2346765995025635, "logits/rejected": -1.2031763792037964, "logps/chosen": -109.61343383789062, "logps/rejected": -218.99591064453125, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -1.029266357421875, "rewards/margins": 3.4617767333984375, "rewards/rejected": -4.4910430908203125, "step": 1858 }, { "epoch": 0.41, "learning_rate": 9.21251089119554e-06, "logits/chosen": -0.7202723622322083, "logits/rejected": -0.6550191640853882, "logps/chosen": -120.25496673583984, "logps/rejected": -106.54574584960938, "loss": 0.047, "rewards/accuracies": 1.0, "rewards/chosen": -1.2105721235275269, "rewards/margins": 2.318068027496338, "rewards/rejected": -3.528640031814575, "step": 1859 }, { "epoch": 0.41, "learning_rate": 9.211545106092706e-06, "logits/chosen": -1.0788767337799072, "logits/rejected": -1.05977463722229, "logps/chosen": -97.280517578125, "logps/rejected": -128.5240478515625, "loss": 0.2114, "rewards/accuracies": 1.0, "rewards/chosen": -0.4890594482421875, "rewards/margins": 0.7577224969863892, "rewards/rejected": -1.2467819452285767, "step": 1860 }, { "epoch": 0.41, "learning_rate": 9.210578779816449e-06, "logits/chosen": -0.9444155097007751, "logits/rejected": -0.9260900616645813, "logps/chosen": -112.7998275756836, "logps/rejected": -99.84938049316406, "loss": 0.6069, "rewards/accuracies": 0.0, "rewards/chosen": -2.3839356899261475, "rewards/margins": -0.8588823080062866, "rewards/rejected": -1.5250533819198608, "step": 1861 }, { "epoch": 0.41, "learning_rate": 9.20961191249094e-06, "logits/chosen": -1.0968413352966309, "logits/rejected": -1.0865201950073242, "logps/chosen": -113.57708740234375, "logps/rejected": -137.5650634765625, "loss": 0.1153, "rewards/accuracies": 1.0, "rewards/chosen": -0.9337868094444275, "rewards/margins": 1.4929511547088623, "rewards/rejected": -2.4267380237579346, "step": 1862 }, { "epoch": 0.41, "learning_rate": 9.208644504240418e-06, "logits/chosen": -1.0914491415023804, "logits/rejected": -1.0689902305603027, "logps/chosen": -96.33568572998047, "logps/rejected": -194.37416076660156, "loss": 0.0669, "rewards/accuracies": 1.0, "rewards/chosen": 0.21711884438991547, "rewards/margins": 1.9654541015625, "rewards/rejected": -1.7483352422714233, "step": 1863 }, { "epoch": 0.41, "learning_rate": 9.207676555189196e-06, "logits/chosen": -1.3967608213424683, "logits/rejected": -1.4390273094177246, "logps/chosen": -111.62554931640625, "logps/rejected": -77.74244689941406, "loss": 0.0712, "rewards/accuracies": 1.0, "rewards/chosen": -0.6602989435195923, "rewards/margins": 3.6867194175720215, "rewards/rejected": -4.347018241882324, "step": 1864 }, { "epoch": 0.41, "learning_rate": 9.206708065461652e-06, "logits/chosen": -1.008971095085144, "logits/rejected": -1.008971095085144, "logps/chosen": -75.40100860595703, "logps/rejected": -75.40100860595703, "loss": 0.928, "rewards/accuracies": 0.0, "rewards/chosen": -0.2478897124528885, "rewards/margins": 0.0, "rewards/rejected": -0.2478897124528885, "step": 1865 }, { "epoch": 0.41, "learning_rate": 9.205739035182236e-06, "logits/chosen": -1.0819650888442993, "logits/rejected": -1.1331660747528076, "logps/chosen": -171.143310546875, "logps/rejected": -66.17931365966797, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 2.360605001449585, "rewards/margins": 6.134260654449463, "rewards/rejected": -3.773655652999878, "step": 1866 }, { "epoch": 0.41, "learning_rate": 9.204769464475462e-06, "logits/chosen": -0.9954478144645691, "logits/rejected": -0.9988707900047302, "logps/chosen": -135.49502563476562, "logps/rejected": -276.3223571777344, "loss": 0.0988, "rewards/accuracies": 1.0, "rewards/chosen": 0.5232788324356079, "rewards/margins": 1.5297698974609375, "rewards/rejected": -1.0064910650253296, "step": 1867 }, { "epoch": 0.41, "learning_rate": 9.20379935346592e-06, "logits/chosen": -0.9469107985496521, "logits/rejected": -0.8667017221450806, "logps/chosen": -60.39186096191406, "logps/rejected": -72.95651245117188, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -0.475912481546402, "rewards/margins": 3.8831052780151367, "rewards/rejected": -4.359017848968506, "step": 1868 }, { "epoch": 0.41, "learning_rate": 9.202828702278265e-06, "logits/chosen": -0.606563925743103, "logits/rejected": -0.606563925743103, "logps/chosen": -37.99409484863281, "logps/rejected": -37.99409484863281, "loss": 0.3487, "rewards/accuracies": 0.0, "rewards/chosen": -2.560407876968384, "rewards/margins": 0.0, "rewards/rejected": -2.560407876968384, "step": 1869 }, { "epoch": 0.41, "learning_rate": 9.201857511037228e-06, "logits/chosen": -1.0608795881271362, "logits/rejected": -1.0071470737457275, "logps/chosen": -277.0941162109375, "logps/rejected": -252.12966918945312, "loss": 1.4, "rewards/accuracies": 0.0, "rewards/chosen": -4.146336555480957, "rewards/margins": -2.722403049468994, "rewards/rejected": -1.4239333868026733, "step": 1870 }, { "epoch": 0.41, "learning_rate": 9.200885779867601e-06, "logits/chosen": -0.7994673848152161, "logits/rejected": -0.6940312385559082, "logps/chosen": -185.270263671875, "logps/rejected": -310.7125244140625, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": -2.533709764480591, "rewards/margins": 8.188925743103027, "rewards/rejected": -10.722635269165039, "step": 1871 }, { "epoch": 0.41, "learning_rate": 9.199913508894251e-06, "logits/chosen": -0.676282525062561, "logits/rejected": -0.6447104215621948, "logps/chosen": -188.25320434570312, "logps/rejected": -192.12619018554688, "loss": 0.0803, "rewards/accuracies": 1.0, "rewards/chosen": -3.853977918624878, "rewards/margins": 1.8218767642974854, "rewards/rejected": -5.675854682922363, "step": 1872 }, { "epoch": 0.41, "learning_rate": 9.198940698242108e-06, "logits/chosen": -1.2620145082473755, "logits/rejected": -1.0962094068527222, "logps/chosen": -152.51536560058594, "logps/rejected": -362.9957275390625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": 0.5893630981445312, "rewards/margins": 6.984190464019775, "rewards/rejected": -6.394827365875244, "step": 1873 }, { "epoch": 0.41, "learning_rate": 9.197967348036182e-06, "logits/chosen": -0.8621849417686462, "logits/rejected": -0.8775761723518372, "logps/chosen": -112.97508239746094, "logps/rejected": -65.43914794921875, "loss": 0.0611, "rewards/accuracies": 1.0, "rewards/chosen": -1.5814933776855469, "rewards/margins": 2.058779239654541, "rewards/rejected": -3.640272617340088, "step": 1874 }, { "epoch": 0.42, "learning_rate": 9.196993458401544e-06, "logits/chosen": -0.7757054567337036, "logits/rejected": -0.8647416234016418, "logps/chosen": -256.92431640625, "logps/rejected": -166.85093688964844, "loss": 0.2648, "rewards/accuracies": 1.0, "rewards/chosen": -1.9239898920059204, "rewards/margins": 0.449849009513855, "rewards/rejected": -2.3738389015197754, "step": 1875 }, { "epoch": 0.42, "learning_rate": 9.196019029463335e-06, "logits/chosen": -0.8906170129776001, "logits/rejected": -0.8685429692268372, "logps/chosen": -107.1807861328125, "logps/rejected": -160.81944274902344, "loss": 0.256, "rewards/accuracies": 1.0, "rewards/chosen": -0.448294073343277, "rewards/margins": 3.740255832672119, "rewards/rejected": -4.188549995422363, "step": 1876 }, { "epoch": 0.42, "learning_rate": 9.195044061346767e-06, "logits/chosen": -1.008151888847351, "logits/rejected": -0.3308207392692566, "logps/chosen": -96.718994140625, "logps/rejected": -385.91943359375, "loss": 0.0737, "rewards/accuracies": 1.0, "rewards/chosen": 1.230871558189392, "rewards/margins": 31.55808448791504, "rewards/rejected": -30.327213287353516, "step": 1877 }, { "epoch": 0.42, "learning_rate": 9.194068554177123e-06, "logits/chosen": -0.8215115666389465, "logits/rejected": -0.8654410243034363, "logps/chosen": -196.33364868164062, "logps/rejected": -117.80170440673828, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": 4.524273872375488, "rewards/margins": 3.865445852279663, "rewards/rejected": 0.6588279604911804, "step": 1878 }, { "epoch": 0.42, "learning_rate": 9.19309250807975e-06, "logits/chosen": -1.2254276275634766, "logits/rejected": -1.1558958292007446, "logps/chosen": -82.66181182861328, "logps/rejected": -79.5266342163086, "loss": 0.141, "rewards/accuracies": 1.0, "rewards/chosen": -0.802661120891571, "rewards/margins": 1.2980644702911377, "rewards/rejected": -2.1007256507873535, "step": 1879 }, { "epoch": 0.42, "learning_rate": 9.192115923180071e-06, "logits/chosen": -0.6835007071495056, "logits/rejected": -0.6891935467720032, "logps/chosen": -94.1241455078125, "logps/rejected": -93.05674743652344, "loss": 0.1098, "rewards/accuracies": 1.0, "rewards/chosen": 0.21222610771656036, "rewards/margins": 1.4619988203048706, "rewards/rejected": -1.2497726678848267, "step": 1880 }, { "epoch": 0.42, "learning_rate": 9.191138799603574e-06, "logits/chosen": -0.9642961025238037, "logits/rejected": -0.8956321477890015, "logps/chosen": -84.88604736328125, "logps/rejected": -137.04441833496094, "loss": 0.7627, "rewards/accuracies": 0.0, "rewards/chosen": -0.9406509399414062, "rewards/margins": -1.2601211071014404, "rewards/rejected": 0.31947022676467896, "step": 1881 }, { "epoch": 0.42, "learning_rate": 9.190161137475814e-06, "logits/chosen": -0.7831268906593323, "logits/rejected": -0.7470900416374207, "logps/chosen": -138.0301513671875, "logps/rejected": -479.77239990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0718727111816406, "rewards/margins": 15.93878173828125, "rewards/rejected": -17.01065444946289, "step": 1882 }, { "epoch": 0.42, "learning_rate": 9.189182936922424e-06, "logits/chosen": -1.3337403535842896, "logits/rejected": -1.2710952758789062, "logps/chosen": -86.17891693115234, "logps/rejected": -179.60818481445312, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": -1.3752319812774658, "rewards/margins": 2.943228006362915, "rewards/rejected": -4.318459987640381, "step": 1883 }, { "epoch": 0.42, "learning_rate": 9.188204198069096e-06, "logits/chosen": -0.5582099556922913, "logits/rejected": -0.5582099556922913, "logps/chosen": -102.18843841552734, "logps/rejected": -102.18843841552734, "loss": 0.3559, "rewards/accuracies": 0.0, "rewards/chosen": -3.5654945373535156, "rewards/margins": 0.0, "rewards/rejected": -3.5654945373535156, "step": 1884 }, { "epoch": 0.42, "learning_rate": 9.187224921041595e-06, "logits/chosen": -1.1128047704696655, "logits/rejected": -1.0684781074523926, "logps/chosen": -189.13502502441406, "logps/rejected": -276.68939208984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.9413880109786987, "rewards/margins": 8.920591354370117, "rewards/rejected": -6.979203701019287, "step": 1885 }, { "epoch": 0.42, "learning_rate": 9.186245105965758e-06, "logits/chosen": -0.8848252296447754, "logits/rejected": -0.859778642654419, "logps/chosen": -106.56049346923828, "logps/rejected": -178.73658752441406, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": 0.0006683349492959678, "rewards/margins": 4.3729753494262695, "rewards/rejected": -4.372306823730469, "step": 1886 }, { "epoch": 0.42, "learning_rate": 9.18526475296749e-06, "logits/chosen": -1.104217290878296, "logits/rejected": -0.898318886756897, "logps/chosen": -127.85846710205078, "logps/rejected": -131.2340545654297, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": -1.446936011314392, "rewards/margins": 4.369839668273926, "rewards/rejected": -5.816775798797607, "step": 1887 }, { "epoch": 0.42, "learning_rate": 9.184283862172763e-06, "logits/chosen": -0.8081574440002441, "logits/rejected": -0.8133285045623779, "logps/chosen": -98.028564453125, "logps/rejected": -126.37415313720703, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": -1.1574859619140625, "rewards/margins": 2.8583178520202637, "rewards/rejected": -4.015803813934326, "step": 1888 }, { "epoch": 0.42, "learning_rate": 9.183302433707616e-06, "logits/chosen": -1.0089631080627441, "logits/rejected": -1.0010732412338257, "logps/chosen": -109.95982360839844, "logps/rejected": -128.08767700195312, "loss": 0.1756, "rewards/accuracies": 1.0, "rewards/chosen": -3.3205978870391846, "rewards/margins": 1.1028907299041748, "rewards/rejected": -4.423488616943359, "step": 1889 }, { "epoch": 0.42, "learning_rate": 9.182320467698164e-06, "logits/chosen": -0.8789641857147217, "logits/rejected": -0.8566042184829712, "logps/chosen": -169.3850555419922, "logps/rejected": -163.04327392578125, "loss": 0.7973, "rewards/accuracies": 0.0, "rewards/chosen": 0.6982040405273438, "rewards/margins": -0.6266158819198608, "rewards/rejected": 1.3248199224472046, "step": 1890 }, { "epoch": 0.42, "learning_rate": 9.181337964270585e-06, "logits/chosen": -1.123925805091858, "logits/rejected": -1.1646052598953247, "logps/chosen": -117.7681884765625, "logps/rejected": -84.44613647460938, "loss": 0.0422, "rewards/accuracies": 1.0, "rewards/chosen": -1.7805839776992798, "rewards/margins": 2.4318199157714844, "rewards/rejected": -4.212403774261475, "step": 1891 }, { "epoch": 0.42, "learning_rate": 9.180354923551129e-06, "logits/chosen": -1.2105145454406738, "logits/rejected": -1.144469141960144, "logps/chosen": -95.49205017089844, "logps/rejected": -155.83432006835938, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": 0.30243682861328125, "rewards/margins": 5.777836799621582, "rewards/rejected": -5.475399971008301, "step": 1892 }, { "epoch": 0.42, "learning_rate": 9.179371345666115e-06, "logits/chosen": -0.903102457523346, "logits/rejected": -0.9191755652427673, "logps/chosen": -166.981201171875, "logps/rejected": -179.03018188476562, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 2.2951018810272217, "rewards/margins": 4.019862174987793, "rewards/rejected": -1.7247604131698608, "step": 1893 }, { "epoch": 0.42, "learning_rate": 9.178387230741932e-06, "logits/chosen": -1.088532567024231, "logits/rejected": -1.027927041053772, "logps/chosen": -89.87687683105469, "logps/rejected": -136.70257568359375, "loss": 0.0621, "rewards/accuracies": 1.0, "rewards/chosen": -1.2534866333007812, "rewards/margins": 2.0244948863983154, "rewards/rejected": -3.2779815196990967, "step": 1894 }, { "epoch": 0.42, "learning_rate": 9.177402578905032e-06, "logits/chosen": -1.0615553855895996, "logits/rejected": -1.0805543661117554, "logps/chosen": -62.52286148071289, "logps/rejected": -95.23391723632812, "loss": 0.0921, "rewards/accuracies": 1.0, "rewards/chosen": -0.9747859835624695, "rewards/margins": 1.8916492462158203, "rewards/rejected": -2.8664352893829346, "step": 1895 }, { "epoch": 0.42, "learning_rate": 9.176417390281944e-06, "logits/chosen": -0.9772823452949524, "logits/rejected": -0.9726414680480957, "logps/chosen": -114.84500122070312, "logps/rejected": -177.72683715820312, "loss": 0.092, "rewards/accuracies": 1.0, "rewards/chosen": -1.9856857061386108, "rewards/margins": 1.5994309186935425, "rewards/rejected": -3.5851166248321533, "step": 1896 }, { "epoch": 0.42, "learning_rate": 9.17543166499926e-06, "logits/chosen": -0.740247905254364, "logits/rejected": -0.6623023152351379, "logps/chosen": -88.3662109375, "logps/rejected": -214.04696655273438, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.1905059814453125, "rewards/margins": 5.717309474945068, "rewards/rejected": -5.526803493499756, "step": 1897 }, { "epoch": 0.42, "learning_rate": 9.174445403183645e-06, "logits/chosen": -0.8804771304130554, "logits/rejected": -0.8422958254814148, "logps/chosen": -206.66128540039062, "logps/rejected": -268.94781494140625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.36783143877983093, "rewards/margins": 7.920716762542725, "rewards/rejected": -7.55288553237915, "step": 1898 }, { "epoch": 0.42, "learning_rate": 9.173458604961832e-06, "logits/chosen": -0.9468697309494019, "logits/rejected": -0.9343693852424622, "logps/chosen": -77.42044067382812, "logps/rejected": -105.04197692871094, "loss": 0.1148, "rewards/accuracies": 1.0, "rewards/chosen": -0.3662582337856293, "rewards/margins": 1.4104042053222656, "rewards/rejected": -1.7766624689102173, "step": 1899 }, { "epoch": 0.42, "learning_rate": 9.17247127046062e-06, "logits/chosen": -0.8705583810806274, "logits/rejected": -0.8091421127319336, "logps/chosen": -176.36782836914062, "logps/rejected": -155.2460479736328, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": 0.31190186738967896, "rewards/margins": 6.0867695808410645, "rewards/rejected": -5.774867534637451, "step": 1900 }, { "epoch": 0.42, "learning_rate": 9.17148339980688e-06, "logits/chosen": -0.6397534608840942, "logits/rejected": -0.6392412185668945, "logps/chosen": -80.77420043945312, "logps/rejected": -108.03089904785156, "loss": 0.1166, "rewards/accuracies": 1.0, "rewards/chosen": -0.07100220024585724, "rewards/margins": 1.7467399835586548, "rewards/rejected": -1.8177422285079956, "step": 1901 }, { "epoch": 0.42, "learning_rate": 9.170494993127552e-06, "logits/chosen": -1.0753010511398315, "logits/rejected": -1.122775673866272, "logps/chosen": -196.9801025390625, "logps/rejected": -161.21197509765625, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": 2.704737901687622, "rewards/margins": 6.219534397125244, "rewards/rejected": -3.514796495437622, "step": 1902 }, { "epoch": 0.42, "learning_rate": 9.169506050549641e-06, "logits/chosen": -0.7685219645500183, "logits/rejected": -0.8162807822227478, "logps/chosen": -285.49676513671875, "logps/rejected": -143.54769897460938, "loss": 0.1759, "rewards/accuracies": 1.0, "rewards/chosen": -3.4754669666290283, "rewards/margins": 0.8641235828399658, "rewards/rejected": -4.339590549468994, "step": 1903 }, { "epoch": 0.42, "learning_rate": 9.168516572200227e-06, "logits/chosen": -1.0824447870254517, "logits/rejected": -1.082959771156311, "logps/chosen": -112.92841339111328, "logps/rejected": -125.27084350585938, "loss": 0.4621, "rewards/accuracies": 0.0, "rewards/chosen": 0.814038097858429, "rewards/margins": -0.3797714114189148, "rewards/rejected": 1.1938095092773438, "step": 1904 }, { "epoch": 0.42, "learning_rate": 9.167526558206455e-06, "logits/chosen": -1.5458375215530396, "logits/rejected": -1.558137059211731, "logps/chosen": -116.16580963134766, "logps/rejected": -122.32015228271484, "loss": 0.0277, "rewards/accuracies": 1.0, "rewards/chosen": 0.5099380612373352, "rewards/margins": 3.0710930824279785, "rewards/rejected": -2.561155080795288, "step": 1905 }, { "epoch": 0.42, "learning_rate": 9.166536008695536e-06, "logits/chosen": -0.827436625957489, "logits/rejected": -0.7812153697013855, "logps/chosen": -109.98896789550781, "logps/rejected": -171.31057739257812, "loss": 0.0482, "rewards/accuracies": 1.0, "rewards/chosen": -1.4310722351074219, "rewards/margins": 2.4458749294281006, "rewards/rejected": -3.8769471645355225, "step": 1906 }, { "epoch": 0.42, "learning_rate": 9.165544923794758e-06, "logits/chosen": -1.0786054134368896, "logits/rejected": -0.6642579436302185, "logps/chosen": -166.8287353515625, "logps/rejected": -493.2818908691406, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.03160400316119194, "rewards/margins": 38.29838180541992, "rewards/rejected": -38.329986572265625, "step": 1907 }, { "epoch": 0.42, "learning_rate": 9.164553303631472e-06, "logits/chosen": -0.8788211345672607, "logits/rejected": -0.8340418338775635, "logps/chosen": -84.5162582397461, "logps/rejected": -135.4770965576172, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 0.3062858581542969, "rewards/margins": 4.0114850997924805, "rewards/rejected": -3.7051994800567627, "step": 1908 }, { "epoch": 0.42, "learning_rate": 9.163561148333097e-06, "logits/chosen": -1.0021791458129883, "logits/rejected": -0.9809922575950623, "logps/chosen": -215.33944702148438, "logps/rejected": -102.52102661132812, "loss": 1.219, "rewards/accuracies": 0.0, "rewards/chosen": -2.8895416259765625, "rewards/margins": -2.2265067100524902, "rewards/rejected": -0.6630348563194275, "step": 1909 }, { "epoch": 0.42, "learning_rate": 9.162568458027122e-06, "logits/chosen": -0.8452480435371399, "logits/rejected": -0.8251693248748779, "logps/chosen": -79.71417236328125, "logps/rejected": -198.52833557128906, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -2.055990695953369, "rewards/margins": 2.6655988693237305, "rewards/rejected": -4.7215895652771, "step": 1910 }, { "epoch": 0.42, "learning_rate": 9.16157523284111e-06, "logits/chosen": -0.8581595420837402, "logits/rejected": -0.7553476095199585, "logps/chosen": -293.0492248535156, "logps/rejected": -357.38134765625, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -3.20755934715271, "rewards/margins": 16.797801971435547, "rewards/rejected": -20.005361557006836, "step": 1911 }, { "epoch": 0.42, "learning_rate": 9.16058147290268e-06, "logits/chosen": -0.97173011302948, "logits/rejected": -0.9821203947067261, "logps/chosen": -127.51985168457031, "logps/rejected": -156.62969970703125, "loss": 0.8312, "rewards/accuracies": 1.0, "rewards/chosen": 1.2620849609375, "rewards/margins": 2.7406463623046875, "rewards/rejected": -1.4785614013671875, "step": 1912 }, { "epoch": 0.42, "learning_rate": 9.159587178339535e-06, "logits/chosen": -0.9358523488044739, "logits/rejected": -1.0104328393936157, "logps/chosen": -145.7094268798828, "logps/rejected": -84.6127700805664, "loss": 0.5495, "rewards/accuracies": 1.0, "rewards/chosen": -0.8481735587120056, "rewards/margins": 0.24235612154006958, "rewards/rejected": -1.0905296802520752, "step": 1913 }, { "epoch": 0.42, "learning_rate": 9.158592349279439e-06, "logits/chosen": -1.2869197130203247, "logits/rejected": -1.2010761499404907, "logps/chosen": -136.1649169921875, "logps/rejected": -247.05812072753906, "loss": 0.2368, "rewards/accuracies": 1.0, "rewards/chosen": -1.577050805091858, "rewards/margins": 1.9222136735916138, "rewards/rejected": -3.4992644786834717, "step": 1914 }, { "epoch": 0.42, "learning_rate": 9.157596985850218e-06, "logits/chosen": -0.8770349621772766, "logits/rejected": -0.8720927238464355, "logps/chosen": -104.66950988769531, "logps/rejected": -167.2200164794922, "loss": 0.452, "rewards/accuracies": 1.0, "rewards/chosen": -0.46241456270217896, "rewards/margins": 4.492175102233887, "rewards/rejected": -4.95458984375, "step": 1915 }, { "epoch": 0.42, "learning_rate": 9.156601088179785e-06, "logits/chosen": -1.0177280902862549, "logits/rejected": -0.9820839166641235, "logps/chosen": -174.67041015625, "logps/rejected": -145.14260864257812, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 1.5108215808868408, "rewards/margins": 5.629912376403809, "rewards/rejected": -4.119090557098389, "step": 1916 }, { "epoch": 0.42, "learning_rate": 9.1556046563961e-06, "logits/chosen": -1.0884419679641724, "logits/rejected": -1.0824663639068604, "logps/chosen": -91.14736938476562, "logps/rejected": -112.78184509277344, "loss": 0.1201, "rewards/accuracies": 1.0, "rewards/chosen": -1.3010987043380737, "rewards/margins": 1.3040648698806763, "rewards/rejected": -2.60516357421875, "step": 1917 }, { "epoch": 0.42, "learning_rate": 9.154607690627207e-06, "logits/chosen": -1.0926657915115356, "logits/rejected": -1.0933362245559692, "logps/chosen": -102.52423095703125, "logps/rejected": -106.53570556640625, "loss": 0.4686, "rewards/accuracies": 0.0, "rewards/chosen": -3.2755463123321533, "rewards/margins": -0.44007110595703125, "rewards/rejected": -2.835475206375122, "step": 1918 }, { "epoch": 0.42, "learning_rate": 9.153610191001214e-06, "logits/chosen": -1.0336865186691284, "logits/rejected": -1.010853886604309, "logps/chosen": -104.49050903320312, "logps/rejected": -160.3281707763672, "loss": 1.26, "rewards/accuracies": 1.0, "rewards/chosen": -0.587213933467865, "rewards/margins": 1.6400916576385498, "rewards/rejected": -2.2273056507110596, "step": 1919 }, { "epoch": 0.42, "learning_rate": 9.152612157646297e-06, "logits/chosen": -1.1533948183059692, "logits/rejected": -1.2489237785339355, "logps/chosen": -148.07046508789062, "logps/rejected": -106.48414611816406, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": 1.3146759271621704, "rewards/margins": 3.5590806007385254, "rewards/rejected": -2.2444045543670654, "step": 1920 }, { "epoch": 0.43, "learning_rate": 9.1516135906907e-06, "logits/chosen": -1.2865498065948486, "logits/rejected": -1.1785776615142822, "logps/chosen": -103.8115234375, "logps/rejected": -301.05926513671875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.6388664245605469, "rewards/margins": 13.54672622680664, "rewards/rejected": -15.185592651367188, "step": 1921 }, { "epoch": 0.43, "learning_rate": 9.150614490262736e-06, "logits/chosen": -1.1208281517028809, "logits/rejected": -1.044214129447937, "logps/chosen": -200.9516143798828, "logps/rejected": -330.4552001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7252365350723267, "rewards/margins": 11.470911026000977, "rewards/rejected": -10.745674133300781, "step": 1922 }, { "epoch": 0.43, "learning_rate": 9.149614856490788e-06, "logits/chosen": -1.0343506336212158, "logits/rejected": -1.0406543016433716, "logps/chosen": -72.15168762207031, "logps/rejected": -99.12279510498047, "loss": 0.3617, "rewards/accuracies": 1.0, "rewards/chosen": 0.11255569756031036, "rewards/margins": 3.4817521572113037, "rewards/rejected": -3.3691964149475098, "step": 1923 }, { "epoch": 0.43, "learning_rate": 9.148614689503307e-06, "logits/chosen": -1.1088906526565552, "logits/rejected": -1.0229555368423462, "logps/chosen": -74.64088439941406, "logps/rejected": -184.58860778808594, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.08438873291015625, "rewards/margins": 6.746946811676025, "rewards/rejected": -6.662558078765869, "step": 1924 }, { "epoch": 0.43, "learning_rate": 9.147613989428809e-06, "logits/chosen": -1.032679796218872, "logits/rejected": -0.9086572527885437, "logps/chosen": -233.33148193359375, "logps/rejected": -274.29681396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.5235137939453125, "rewards/margins": 16.822237014770508, "rewards/rejected": -16.298723220825195, "step": 1925 }, { "epoch": 0.43, "learning_rate": 9.146612756395888e-06, "logits/chosen": -0.740508496761322, "logits/rejected": -0.7246711254119873, "logps/chosen": -94.59861755371094, "logps/rejected": -199.2184600830078, "loss": 0.235, "rewards/accuracies": 1.0, "rewards/chosen": -0.3646865785121918, "rewards/margins": 2.7283356189727783, "rewards/rejected": -3.093022108078003, "step": 1926 }, { "epoch": 0.43, "learning_rate": 9.145610990533193e-06, "logits/chosen": -0.8566211462020874, "logits/rejected": -0.8175091743469238, "logps/chosen": -169.14111328125, "logps/rejected": -279.3019104003906, "loss": 0.0899, "rewards/accuracies": 1.0, "rewards/chosen": 0.6245178580284119, "rewards/margins": 5.533636569976807, "rewards/rejected": -4.90911865234375, "step": 1927 }, { "epoch": 0.43, "learning_rate": 9.144608691969452e-06, "logits/chosen": -0.806951105594635, "logits/rejected": -0.77168208360672, "logps/chosen": -118.28922271728516, "logps/rejected": -179.79437255859375, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": 0.1267799437046051, "rewards/margins": 4.210020065307617, "rewards/rejected": -4.083240032196045, "step": 1928 }, { "epoch": 0.43, "learning_rate": 9.143605860833459e-06, "logits/chosen": -0.7376194596290588, "logits/rejected": -0.6597496867179871, "logps/chosen": -197.2187957763672, "logps/rejected": -190.58035278320312, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": 0.5415054559707642, "rewards/margins": 8.294281005859375, "rewards/rejected": -7.7527756690979, "step": 1929 }, { "epoch": 0.43, "learning_rate": 9.142602497254071e-06, "logits/chosen": -1.0034126043319702, "logits/rejected": -0.5248331427574158, "logps/chosen": -133.58428955078125, "logps/rejected": -334.5732421875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -4.087543487548828, "rewards/margins": 15.660783767700195, "rewards/rejected": -19.748327255249023, "step": 1930 }, { "epoch": 0.43, "learning_rate": 9.141598601360225e-06, "logits/chosen": -1.084568977355957, "logits/rejected": -1.1135863065719604, "logps/chosen": -88.87571716308594, "logps/rejected": -181.93870544433594, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": 0.848749577999115, "rewards/margins": 5.30367374420166, "rewards/rejected": -4.4549241065979, "step": 1931 }, { "epoch": 0.43, "learning_rate": 9.14059417328091e-06, "logits/chosen": -0.8309561014175415, "logits/rejected": -0.8055636882781982, "logps/chosen": -133.2950439453125, "logps/rejected": -147.04193115234375, "loss": 0.3422, "rewards/accuracies": 1.0, "rewards/chosen": -4.4767351150512695, "rewards/margins": 0.8238639831542969, "rewards/rejected": -5.300599098205566, "step": 1932 }, { "epoch": 0.43, "learning_rate": 9.139589213145202e-06, "logits/chosen": -1.010204553604126, "logits/rejected": -1.010120153427124, "logps/chosen": -196.6038818359375, "logps/rejected": -316.27301025390625, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": 1.60875403881073, "rewards/margins": 2.8927292823791504, "rewards/rejected": -1.2839752435684204, "step": 1933 }, { "epoch": 0.43, "learning_rate": 9.138583721082229e-06, "logits/chosen": -1.0582009553909302, "logits/rejected": -1.0354697704315186, "logps/chosen": -108.72747802734375, "logps/rejected": -203.91452026367188, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -1.0019890069961548, "rewards/margins": 3.941387176513672, "rewards/rejected": -4.943376064300537, "step": 1934 }, { "epoch": 0.43, "learning_rate": 9.137577697221195e-06, "logits/chosen": -0.697363018989563, "logits/rejected": -0.7518077492713928, "logps/chosen": -201.85543823242188, "logps/rejected": -524.26611328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.7409698963165283, "rewards/margins": 37.75129318237305, "rewards/rejected": -39.49226379394531, "step": 1935 }, { "epoch": 0.43, "learning_rate": 9.136571141691376e-06, "logits/chosen": -1.0580458641052246, "logits/rejected": -1.0196990966796875, "logps/chosen": -152.0109100341797, "logps/rejected": -62.40070343017578, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": 2.05082106590271, "rewards/margins": 4.520473480224609, "rewards/rejected": -2.4696521759033203, "step": 1936 }, { "epoch": 0.43, "learning_rate": 9.135564054622108e-06, "logits/chosen": -1.3628520965576172, "logits/rejected": -1.3258594274520874, "logps/chosen": -166.8551025390625, "logps/rejected": -131.52484130859375, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 1.3892395496368408, "rewards/margins": 5.573836326599121, "rewards/rejected": -4.184597015380859, "step": 1937 }, { "epoch": 0.43, "learning_rate": 9.134556436142801e-06, "logits/chosen": -1.0604692697525024, "logits/rejected": -1.1676291227340698, "logps/chosen": -239.52674865722656, "logps/rejected": -150.4766845703125, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": 4.270408630371094, "rewards/margins": 2.8240065574645996, "rewards/rejected": 1.4464019536972046, "step": 1938 }, { "epoch": 0.43, "learning_rate": 9.133548286382932e-06, "logits/chosen": -0.8141283392906189, "logits/rejected": -0.8505086302757263, "logps/chosen": -123.69242095947266, "logps/rejected": -169.93161010742188, "loss": 1.1013, "rewards/accuracies": 0.0, "rewards/chosen": 0.8526298403739929, "rewards/margins": -0.8900948166847229, "rewards/rejected": 1.7427246570587158, "step": 1939 }, { "epoch": 0.43, "learning_rate": 9.132539605472044e-06, "logits/chosen": -1.3753243684768677, "logits/rejected": -1.4689277410507202, "logps/chosen": -119.7626724243164, "logps/rejected": -46.27186584472656, "loss": 0.0713, "rewards/accuracies": 1.0, "rewards/chosen": 0.8536796569824219, "rewards/margins": 2.307036876678467, "rewards/rejected": -1.4533571004867554, "step": 1940 }, { "epoch": 0.43, "learning_rate": 9.131530393539752e-06, "logits/chosen": -1.0705623626708984, "logits/rejected": -1.0566819906234741, "logps/chosen": -146.57302856445312, "logps/rejected": -137.68325805664062, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.4494338929653168, "rewards/margins": 4.327850341796875, "rewards/rejected": -4.777284145355225, "step": 1941 }, { "epoch": 0.43, "learning_rate": 9.130520650715735e-06, "logits/chosen": -0.8920396566390991, "logits/rejected": -0.8841413259506226, "logps/chosen": -107.98291015625, "logps/rejected": -85.35871124267578, "loss": 0.3889, "rewards/accuracies": 0.0, "rewards/chosen": 0.1436202973127365, "rewards/margins": -0.16249467432498932, "rewards/rejected": 0.30611497163772583, "step": 1942 }, { "epoch": 0.43, "learning_rate": 9.129510377129745e-06, "logits/chosen": -0.7860544323921204, "logits/rejected": -0.7400560975074768, "logps/chosen": -155.45416259765625, "logps/rejected": -120.42231750488281, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": 0.32900696992874146, "rewards/margins": 4.558696269989014, "rewards/rejected": -4.229689121246338, "step": 1943 }, { "epoch": 0.43, "learning_rate": 9.128499572911596e-06, "logits/chosen": -0.847108006477356, "logits/rejected": -0.8399804830551147, "logps/chosen": -75.07176208496094, "logps/rejected": -74.771240234375, "loss": 0.2223, "rewards/accuracies": 1.0, "rewards/chosen": -0.8304428458213806, "rewards/margins": 0.5841746926307678, "rewards/rejected": -1.4146175384521484, "step": 1944 }, { "epoch": 0.43, "learning_rate": 9.12748823819118e-06, "logits/chosen": -0.9111343622207642, "logits/rejected": -0.8837275505065918, "logps/chosen": -111.70262145996094, "logps/rejected": -59.152130126953125, "loss": 0.0437, "rewards/accuracies": 1.0, "rewards/chosen": -1.292720079421997, "rewards/margins": 2.4915530681610107, "rewards/rejected": -3.784273147583008, "step": 1945 }, { "epoch": 0.43, "learning_rate": 9.126476373098446e-06, "logits/chosen": -0.9739618301391602, "logits/rejected": -0.9801694750785828, "logps/chosen": -97.58362579345703, "logps/rejected": -116.90731811523438, "loss": 0.2309, "rewards/accuracies": 1.0, "rewards/chosen": -0.3938736021518707, "rewards/margins": 0.6225457191467285, "rewards/rejected": -1.0164192914962769, "step": 1946 }, { "epoch": 0.43, "learning_rate": 9.125463977763417e-06, "logits/chosen": -1.2545698881149292, "logits/rejected": -1.2702915668487549, "logps/chosen": -198.4169921875, "logps/rejected": -172.30722045898438, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": 1.6475495100021362, "rewards/margins": 5.327188014984131, "rewards/rejected": -3.679638624191284, "step": 1947 }, { "epoch": 0.43, "learning_rate": 9.124451052316185e-06, "logits/chosen": -0.8996198773384094, "logits/rejected": -0.7649639844894409, "logps/chosen": -126.4498062133789, "logps/rejected": -311.96826171875, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": 0.8220481872558594, "rewards/margins": 3.5787529945373535, "rewards/rejected": -2.756704807281494, "step": 1948 }, { "epoch": 0.43, "learning_rate": 9.123437596886909e-06, "logits/chosen": -0.8291579484939575, "logits/rejected": -0.7161425352096558, "logps/chosen": -240.20338439941406, "logps/rejected": -53.508331298828125, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": 0.34082794189453125, "rewards/margins": 2.3897759914398193, "rewards/rejected": -2.048948049545288, "step": 1949 }, { "epoch": 0.43, "learning_rate": 9.122423611605814e-06, "logits/chosen": -1.0700913667678833, "logits/rejected": -1.031264305114746, "logps/chosen": -169.93362426757812, "logps/rejected": -187.83453369140625, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": 1.5008468627929688, "rewards/margins": 3.366185188293457, "rewards/rejected": -1.8653382062911987, "step": 1950 }, { "epoch": 0.43, "learning_rate": 9.121409096603193e-06, "logits/chosen": -0.8162423372268677, "logits/rejected": -0.7349593639373779, "logps/chosen": -288.473388671875, "logps/rejected": -426.47705078125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4357360899448395, "rewards/margins": 5.819403171539307, "rewards/rejected": -6.255139350891113, "step": 1951 }, { "epoch": 0.43, "learning_rate": 9.120394052009412e-06, "logits/chosen": -0.8450645804405212, "logits/rejected": -0.8306629657745361, "logps/chosen": -120.6712875366211, "logps/rejected": -176.41485595703125, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": 0.28016358613967896, "rewards/margins": 4.212515354156494, "rewards/rejected": -3.93235182762146, "step": 1952 }, { "epoch": 0.43, "learning_rate": 9.1193784779549e-06, "logits/chosen": -0.6212151050567627, "logits/rejected": -0.6212151050567627, "logps/chosen": -141.33982849121094, "logps/rejected": -141.33982849121094, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.693124294281006, "rewards/margins": 0.0, "rewards/rejected": -4.693124294281006, "step": 1953 }, { "epoch": 0.43, "learning_rate": 9.118362374570158e-06, "logits/chosen": -0.9287687540054321, "logits/rejected": -0.9335112571716309, "logps/chosen": -210.70147705078125, "logps/rejected": -137.8113250732422, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": 1.179742455482483, "rewards/margins": 5.5141777992248535, "rewards/rejected": -4.33443546295166, "step": 1954 }, { "epoch": 0.43, "learning_rate": 9.117345741985749e-06, "logits/chosen": -0.9606663584709167, "logits/rejected": -0.9640024900436401, "logps/chosen": -290.089599609375, "logps/rejected": -308.65875244140625, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -3.5764968395233154, "rewards/margins": 2.955986261367798, "rewards/rejected": -6.532483100891113, "step": 1955 }, { "epoch": 0.43, "learning_rate": 9.116328580332309e-06, "logits/chosen": -0.8407177329063416, "logits/rejected": -0.7896308898925781, "logps/chosen": -205.4796142578125, "logps/rejected": -88.44979858398438, "loss": 0.0621, "rewards/accuracies": 1.0, "rewards/chosen": -0.284454345703125, "rewards/margins": 3.2224578857421875, "rewards/rejected": -3.5069122314453125, "step": 1956 }, { "epoch": 0.43, "learning_rate": 9.115310889740545e-06, "logits/chosen": -1.15787672996521, "logits/rejected": -1.0983868837356567, "logps/chosen": -101.862060546875, "logps/rejected": -201.80477905273438, "loss": 0.4487, "rewards/accuracies": 0.0, "rewards/chosen": -0.28260040283203125, "rewards/margins": -0.36876219511032104, "rewards/rejected": 0.08616180717945099, "step": 1957 }, { "epoch": 0.43, "learning_rate": 9.114292670341222e-06, "logits/chosen": -1.291440486907959, "logits/rejected": -1.3409048318862915, "logps/chosen": -96.73643493652344, "logps/rejected": -87.20853424072266, "loss": 0.0863, "rewards/accuracies": 1.0, "rewards/chosen": 1.1211731433868408, "rewards/margins": 2.761216163635254, "rewards/rejected": -1.6400429010391235, "step": 1958 }, { "epoch": 0.43, "learning_rate": 9.113273922265183e-06, "logits/chosen": -0.9281592965126038, "logits/rejected": -0.9281592965126038, "logps/chosen": -156.0240936279297, "logps/rejected": -156.0240936279297, "loss": 0.352, "rewards/accuracies": 0.0, "rewards/chosen": -0.7092117667198181, "rewards/margins": 0.0, "rewards/rejected": -0.7092117667198181, "step": 1959 }, { "epoch": 0.43, "learning_rate": 9.112254645643332e-06, "logits/chosen": -0.9554345011711121, "logits/rejected": -0.9089458584785461, "logps/chosen": -121.6888198852539, "logps/rejected": -193.8926239013672, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.962274968624115, "rewards/margins": 5.14544153213501, "rewards/rejected": -6.1077165603637695, "step": 1960 }, { "epoch": 0.43, "learning_rate": 9.111234840606647e-06, "logits/chosen": -0.7223261594772339, "logits/rejected": -0.6710043549537659, "logps/chosen": -77.77193450927734, "logps/rejected": -286.9787292480469, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.792628526687622, "rewards/margins": 4.312365531921387, "rewards/rejected": -6.104994297027588, "step": 1961 }, { "epoch": 0.43, "learning_rate": 9.110214507286167e-06, "logits/chosen": -0.8321851491928101, "logits/rejected": -0.8118768334388733, "logps/chosen": -84.45039367675781, "logps/rejected": -97.77411651611328, "loss": 0.2637, "rewards/accuracies": 1.0, "rewards/chosen": -0.8056373596191406, "rewards/margins": 0.3644294738769531, "rewards/rejected": -1.1700668334960938, "step": 1962 }, { "epoch": 0.43, "learning_rate": 9.109193645813001e-06, "logits/chosen": -1.1133959293365479, "logits/rejected": -1.1071077585220337, "logps/chosen": -76.03791046142578, "logps/rejected": -83.51316833496094, "loss": 0.575, "rewards/accuracies": 0.0, "rewards/chosen": -0.102752685546875, "rewards/margins": -0.6340881586074829, "rewards/rejected": 0.5313354730606079, "step": 1963 }, { "epoch": 0.43, "learning_rate": 9.10817225631833e-06, "logits/chosen": -1.1133936643600464, "logits/rejected": -1.0425220727920532, "logps/chosen": -90.38072204589844, "logps/rejected": -180.72634887695312, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 0.126362606883049, "rewards/margins": 4.0260820388793945, "rewards/rejected": -3.89971923828125, "step": 1964 }, { "epoch": 0.43, "learning_rate": 9.107150338933403e-06, "logits/chosen": -0.8641608357429504, "logits/rejected": -0.8672128319740295, "logps/chosen": -88.90306091308594, "logps/rejected": -83.92727661132812, "loss": 0.1501, "rewards/accuracies": 1.0, "rewards/chosen": -0.7710480093955994, "rewards/margins": 1.071234941482544, "rewards/rejected": -1.8422828912734985, "step": 1965 }, { "epoch": 0.44, "learning_rate": 9.10612789378953e-06, "logits/chosen": -0.9851691126823425, "logits/rejected": -0.9742575287818909, "logps/chosen": -170.55877685546875, "logps/rejected": -189.71151733398438, "loss": 1.0467, "rewards/accuracies": 0.0, "rewards/chosen": 0.46787720918655396, "rewards/margins": -1.9612884521484375, "rewards/rejected": 2.4291656017303467, "step": 1966 }, { "epoch": 0.44, "learning_rate": 9.105104921018092e-06, "logits/chosen": -1.2103849649429321, "logits/rejected": -1.186773419380188, "logps/chosen": -149.2254638671875, "logps/rejected": -189.51222229003906, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 1.3304290771484375, "rewards/margins": 6.841365337371826, "rewards/rejected": -5.510936260223389, "step": 1967 }, { "epoch": 0.44, "learning_rate": 9.10408142075054e-06, "logits/chosen": -1.116127371788025, "logits/rejected": -1.1428910493850708, "logps/chosen": -214.52880859375, "logps/rejected": -155.1267852783203, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 4.015161037445068, "rewards/margins": 6.93743896484375, "rewards/rejected": -2.9222779273986816, "step": 1968 }, { "epoch": 0.44, "learning_rate": 9.103057393118392e-06, "logits/chosen": -0.8906669616699219, "logits/rejected": -0.8906669616699219, "logps/chosen": -75.29935455322266, "logps/rejected": -75.29935455322266, "loss": 0.3636, "rewards/accuracies": 0.0, "rewards/chosen": -2.888566732406616, "rewards/margins": 0.0, "rewards/rejected": -2.888566732406616, "step": 1969 }, { "epoch": 0.44, "learning_rate": 9.102032838253232e-06, "logits/chosen": -1.3191238641738892, "logits/rejected": -1.2760552167892456, "logps/chosen": -113.42156982421875, "logps/rejected": -166.487060546875, "loss": 0.0596, "rewards/accuracies": 1.0, "rewards/chosen": 0.5497833490371704, "rewards/margins": 6.114813327789307, "rewards/rejected": -5.565030097961426, "step": 1970 }, { "epoch": 0.44, "learning_rate": 9.101007756286713e-06, "logits/chosen": -0.7795015573501587, "logits/rejected": -0.767076313495636, "logps/chosen": -165.33213806152344, "logps/rejected": -63.562957763671875, "loss": 0.7134, "rewards/accuracies": 0.0, "rewards/chosen": -5.5813188552856445, "rewards/margins": -1.1521530151367188, "rewards/rejected": -4.429165840148926, "step": 1971 }, { "epoch": 0.44, "learning_rate": 9.099982147350558e-06, "logits/chosen": -0.7433663010597229, "logits/rejected": -0.7099106907844543, "logps/chosen": -235.986328125, "logps/rejected": -267.7471618652344, "loss": 0.0849, "rewards/accuracies": 1.0, "rewards/chosen": -1.791833519935608, "rewards/margins": 6.759677410125732, "rewards/rejected": -8.55151081085205, "step": 1972 }, { "epoch": 0.44, "learning_rate": 9.098956011576552e-06, "logits/chosen": -0.919037401676178, "logits/rejected": -0.8750900626182556, "logps/chosen": -95.4566421508789, "logps/rejected": -44.82845687866211, "loss": 0.2128, "rewards/accuracies": 1.0, "rewards/chosen": -2.059947967529297, "rewards/margins": 0.6470966339111328, "rewards/rejected": -2.7070446014404297, "step": 1973 }, { "epoch": 0.44, "learning_rate": 9.097929349096551e-06, "logits/chosen": -1.2676490545272827, "logits/rejected": -1.2461241483688354, "logps/chosen": -250.35836791992188, "logps/rejected": -209.4020233154297, "loss": 2.9842, "rewards/accuracies": 1.0, "rewards/chosen": 1.5520660877227783, "rewards/margins": 0.8245254755020142, "rewards/rejected": 0.7275406122207642, "step": 1974 }, { "epoch": 0.44, "learning_rate": 9.09690216004248e-06, "logits/chosen": -0.9207807779312134, "logits/rejected": -0.8666151762008667, "logps/chosen": -106.271240234375, "logps/rejected": -185.77877807617188, "loss": 0.0508, "rewards/accuracies": 1.0, "rewards/chosen": 1.2006027698516846, "rewards/margins": 2.2820236682891846, "rewards/rejected": -1.0814208984375, "step": 1975 }, { "epoch": 0.44, "learning_rate": 9.09587444454633e-06, "logits/chosen": -0.8395170569419861, "logits/rejected": -0.7912511229515076, "logps/chosen": -49.36907196044922, "logps/rejected": -118.40489196777344, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": 0.4598712921142578, "rewards/margins": 3.5557239055633545, "rewards/rejected": -3.0958526134490967, "step": 1976 }, { "epoch": 0.44, "learning_rate": 9.094846202740162e-06, "logits/chosen": -1.143190860748291, "logits/rejected": -1.0642882585525513, "logps/chosen": -127.30810546875, "logps/rejected": -150.52923583984375, "loss": 0.8961, "rewards/accuracies": 0.0, "rewards/chosen": 1.0263031721115112, "rewards/margins": -1.4995545148849487, "rewards/rejected": 2.52585768699646, "step": 1977 }, { "epoch": 0.44, "learning_rate": 9.0938174347561e-06, "logits/chosen": -0.684353768825531, "logits/rejected": -0.7140178084373474, "logps/chosen": -188.2803955078125, "logps/rejected": -143.82395935058594, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": 0.2614807188510895, "rewards/margins": 3.558258295059204, "rewards/rejected": -3.2967774868011475, "step": 1978 }, { "epoch": 0.44, "learning_rate": 9.092788140726338e-06, "logits/chosen": -0.9354683756828308, "logits/rejected": -0.9022724628448486, "logps/chosen": -142.1204376220703, "logps/rejected": -178.88629150390625, "loss": 0.0496, "rewards/accuracies": 1.0, "rewards/chosen": 0.3513351380825043, "rewards/margins": 2.317880153656006, "rewards/rejected": -1.9665451049804688, "step": 1979 }, { "epoch": 0.44, "learning_rate": 9.091758320783139e-06, "logits/chosen": -1.1752463579177856, "logits/rejected": -1.1079961061477661, "logps/chosen": -195.9359130859375, "logps/rejected": -258.9389953613281, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 1.4392273426055908, "rewards/margins": 4.8799729347229, "rewards/rejected": -3.4407455921173096, "step": 1980 }, { "epoch": 0.44, "learning_rate": 9.090727975058833e-06, "logits/chosen": -0.827614426612854, "logits/rejected": -0.8101723194122314, "logps/chosen": -132.21719360351562, "logps/rejected": -68.38578033447266, "loss": 0.5364, "rewards/accuracies": 0.0, "rewards/chosen": -5.491682529449463, "rewards/margins": -0.6519527435302734, "rewards/rejected": -4.8397297859191895, "step": 1981 }, { "epoch": 0.44, "learning_rate": 9.089697103685815e-06, "logits/chosen": -1.0050532817840576, "logits/rejected": -1.011188268661499, "logps/chosen": -112.17559051513672, "logps/rejected": -121.04092407226562, "loss": 0.4509, "rewards/accuracies": 0.0, "rewards/chosen": 0.2534843385219574, "rewards/margins": -0.3805480897426605, "rewards/rejected": 0.6340324282646179, "step": 1982 }, { "epoch": 0.44, "learning_rate": 9.08866570679655e-06, "logits/chosen": -1.0674062967300415, "logits/rejected": -1.0521961450576782, "logps/chosen": -221.81410217285156, "logps/rejected": -305.7576904296875, "loss": 0.1397, "rewards/accuracies": 1.0, "rewards/chosen": 1.1490356922149658, "rewards/margins": 1.1324585676193237, "rewards/rejected": 0.01657714881002903, "step": 1983 }, { "epoch": 0.44, "learning_rate": 9.087633784523574e-06, "logits/chosen": -1.0802537202835083, "logits/rejected": -1.0769132375717163, "logps/chosen": -139.1804962158203, "logps/rejected": -167.4857177734375, "loss": 0.8569, "rewards/accuracies": 0.0, "rewards/chosen": -4.589792728424072, "rewards/margins": -1.4980385303497314, "rewards/rejected": -3.091754198074341, "step": 1984 }, { "epoch": 0.44, "learning_rate": 9.08660133699948e-06, "logits/chosen": -0.8460392355918884, "logits/rejected": -0.8272959589958191, "logps/chosen": -92.6211929321289, "logps/rejected": -122.78160095214844, "loss": 0.7249, "rewards/accuracies": 1.0, "rewards/chosen": -2.488091230392456, "rewards/margins": 2.3185737133026123, "rewards/rejected": -4.806664943695068, "step": 1985 }, { "epoch": 0.44, "learning_rate": 9.085568364356939e-06, "logits/chosen": -0.9906520247459412, "logits/rejected": -0.9693537950515747, "logps/chosen": -64.98774719238281, "logps/rejected": -105.09010314941406, "loss": 1.0597, "rewards/accuracies": 1.0, "rewards/chosen": -0.7129940390586853, "rewards/margins": 2.198169231414795, "rewards/rejected": -2.911163330078125, "step": 1986 }, { "epoch": 0.44, "learning_rate": 9.084534866728683e-06, "logits/chosen": -1.1918607950210571, "logits/rejected": -1.1249628067016602, "logps/chosen": -147.72987365722656, "logps/rejected": -169.5191192626953, "loss": 0.2664, "rewards/accuracies": 1.0, "rewards/chosen": -0.3176986873149872, "rewards/margins": 0.3520507514476776, "rewards/rejected": -0.6697494387626648, "step": 1987 }, { "epoch": 0.44, "learning_rate": 9.083500844247517e-06, "logits/chosen": -0.7328096032142639, "logits/rejected": -0.714051365852356, "logps/chosen": -80.31304168701172, "logps/rejected": -114.68141174316406, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.20777206122875214, "rewards/margins": 3.0498709678649902, "rewards/rejected": -2.8420989513397217, "step": 1988 }, { "epoch": 0.44, "learning_rate": 9.082466297046308e-06, "logits/chosen": -0.7546836137771606, "logits/rejected": -0.7627481818199158, "logps/chosen": -140.32308959960938, "logps/rejected": -82.2684097290039, "loss": 0.961, "rewards/accuracies": 0.0, "rewards/chosen": -3.1236984729766846, "rewards/margins": -1.7637275457382202, "rewards/rejected": -1.3599709272384644, "step": 1989 }, { "epoch": 0.44, "learning_rate": 9.081431225257994e-06, "logits/chosen": -1.1124967336654663, "logits/rejected": -1.10177481174469, "logps/chosen": -99.12877655029297, "logps/rejected": -88.21839141845703, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 1.1615241765975952, "rewards/margins": 3.5557098388671875, "rewards/rejected": -2.394185781478882, "step": 1990 }, { "epoch": 0.44, "learning_rate": 9.08039562901558e-06, "logits/chosen": -0.8472124338150024, "logits/rejected": -0.8263760805130005, "logps/chosen": -93.46270751953125, "logps/rejected": -95.0511474609375, "loss": 0.0755, "rewards/accuracies": 1.0, "rewards/chosen": -0.23403015732765198, "rewards/margins": 1.893398404121399, "rewards/rejected": -2.1274285316467285, "step": 1991 }, { "epoch": 0.44, "learning_rate": 9.079359508452138e-06, "logits/chosen": -1.089691162109375, "logits/rejected": -1.0963207483291626, "logps/chosen": -84.13661193847656, "logps/rejected": -92.32229614257812, "loss": 0.0599, "rewards/accuracies": 1.0, "rewards/chosen": -0.06112060695886612, "rewards/margins": 2.515495538711548, "rewards/rejected": -2.576616048812866, "step": 1992 }, { "epoch": 0.44, "learning_rate": 9.078322863700803e-06, "logits/chosen": -0.8661462068557739, "logits/rejected": -0.8942986726760864, "logps/chosen": -160.40896606445312, "logps/rejected": -163.3072509765625, "loss": 0.075, "rewards/accuracies": 1.0, "rewards/chosen": 0.9627441763877869, "rewards/margins": 1.8633499145507812, "rewards/rejected": -0.9006057977676392, "step": 1993 }, { "epoch": 0.44, "learning_rate": 9.077285694894786e-06, "logits/chosen": -1.0760163068771362, "logits/rejected": -1.075693130493164, "logps/chosen": -161.5221405029297, "logps/rejected": -201.71974182128906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 3.029013156890869, "rewards/margins": 8.88029956817627, "rewards/rejected": -5.8512864112854, "step": 1994 }, { "epoch": 0.44, "learning_rate": 9.076248002167357e-06, "logits/chosen": -1.016750454902649, "logits/rejected": -0.9812594056129456, "logps/chosen": -136.48052978515625, "logps/rejected": -149.4176025390625, "loss": 0.2453, "rewards/accuracies": 1.0, "rewards/chosen": -0.7252517938613892, "rewards/margins": 0.4566986560821533, "rewards/rejected": -1.1819504499435425, "step": 1995 }, { "epoch": 0.44, "learning_rate": 9.07520978565186e-06, "logits/chosen": -1.261453628540039, "logits/rejected": -1.206499457359314, "logps/chosen": -103.30256652832031, "logps/rejected": -179.71304321289062, "loss": 0.1444, "rewards/accuracies": 1.0, "rewards/chosen": -2.018419027328491, "rewards/margins": 3.9303557872772217, "rewards/rejected": -5.948774814605713, "step": 1996 }, { "epoch": 0.44, "learning_rate": 9.074171045481701e-06, "logits/chosen": -1.0222512483596802, "logits/rejected": -0.9691986441612244, "logps/chosen": -109.82035064697266, "logps/rejected": -226.5087890625, "loss": 0.0467, "rewards/accuracies": 1.0, "rewards/chosen": -0.7803306579589844, "rewards/margins": 5.359522342681885, "rewards/rejected": -6.139853000640869, "step": 1997 }, { "epoch": 0.44, "learning_rate": 9.073131781790358e-06, "logits/chosen": -0.9701687097549438, "logits/rejected": -0.9750620126724243, "logps/chosen": -136.31382751464844, "logps/rejected": -45.22154998779297, "loss": 1.0849, "rewards/accuracies": 0.0, "rewards/chosen": -3.918165683746338, "rewards/margins": -1.9973665475845337, "rewards/rejected": -1.9207991361618042, "step": 1998 }, { "epoch": 0.44, "learning_rate": 9.072091994711372e-06, "logits/chosen": -1.149935007095337, "logits/rejected": -1.1530240774154663, "logps/chosen": -140.24200439453125, "logps/rejected": -220.29098510742188, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 0.9108245968818665, "rewards/margins": 7.40419340133667, "rewards/rejected": -6.493368625640869, "step": 1999 }, { "epoch": 0.44, "learning_rate": 9.071051684378352e-06, "logits/chosen": -0.8418217897415161, "logits/rejected": -0.823843777179718, "logps/chosen": -116.94450378417969, "logps/rejected": -141.88858032226562, "loss": 0.474, "rewards/accuracies": 0.0, "rewards/chosen": -0.553759753704071, "rewards/margins": -0.45015257596969604, "rewards/rejected": -0.103607177734375, "step": 2000 }, { "epoch": 0.44, "learning_rate": 9.07001085092498e-06, "logits/chosen": -0.9366157054901123, "logits/rejected": -0.9140483736991882, "logps/chosen": -114.59430694580078, "logps/rejected": -210.18923950195312, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 0.002773284912109375, "rewards/margins": 4.766404151916504, "rewards/rejected": -4.7636308670043945, "step": 2001 }, { "epoch": 0.44, "learning_rate": 9.068969494484996e-06, "logits/chosen": -0.9816156625747681, "logits/rejected": -0.8944095969200134, "logps/chosen": -178.43643188476562, "logps/rejected": -164.1969451904297, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.2877090573310852, "rewards/margins": 5.660238742828369, "rewards/rejected": -5.37252950668335, "step": 2002 }, { "epoch": 0.44, "learning_rate": 9.067927615192214e-06, "logits/chosen": -0.7819740772247314, "logits/rejected": -0.7781286239624023, "logps/chosen": -153.68280029296875, "logps/rejected": -44.35257339477539, "loss": 1.4158, "rewards/accuracies": 0.0, "rewards/chosen": -3.908647298812866, "rewards/margins": -0.9574964046478271, "rewards/rejected": -2.951150894165039, "step": 2003 }, { "epoch": 0.44, "learning_rate": 9.066885213180512e-06, "logits/chosen": -0.9189444184303284, "logits/rejected": -0.8854358792304993, "logps/chosen": -98.7523193359375, "logps/rejected": -161.33900451660156, "loss": 0.0897, "rewards/accuracies": 1.0, "rewards/chosen": -3.750880479812622, "rewards/margins": 2.5045225620269775, "rewards/rejected": -6.2554030418396, "step": 2004 }, { "epoch": 0.44, "learning_rate": 9.065842288583838e-06, "logits/chosen": -0.6741698384284973, "logits/rejected": -0.6431547999382019, "logps/chosen": -101.77783203125, "logps/rejected": -145.21072387695312, "loss": 0.1525, "rewards/accuracies": 1.0, "rewards/chosen": -0.2927909791469574, "rewards/margins": 1.2014564275741577, "rewards/rejected": -1.4942474365234375, "step": 2005 }, { "epoch": 0.44, "learning_rate": 9.064798841536203e-06, "logits/chosen": -0.8561466336250305, "logits/rejected": -0.5944753289222717, "logps/chosen": -84.18832397460938, "logps/rejected": -506.44110107421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.2550621032714844, "rewards/margins": 38.50334930419922, "rewards/rejected": -38.7584114074707, "step": 2006 }, { "epoch": 0.44, "learning_rate": 9.063754872171686e-06, "logits/chosen": -0.8010655045509338, "logits/rejected": -0.7768935561180115, "logps/chosen": -94.93577575683594, "logps/rejected": -145.5498046875, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -1.1648911237716675, "rewards/margins": 2.991589069366455, "rewards/rejected": -4.156480312347412, "step": 2007 }, { "epoch": 0.44, "learning_rate": 9.062710380624439e-06, "logits/chosen": -0.9989862442016602, "logits/rejected": -1.065995454788208, "logps/chosen": -144.680908203125, "logps/rejected": -86.09800720214844, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": 0.39498597383499146, "rewards/margins": 3.1007354259490967, "rewards/rejected": -2.70574951171875, "step": 2008 }, { "epoch": 0.44, "learning_rate": 9.061665367028676e-06, "logits/chosen": -0.8042141199111938, "logits/rejected": -0.8210054039955139, "logps/chosen": -101.57223510742188, "logps/rejected": -106.90481567382812, "loss": 0.1574, "rewards/accuracies": 1.0, "rewards/chosen": -0.36464691162109375, "rewards/margins": 0.9945366382598877, "rewards/rejected": -1.3591835498809814, "step": 2009 }, { "epoch": 0.44, "learning_rate": 9.060619831518676e-06, "logits/chosen": -0.8205317258834839, "logits/rejected": -0.7942036986351013, "logps/chosen": -133.30172729492188, "logps/rejected": -167.44049072265625, "loss": 0.0513, "rewards/accuracies": 1.0, "rewards/chosen": -4.31082010269165, "rewards/margins": 2.230778694152832, "rewards/rejected": -6.541598796844482, "step": 2010 }, { "epoch": 0.45, "learning_rate": 9.05957377422879e-06, "logits/chosen": -1.0177249908447266, "logits/rejected": -0.9741158485412598, "logps/chosen": -156.9927978515625, "logps/rejected": -226.26470947265625, "loss": 0.1494, "rewards/accuracies": 1.0, "rewards/chosen": -4.800835609436035, "rewards/margins": 1.0545568466186523, "rewards/rejected": -5.8553924560546875, "step": 2011 }, { "epoch": 0.45, "learning_rate": 9.058527195293431e-06, "logits/chosen": -0.8213894963264465, "logits/rejected": -0.8047638535499573, "logps/chosen": -120.83689880371094, "logps/rejected": -56.454376220703125, "loss": 0.1207, "rewards/accuracies": 1.0, "rewards/chosen": -0.4604942500591278, "rewards/margins": 1.7810521125793457, "rewards/rejected": -2.241546392440796, "step": 2012 }, { "epoch": 0.45, "learning_rate": 9.057480094847085e-06, "logits/chosen": -0.8021037578582764, "logits/rejected": -0.7570480108261108, "logps/chosen": -154.08566284179688, "logps/rejected": -205.62570190429688, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": 1.9895050525665283, "rewards/margins": 7.538657188415527, "rewards/rejected": -5.54915189743042, "step": 2013 }, { "epoch": 0.45, "learning_rate": 9.056432473024302e-06, "logits/chosen": -0.8799383044242859, "logits/rejected": -0.922566294670105, "logps/chosen": -190.11624145507812, "logps/rejected": -111.03924560546875, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": 0.1216278076171875, "rewards/margins": 3.5157203674316406, "rewards/rejected": -3.394092559814453, "step": 2014 }, { "epoch": 0.45, "learning_rate": 9.055384329959695e-06, "logits/chosen": -1.1335606575012207, "logits/rejected": -1.18775475025177, "logps/chosen": -281.71282958984375, "logps/rejected": -153.2000732421875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 2.1188995838165283, "rewards/margins": 6.127524375915527, "rewards/rejected": -4.00862455368042, "step": 2015 }, { "epoch": 0.45, "learning_rate": 9.054335665787952e-06, "logits/chosen": -0.6535094380378723, "logits/rejected": -0.6535094380378723, "logps/chosen": -79.20772552490234, "logps/rejected": -79.20772552490234, "loss": 0.3734, "rewards/accuracies": 0.0, "rewards/chosen": -1.0292503833770752, "rewards/margins": 0.0, "rewards/rejected": -1.0292503833770752, "step": 2016 }, { "epoch": 0.45, "learning_rate": 9.053286480643822e-06, "logits/chosen": -1.230778455734253, "logits/rejected": -1.172924280166626, "logps/chosen": -137.19113159179688, "logps/rejected": -204.4752197265625, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": -0.08785553276538849, "rewards/margins": 2.7954468727111816, "rewards/rejected": -2.8833024501800537, "step": 2017 }, { "epoch": 0.45, "learning_rate": 9.052236774662123e-06, "logits/chosen": -1.1908173561096191, "logits/rejected": -1.2512617111206055, "logps/chosen": -232.7020263671875, "logps/rejected": -183.2966766357422, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": 1.212286353111267, "rewards/margins": 7.132697582244873, "rewards/rejected": -5.920411109924316, "step": 2018 }, { "epoch": 0.45, "learning_rate": 9.051186547977739e-06, "logits/chosen": -0.8944862484931946, "logits/rejected": -0.8646689057350159, "logps/chosen": -139.25083923339844, "logps/rejected": -237.13836669921875, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -3.012197971343994, "rewards/margins": 5.09822416305542, "rewards/rejected": -8.110422134399414, "step": 2019 }, { "epoch": 0.45, "learning_rate": 9.050135800725623e-06, "logits/chosen": -0.6534239649772644, "logits/rejected": -0.6700147390365601, "logps/chosen": -112.59422302246094, "logps/rejected": -106.47950744628906, "loss": 1.3044, "rewards/accuracies": 0.0, "rewards/chosen": -4.402484893798828, "rewards/margins": -0.7521536350250244, "rewards/rejected": -3.6503312587738037, "step": 2020 }, { "epoch": 0.45, "learning_rate": 9.049084533040794e-06, "logits/chosen": -1.2778527736663818, "logits/rejected": -1.2031469345092773, "logps/chosen": -179.45143127441406, "logps/rejected": -244.70269775390625, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.3191330134868622, "rewards/margins": 8.818585395812988, "rewards/rejected": -9.137718200683594, "step": 2021 }, { "epoch": 0.45, "learning_rate": 9.048032745058335e-06, "logits/chosen": -0.7457095980644226, "logits/rejected": -0.7337697744369507, "logps/chosen": -111.30272674560547, "logps/rejected": -110.68504333496094, "loss": 0.1761, "rewards/accuracies": 1.0, "rewards/chosen": -1.0391181707382202, "rewards/margins": 1.0236626863479614, "rewards/rejected": -2.0627808570861816, "step": 2022 }, { "epoch": 0.45, "learning_rate": 9.0469804369134e-06, "logits/chosen": -1.0279873609542847, "logits/rejected": -0.9354423880577087, "logps/chosen": -251.03045654296875, "logps/rejected": -218.08895874023438, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": 3.6840882301330566, "rewards/margins": 12.159809112548828, "rewards/rejected": -8.475720405578613, "step": 2023 }, { "epoch": 0.45, "learning_rate": 9.045927608741207e-06, "logits/chosen": -1.2000211477279663, "logits/rejected": -1.143187403678894, "logps/chosen": -140.88247680664062, "logps/rejected": -189.31329345703125, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": 1.8333313465118408, "rewards/margins": 2.965564012527466, "rewards/rejected": -1.132232666015625, "step": 2024 }, { "epoch": 0.45, "learning_rate": 9.044874260677043e-06, "logits/chosen": -1.1269630193710327, "logits/rejected": -1.109779715538025, "logps/chosen": -92.94857788085938, "logps/rejected": -135.48928833007812, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": 0.06125640869140625, "rewards/margins": 4.061374187469482, "rewards/rejected": -4.000117778778076, "step": 2025 }, { "epoch": 0.45, "learning_rate": 9.043820392856259e-06, "logits/chosen": -1.0753017663955688, "logits/rejected": -1.0428829193115234, "logps/chosen": -124.73844909667969, "logps/rejected": -169.20843505859375, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -2.6900315284729004, "rewards/margins": 2.656424045562744, "rewards/rejected": -5.3464555740356445, "step": 2026 }, { "epoch": 0.45, "learning_rate": 9.042766005414278e-06, "logits/chosen": -1.006872534751892, "logits/rejected": -0.8684190511703491, "logps/chosen": -309.50335693359375, "logps/rejected": -277.7834167480469, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": -2.315600633621216, "rewards/margins": 14.249396324157715, "rewards/rejected": -16.56499671936035, "step": 2027 }, { "epoch": 0.45, "learning_rate": 9.041711098486583e-06, "logits/chosen": -0.9131839275360107, "logits/rejected": -0.9363884925842285, "logps/chosen": -84.41394805908203, "logps/rejected": -111.73548889160156, "loss": 0.5412, "rewards/accuracies": 0.0, "rewards/chosen": -0.876483142375946, "rewards/margins": -0.666771650314331, "rewards/rejected": -0.2097114622592926, "step": 2028 }, { "epoch": 0.45, "learning_rate": 9.040655672208727e-06, "logits/chosen": -0.7464302182197571, "logits/rejected": -0.6571162343025208, "logps/chosen": -96.98726654052734, "logps/rejected": -180.4398193359375, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": 0.5873298645019531, "rewards/margins": 3.9965996742248535, "rewards/rejected": -3.4092698097229004, "step": 2029 }, { "epoch": 0.45, "learning_rate": 9.03959972671633e-06, "logits/chosen": -1.0173265933990479, "logits/rejected": -1.228744626045227, "logps/chosen": -193.38198852539062, "logps/rejected": -165.9031982421875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 2.4832794666290283, "rewards/margins": 5.851923942565918, "rewards/rejected": -3.3686447143554688, "step": 2030 }, { "epoch": 0.45, "learning_rate": 9.03854326214508e-06, "logits/chosen": -0.855582594871521, "logits/rejected": -0.8260360956192017, "logps/chosen": -156.54013061523438, "logps/rejected": -172.00335693359375, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.9036346673965454, "rewards/margins": 3.9436097145080566, "rewards/rejected": -5.8472442626953125, "step": 2031 }, { "epoch": 0.45, "learning_rate": 9.037486278630729e-06, "logits/chosen": -1.0725066661834717, "logits/rejected": -1.048150897026062, "logps/chosen": -90.23464965820312, "logps/rejected": -84.39802551269531, "loss": 0.1934, "rewards/accuracies": 1.0, "rewards/chosen": 0.6238166689872742, "rewards/margins": 2.8796818256378174, "rewards/rejected": -2.2558650970458984, "step": 2032 }, { "epoch": 0.45, "learning_rate": 9.036428776309096e-06, "logits/chosen": -1.2499439716339111, "logits/rejected": -1.2364643812179565, "logps/chosen": -214.66380310058594, "logps/rejected": -271.3029479980469, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.5004333853721619, "rewards/margins": 5.996758937835693, "rewards/rejected": -6.4971923828125, "step": 2033 }, { "epoch": 0.45, "learning_rate": 9.03537075531607e-06, "logits/chosen": -1.2039262056350708, "logits/rejected": -1.1524999141693115, "logps/chosen": -146.43247985839844, "logps/rejected": -220.1146240234375, "loss": 1.6388, "rewards/accuracies": 0.0, "rewards/chosen": -2.6556413173675537, "rewards/margins": -3.2256486415863037, "rewards/rejected": 0.57000732421875, "step": 2034 }, { "epoch": 0.45, "learning_rate": 9.034312215787603e-06, "logits/chosen": -1.008143424987793, "logits/rejected": -0.7677645683288574, "logps/chosen": -252.1276397705078, "logps/rejected": -480.1026611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.05074157938361168, "rewards/margins": 24.12299346923828, "rewards/rejected": -24.173734664916992, "step": 2035 }, { "epoch": 0.45, "learning_rate": 9.033253157859715e-06, "logits/chosen": -1.0963294506072998, "logits/rejected": -1.0705573558807373, "logps/chosen": -223.6215057373047, "logps/rejected": -272.93243408203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9480209350585938, "rewards/margins": 8.291160583496094, "rewards/rejected": -9.239181518554688, "step": 2036 }, { "epoch": 0.45, "learning_rate": 9.03219358166849e-06, "logits/chosen": -0.9387593269348145, "logits/rejected": -0.930974006652832, "logps/chosen": -121.9220199584961, "logps/rejected": -106.40634155273438, "loss": 0.2247, "rewards/accuracies": 1.0, "rewards/chosen": -0.3695564270019531, "rewards/margins": 2.755993604660034, "rewards/rejected": -3.1255500316619873, "step": 2037 }, { "epoch": 0.45, "learning_rate": 9.031133487350084e-06, "logits/chosen": -0.8907620310783386, "logits/rejected": -0.7870395183563232, "logps/chosen": -59.5543327331543, "logps/rejected": -75.09728240966797, "loss": 0.178, "rewards/accuracies": 1.0, "rewards/chosen": -0.3102840483188629, "rewards/margins": 4.364105224609375, "rewards/rejected": -4.674389362335205, "step": 2038 }, { "epoch": 0.45, "learning_rate": 9.030072875040714e-06, "logits/chosen": -0.9021351337432861, "logits/rejected": -0.9021351337432861, "logps/chosen": -177.8517608642578, "logps/rejected": -177.8517608642578, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.476561069488525, "rewards/margins": 0.0, "rewards/rejected": -4.476561069488525, "step": 2039 }, { "epoch": 0.45, "learning_rate": 9.029011744876669e-06, "logits/chosen": -1.271801471710205, "logits/rejected": -1.2246909141540527, "logps/chosen": -55.785823822021484, "logps/rejected": -161.2954864501953, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -1.593542456626892, "rewards/margins": 3.3709897994995117, "rewards/rejected": -4.964532375335693, "step": 2040 }, { "epoch": 0.45, "learning_rate": 9.027950096994299e-06, "logits/chosen": -0.7334128618240356, "logits/rejected": -0.7185096740722656, "logps/chosen": -143.6699676513672, "logps/rejected": -119.45674896240234, "loss": 0.0436, "rewards/accuracies": 1.0, "rewards/chosen": 0.9804901480674744, "rewards/margins": 4.515023231506348, "rewards/rejected": -3.5345330238342285, "step": 2041 }, { "epoch": 0.45, "learning_rate": 9.026887931530026e-06, "logits/chosen": -0.6674028635025024, "logits/rejected": -0.6570383310317993, "logps/chosen": -99.14494323730469, "logps/rejected": -191.81170654296875, "loss": 0.0716, "rewards/accuracies": 1.0, "rewards/chosen": -1.449170708656311, "rewards/margins": 1.9795204401016235, "rewards/rejected": -3.4286911487579346, "step": 2042 }, { "epoch": 0.45, "learning_rate": 9.025825248620332e-06, "logits/chosen": -1.3157191276550293, "logits/rejected": -1.2452666759490967, "logps/chosen": -189.951171875, "logps/rejected": -195.32936096191406, "loss": 0.2778, "rewards/accuracies": 1.0, "rewards/chosen": -1.0795379877090454, "rewards/margins": 0.36401212215423584, "rewards/rejected": -1.4435501098632812, "step": 2043 }, { "epoch": 0.45, "learning_rate": 9.024762048401775e-06, "logits/chosen": -1.1786046028137207, "logits/rejected": -1.130220890045166, "logps/chosen": -130.38919067382812, "logps/rejected": -194.8880615234375, "loss": 0.1394, "rewards/accuracies": 1.0, "rewards/chosen": -1.0671600103378296, "rewards/margins": 1.175521969795227, "rewards/rejected": -2.2426819801330566, "step": 2044 }, { "epoch": 0.45, "learning_rate": 9.023698331010966e-06, "logits/chosen": -0.9090220928192139, "logits/rejected": -0.8971254825592041, "logps/chosen": -77.05230712890625, "logps/rejected": -132.3663330078125, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": -1.6038963794708252, "rewards/margins": 3.0669729709625244, "rewards/rejected": -4.67086935043335, "step": 2045 }, { "epoch": 0.45, "learning_rate": 9.022634096584597e-06, "logits/chosen": -0.841774046421051, "logits/rejected": -0.7541810870170593, "logps/chosen": -213.39427185058594, "logps/rejected": -302.51708984375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.9975234866142273, "rewards/margins": 6.196964740753174, "rewards/rejected": -5.199441432952881, "step": 2046 }, { "epoch": 0.45, "learning_rate": 9.021569345259415e-06, "logits/chosen": -0.8770995140075684, "logits/rejected": -0.9085297584533691, "logps/chosen": -211.78546142578125, "logps/rejected": -185.9321746826172, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -1.1215041875839233, "rewards/margins": 3.324380397796631, "rewards/rejected": -4.445884704589844, "step": 2047 }, { "epoch": 0.45, "learning_rate": 9.02050407717224e-06, "logits/chosen": -1.1610009670257568, "logits/rejected": -1.0652159452438354, "logps/chosen": -101.28975677490234, "logps/rejected": -255.28469848632812, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.7619835138320923, "rewards/margins": 5.130303382873535, "rewards/rejected": -5.892286777496338, "step": 2048 }, { "epoch": 0.45, "learning_rate": 9.019438292459958e-06, "logits/chosen": -0.9119619131088257, "logits/rejected": -0.6438639760017395, "logps/chosen": -196.48211669921875, "logps/rejected": -668.4659423828125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 0.6229171752929688, "rewards/margins": 39.175662994384766, "rewards/rejected": -38.5527458190918, "step": 2049 }, { "epoch": 0.45, "learning_rate": 9.018371991259516e-06, "logits/chosen": -0.7828070521354675, "logits/rejected": -0.740776777267456, "logps/chosen": -220.9263153076172, "logps/rejected": -108.23841094970703, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -0.8906753659248352, "rewards/margins": 4.235995292663574, "rewards/rejected": -5.126670837402344, "step": 2050 }, { "epoch": 0.45, "learning_rate": 9.017305173707932e-06, "logits/chosen": -1.0665955543518066, "logits/rejected": -1.0568559169769287, "logps/chosen": -166.9987335205078, "logps/rejected": -222.0430145263672, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.0010574341285973787, "rewards/margins": 5.728938579559326, "rewards/rejected": -5.727880954742432, "step": 2051 }, { "epoch": 0.45, "learning_rate": 9.016237839942294e-06, "logits/chosen": -0.9722959995269775, "logits/rejected": -0.9440324902534485, "logps/chosen": -149.98257446289062, "logps/rejected": -95.47884368896484, "loss": 0.0416, "rewards/accuracies": 1.0, "rewards/chosen": -0.2858474850654602, "rewards/margins": 2.520102024078369, "rewards/rejected": -2.8059494495391846, "step": 2052 }, { "epoch": 0.45, "learning_rate": 9.015169990099746e-06, "logits/chosen": -1.0604465007781982, "logits/rejected": -1.0382699966430664, "logps/chosen": -90.81729125976562, "logps/rejected": -186.12356567382812, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -0.32271119952201843, "rewards/margins": 3.0099289417266846, "rewards/rejected": -3.3326401710510254, "step": 2053 }, { "epoch": 0.45, "learning_rate": 9.014101624317506e-06, "logits/chosen": -1.1539065837860107, "logits/rejected": -1.0325747728347778, "logps/chosen": -156.58372497558594, "logps/rejected": -107.41741943359375, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": 1.3910843133926392, "rewards/margins": 7.086358070373535, "rewards/rejected": -5.6952738761901855, "step": 2054 }, { "epoch": 0.45, "learning_rate": 9.013032742732858e-06, "logits/chosen": -0.7610188722610474, "logits/rejected": -0.8833523392677307, "logps/chosen": -166.85574340820312, "logps/rejected": -58.222007751464844, "loss": 0.0443, "rewards/accuracies": 1.0, "rewards/chosen": -0.9771392941474915, "rewards/margins": 2.8363099098205566, "rewards/rejected": -3.8134491443634033, "step": 2055 }, { "epoch": 0.46, "learning_rate": 9.01196334548315e-06, "logits/chosen": -1.551841378211975, "logits/rejected": -1.536613941192627, "logps/chosen": -98.55741882324219, "logps/rejected": -192.66453552246094, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.811000108718872, "rewards/margins": 5.216864585876465, "rewards/rejected": -7.027864933013916, "step": 2056 }, { "epoch": 0.46, "learning_rate": 9.010893432705796e-06, "logits/chosen": -0.7957904934883118, "logits/rejected": -0.7397662997245789, "logps/chosen": -194.2945556640625, "logps/rejected": -108.95762634277344, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.268438696861267, "rewards/margins": 7.608991622924805, "rewards/rejected": -6.340552806854248, "step": 2057 }, { "epoch": 0.46, "learning_rate": 9.009823004538278e-06, "logits/chosen": -1.2515592575073242, "logits/rejected": -1.2515592575073242, "logps/chosen": -185.81106567382812, "logps/rejected": -185.81106567382812, "loss": 0.3996, "rewards/accuracies": 0.0, "rewards/chosen": -2.1055314540863037, "rewards/margins": 0.0, "rewards/rejected": -2.1055314540863037, "step": 2058 }, { "epoch": 0.46, "learning_rate": 9.008752061118143e-06, "logits/chosen": -1.2392258644104004, "logits/rejected": -1.1520720720291138, "logps/chosen": -87.53995513916016, "logps/rejected": -231.34654235839844, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.10946197807788849, "rewards/margins": 5.805774211883545, "rewards/rejected": -5.915235996246338, "step": 2059 }, { "epoch": 0.46, "learning_rate": 9.007680602583005e-06, "logits/chosen": -1.2208359241485596, "logits/rejected": -1.1879308223724365, "logps/chosen": -80.90046691894531, "logps/rejected": -138.61178588867188, "loss": 0.155, "rewards/accuracies": 1.0, "rewards/chosen": -1.3118187189102173, "rewards/margins": 1.0153793096542358, "rewards/rejected": -2.327198028564453, "step": 2060 }, { "epoch": 0.46, "learning_rate": 9.006608629070543e-06, "logits/chosen": -1.045183777809143, "logits/rejected": -1.0133458375930786, "logps/chosen": -130.50930786132812, "logps/rejected": -149.5067138671875, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -3.0676567554473877, "rewards/margins": 3.7422120571136475, "rewards/rejected": -6.809868812561035, "step": 2061 }, { "epoch": 0.46, "learning_rate": 9.005536140718506e-06, "logits/chosen": -0.8573155403137207, "logits/rejected": -0.8346744179725647, "logps/chosen": -156.36390686035156, "logps/rejected": -134.411376953125, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": 0.9757080078125, "rewards/margins": 5.7248125076293945, "rewards/rejected": -4.7491044998168945, "step": 2062 }, { "epoch": 0.46, "learning_rate": 9.004463137664701e-06, "logits/chosen": -1.1105507612228394, "logits/rejected": -1.0636656284332275, "logps/chosen": -157.94744873046875, "logps/rejected": -200.21705627441406, "loss": 0.872, "rewards/accuracies": 0.0, "rewards/chosen": -0.5894256830215454, "rewards/margins": -1.5161758661270142, "rewards/rejected": 0.9267501831054688, "step": 2063 }, { "epoch": 0.46, "learning_rate": 9.003389620047012e-06, "logits/chosen": -0.8844681978225708, "logits/rejected": -0.8255605697631836, "logps/chosen": -94.49626159667969, "logps/rejected": -279.43133544921875, "loss": 1.4294, "rewards/accuracies": 0.0, "rewards/chosen": -2.294442892074585, "rewards/margins": -2.79758620262146, "rewards/rejected": 0.503143310546875, "step": 2064 }, { "epoch": 0.46, "learning_rate": 9.002315588003378e-06, "logits/chosen": -1.0739130973815918, "logits/rejected": -1.140106439590454, "logps/chosen": -141.144287109375, "logps/rejected": -120.14440155029297, "loss": 0.0989, "rewards/accuracies": 1.0, "rewards/chosen": -2.436274766921997, "rewards/margins": 2.2825167179107666, "rewards/rejected": -4.718791484832764, "step": 2065 }, { "epoch": 0.46, "learning_rate": 9.001241041671814e-06, "logits/chosen": -1.002840518951416, "logits/rejected": -0.90977543592453, "logps/chosen": -70.72541046142578, "logps/rejected": -159.02191162109375, "loss": 1.395, "rewards/accuracies": 1.0, "rewards/chosen": 0.10390549153089523, "rewards/margins": 3.0424017906188965, "rewards/rejected": -2.9384963512420654, "step": 2066 }, { "epoch": 0.46, "learning_rate": 9.000165981190396e-06, "logits/chosen": -0.91978520154953, "logits/rejected": -0.9050698280334473, "logps/chosen": -82.54391479492188, "logps/rejected": -105.05279541015625, "loss": 0.2528, "rewards/accuracies": 1.0, "rewards/chosen": -2.8396763801574707, "rewards/margins": 0.418834924697876, "rewards/rejected": -3.2585113048553467, "step": 2067 }, { "epoch": 0.46, "learning_rate": 8.999090406697263e-06, "logits/chosen": -1.1376227140426636, "logits/rejected": -1.1669542789459229, "logps/chosen": -153.30722045898438, "logps/rejected": -162.05288696289062, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -0.7830520868301392, "rewards/margins": 3.926727294921875, "rewards/rejected": -4.709779262542725, "step": 2068 }, { "epoch": 0.46, "learning_rate": 8.998014318330627e-06, "logits/chosen": -1.1420297622680664, "logits/rejected": -1.1531935930252075, "logps/chosen": -170.68490600585938, "logps/rejected": -126.90559387207031, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": -0.8506225943565369, "rewards/margins": 2.9704055786132812, "rewards/rejected": -3.821028232574463, "step": 2069 }, { "epoch": 0.46, "learning_rate": 8.996937716228763e-06, "logits/chosen": -1.2026163339614868, "logits/rejected": -1.2856882810592651, "logps/chosen": -129.84744262695312, "logps/rejected": -53.513511657714844, "loss": 0.3629, "rewards/accuracies": 1.0, "rewards/chosen": -0.8183525204658508, "rewards/margins": 1.606523036956787, "rewards/rejected": -2.424875497817993, "step": 2070 }, { "epoch": 0.46, "learning_rate": 8.99586060053001e-06, "logits/chosen": -1.001939296722412, "logits/rejected": -0.955126166343689, "logps/chosen": -90.24380493164062, "logps/rejected": -108.3024673461914, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.00787200964987278, "rewards/margins": 5.146270275115967, "rewards/rejected": -5.154142379760742, "step": 2071 }, { "epoch": 0.46, "learning_rate": 8.994782971372776e-06, "logits/chosen": -1.0842105150222778, "logits/rejected": -1.0493948459625244, "logps/chosen": -142.673828125, "logps/rejected": -166.8997802734375, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -0.15997925400733948, "rewards/margins": 3.2453720569610596, "rewards/rejected": -3.405351400375366, "step": 2072 }, { "epoch": 0.46, "learning_rate": 8.993704828895533e-06, "logits/chosen": -0.860568642616272, "logits/rejected": -0.8597739934921265, "logps/chosen": -217.0393829345703, "logps/rejected": -237.7543182373047, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.2199249267578125, "rewards/margins": 6.923506259918213, "rewards/rejected": -5.7035813331604, "step": 2073 }, { "epoch": 0.46, "learning_rate": 8.99262617323682e-06, "logits/chosen": -1.1508219242095947, "logits/rejected": -1.1508219242095947, "logps/chosen": -103.60436248779297, "logps/rejected": -103.60436248779297, "loss": 0.3509, "rewards/accuracies": 0.0, "rewards/chosen": -4.53676700592041, "rewards/margins": 0.0, "rewards/rejected": -4.53676700592041, "step": 2074 }, { "epoch": 0.46, "learning_rate": 8.991547004535244e-06, "logits/chosen": -1.1650969982147217, "logits/rejected": -1.1750595569610596, "logps/chosen": -182.09825134277344, "logps/rejected": -113.99063110351562, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 0.9164687991142273, "rewards/margins": 4.014611721038818, "rewards/rejected": -3.0981431007385254, "step": 2075 }, { "epoch": 0.46, "learning_rate": 8.99046732292947e-06, "logits/chosen": -0.8318945169448853, "logits/rejected": -0.668241560459137, "logps/chosen": -101.47988891601562, "logps/rejected": -542.64794921875, "loss": 0.2951, "rewards/accuracies": 1.0, "rewards/chosen": 0.6940330862998962, "rewards/margins": 47.46323013305664, "rewards/rejected": -46.769195556640625, "step": 2076 }, { "epoch": 0.46, "learning_rate": 8.98938712855824e-06, "logits/chosen": -0.731457531452179, "logits/rejected": -0.7177640795707703, "logps/chosen": -135.01910400390625, "logps/rejected": -160.46510314941406, "loss": 0.2928, "rewards/accuracies": 1.0, "rewards/chosen": 0.24799957871437073, "rewards/margins": 1.6414978504180908, "rewards/rejected": -1.3934983015060425, "step": 2077 }, { "epoch": 0.46, "learning_rate": 8.988306421560354e-06, "logits/chosen": -0.6276485919952393, "logits/rejected": -0.6175953149795532, "logps/chosen": -99.971923828125, "logps/rejected": -104.39421081542969, "loss": 0.114, "rewards/accuracies": 1.0, "rewards/chosen": 0.07790680229663849, "rewards/margins": 1.4036362171173096, "rewards/rejected": -1.3257293701171875, "step": 2078 }, { "epoch": 0.46, "learning_rate": 8.98722520207468e-06, "logits/chosen": -0.9964985251426697, "logits/rejected": -0.8037267923355103, "logps/chosen": -90.0888442993164, "logps/rejected": -504.71295166015625, "loss": 0.0699, "rewards/accuracies": 1.0, "rewards/chosen": -0.3650924861431122, "rewards/margins": 29.398731231689453, "rewards/rejected": -29.763824462890625, "step": 2079 }, { "epoch": 0.46, "learning_rate": 8.986143470240152e-06, "logits/chosen": -0.9412833452224731, "logits/rejected": -0.979220986366272, "logps/chosen": -140.7870330810547, "logps/rejected": -126.33905029296875, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": 1.8681640625, "rewards/margins": 2.727710008621216, "rewards/rejected": -0.859545886516571, "step": 2080 }, { "epoch": 0.46, "learning_rate": 8.98506122619577e-06, "logits/chosen": -0.9502788186073303, "logits/rejected": -0.9474412202835083, "logps/chosen": -181.75079345703125, "logps/rejected": -149.1313018798828, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -0.40913698077201843, "rewards/margins": 3.107598066329956, "rewards/rejected": -3.516735076904297, "step": 2081 }, { "epoch": 0.46, "learning_rate": 8.983978470080603e-06, "logits/chosen": -0.8105007410049438, "logits/rejected": -0.7987738251686096, "logps/chosen": -97.4899673461914, "logps/rejected": -91.40428924560547, "loss": 0.1453, "rewards/accuracies": 1.0, "rewards/chosen": -0.3952781856060028, "rewards/margins": 1.0868247747421265, "rewards/rejected": -1.4821029901504517, "step": 2082 }, { "epoch": 0.46, "learning_rate": 8.982895202033776e-06, "logits/chosen": -1.2730751037597656, "logits/rejected": -1.2380166053771973, "logps/chosen": -119.97298431396484, "logps/rejected": -222.0356903076172, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.7756264209747314, "rewards/margins": 4.928781509399414, "rewards/rejected": -6.704408168792725, "step": 2083 }, { "epoch": 0.46, "learning_rate": 8.981811422194493e-06, "logits/chosen": -0.9849050641059875, "logits/rejected": -0.9896820187568665, "logps/chosen": -197.7495880126953, "logps/rejected": -262.45941162109375, "loss": 0.099, "rewards/accuracies": 1.0, "rewards/chosen": 1.292364478111267, "rewards/margins": 5.199232578277588, "rewards/rejected": -3.9068679809570312, "step": 2084 }, { "epoch": 0.46, "learning_rate": 8.980727130702014e-06, "logits/chosen": -0.722287118434906, "logits/rejected": -0.7345907688140869, "logps/chosen": -130.75457763671875, "logps/rejected": -186.56326293945312, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": -4.051307678222656, "rewards/margins": 2.993168830871582, "rewards/rejected": -7.044476509094238, "step": 2085 }, { "epoch": 0.46, "learning_rate": 8.979642327695668e-06, "logits/chosen": -0.9934749007225037, "logits/rejected": -0.9791606068611145, "logps/chosen": -121.82112884521484, "logps/rejected": -55.54396438598633, "loss": 0.3032, "rewards/accuracies": 1.0, "rewards/chosen": 0.7592216730117798, "rewards/margins": 3.0933656692504883, "rewards/rejected": -2.334143877029419, "step": 2086 }, { "epoch": 0.46, "learning_rate": 8.978557013314848e-06, "logits/chosen": -1.1611214876174927, "logits/rejected": -1.2820394039154053, "logps/chosen": -187.6022186279297, "logps/rejected": -112.278564453125, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": 0.4782058894634247, "rewards/margins": 2.5818161964416504, "rewards/rejected": -2.1036102771759033, "step": 2087 }, { "epoch": 0.46, "learning_rate": 8.977471187699019e-06, "logits/chosen": -1.1523866653442383, "logits/rejected": -1.1563652753829956, "logps/chosen": -233.98916625976562, "logps/rejected": -183.8070831298828, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -1.84600830078125, "rewards/margins": 3.490718364715576, "rewards/rejected": -5.336726665496826, "step": 2088 }, { "epoch": 0.46, "learning_rate": 8.976384850987702e-06, "logits/chosen": -0.9185315370559692, "logits/rejected": -0.9185315370559692, "logps/chosen": -135.88514709472656, "logps/rejected": -135.88514709472656, "loss": 0.9489, "rewards/accuracies": 0.0, "rewards/chosen": -1.1401726007461548, "rewards/margins": 0.0, "rewards/rejected": -1.1401726007461548, "step": 2089 }, { "epoch": 0.46, "learning_rate": 8.97529800332049e-06, "logits/chosen": -1.1124143600463867, "logits/rejected": -1.1066585779190063, "logps/chosen": -110.04826354980469, "logps/rejected": -68.4715347290039, "loss": 0.3131, "rewards/accuracies": 1.0, "rewards/chosen": -2.5875747203826904, "rewards/margins": 0.6416447162628174, "rewards/rejected": -3.229219436645508, "step": 2090 }, { "epoch": 0.46, "learning_rate": 8.974210644837042e-06, "logits/chosen": -0.6822049617767334, "logits/rejected": -0.6794348359107971, "logps/chosen": -112.93052673339844, "logps/rejected": -97.2959213256836, "loss": 0.2575, "rewards/accuracies": 1.0, "rewards/chosen": -2.8187217712402344, "rewards/margins": 0.4217255115509033, "rewards/rejected": -3.2404472827911377, "step": 2091 }, { "epoch": 0.46, "learning_rate": 8.973122775677078e-06, "logits/chosen": -1.1212526559829712, "logits/rejected": -1.0845047235488892, "logps/chosen": -117.62493896484375, "logps/rejected": -144.45314025878906, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": -2.3239593505859375, "rewards/margins": 2.149013042449951, "rewards/rejected": -4.472972393035889, "step": 2092 }, { "epoch": 0.46, "learning_rate": 8.97203439598039e-06, "logits/chosen": -1.1664012670516968, "logits/rejected": -1.1467951536178589, "logps/chosen": -134.85757446289062, "logps/rejected": -224.22055053710938, "loss": 0.1048, "rewards/accuracies": 1.0, "rewards/chosen": -4.612466335296631, "rewards/margins": 2.7784957885742188, "rewards/rejected": -7.39096212387085, "step": 2093 }, { "epoch": 0.46, "learning_rate": 8.970945505886832e-06, "logits/chosen": -0.8604951500892639, "logits/rejected": -0.8497684001922607, "logps/chosen": -125.57434844970703, "logps/rejected": -55.20222473144531, "loss": 0.2862, "rewards/accuracies": 1.0, "rewards/chosen": -1.6504677534103394, "rewards/margins": 0.3093017339706421, "rewards/rejected": -1.9597694873809814, "step": 2094 }, { "epoch": 0.46, "learning_rate": 8.96985610553632e-06, "logits/chosen": -1.3291265964508057, "logits/rejected": -1.33087158203125, "logps/chosen": -126.45965576171875, "logps/rejected": -168.05203247070312, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 0.7143142819404602, "rewards/margins": 4.041453838348389, "rewards/rejected": -3.327139377593994, "step": 2095 }, { "epoch": 0.46, "learning_rate": 8.968766195068845e-06, "logits/chosen": -0.9824324250221252, "logits/rejected": -0.8959338068962097, "logps/chosen": -189.10560607910156, "logps/rejected": -185.8485565185547, "loss": 0.4149, "rewards/accuracies": 1.0, "rewards/chosen": -0.7325912714004517, "rewards/margins": 5.294589519500732, "rewards/rejected": -6.0271806716918945, "step": 2096 }, { "epoch": 0.46, "learning_rate": 8.967675774624451e-06, "logits/chosen": -1.0347943305969238, "logits/rejected": -0.9503641128540039, "logps/chosen": -214.52212524414062, "logps/rejected": -152.38119506835938, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.4600571393966675, "rewards/margins": 6.4949493408203125, "rewards/rejected": -7.9550065994262695, "step": 2097 }, { "epoch": 0.46, "learning_rate": 8.96658484434326e-06, "logits/chosen": -1.0774692296981812, "logits/rejected": -1.0683352947235107, "logps/chosen": -108.68103790283203, "logps/rejected": -87.3967056274414, "loss": 0.3502, "rewards/accuracies": 1.0, "rewards/chosen": 0.5788177847862244, "rewards/margins": 0.8067688345909119, "rewards/rejected": -0.2279510498046875, "step": 2098 }, { "epoch": 0.46, "learning_rate": 8.96549340436545e-06, "logits/chosen": -0.9015557765960693, "logits/rejected": -0.9214261770248413, "logps/chosen": -187.1778106689453, "logps/rejected": -199.86900329589844, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 1.8669906854629517, "rewards/margins": 8.113189697265625, "rewards/rejected": -6.246199131011963, "step": 2099 }, { "epoch": 0.46, "learning_rate": 8.964401454831273e-06, "logits/chosen": -0.8587803244590759, "logits/rejected": -0.7798009514808655, "logps/chosen": -105.43807220458984, "logps/rejected": -55.62477493286133, "loss": 0.4014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5653068423271179, "rewards/margins": 2.1556525230407715, "rewards/rejected": -2.720959424972534, "step": 2100 }, { "epoch": 0.47, "learning_rate": 8.963308995881037e-06, "logits/chosen": -1.423480749130249, "logits/rejected": -1.387420892715454, "logps/chosen": -80.34294891357422, "logps/rejected": -150.54217529296875, "loss": 0.1976, "rewards/accuracies": 1.0, "rewards/chosen": -0.16008834540843964, "rewards/margins": 0.7646644711494446, "rewards/rejected": -0.9247528314590454, "step": 2101 }, { "epoch": 0.47, "learning_rate": 8.962216027655123e-06, "logits/chosen": -0.8360453844070435, "logits/rejected": -0.7837570905685425, "logps/chosen": -204.73550415039062, "logps/rejected": -213.1981964111328, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.709545910358429, "rewards/margins": 8.52367115020752, "rewards/rejected": -7.814125061035156, "step": 2102 }, { "epoch": 0.47, "learning_rate": 8.961122550293975e-06, "logits/chosen": -0.9853562116622925, "logits/rejected": -0.9744197130203247, "logps/chosen": -68.322998046875, "logps/rejected": -150.180419921875, "loss": 0.1369, "rewards/accuracies": 1.0, "rewards/chosen": -0.2506042420864105, "rewards/margins": 4.853922367095947, "rewards/rejected": -5.104526519775391, "step": 2103 }, { "epoch": 0.47, "learning_rate": 8.960028563938101e-06, "logits/chosen": -0.5633057355880737, "logits/rejected": -0.5855814218521118, "logps/chosen": -184.74549865722656, "logps/rejected": -202.376708984375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.859844923019409, "rewards/margins": 6.718082427978516, "rewards/rejected": -10.577927589416504, "step": 2104 }, { "epoch": 0.47, "learning_rate": 8.958934068728078e-06, "logits/chosen": -0.7329879403114319, "logits/rejected": -0.7247428894042969, "logps/chosen": -150.2084503173828, "logps/rejected": -129.41949462890625, "loss": 0.1913, "rewards/accuracies": 1.0, "rewards/chosen": -1.388861060142517, "rewards/margins": 0.796094536781311, "rewards/rejected": -2.184955596923828, "step": 2105 }, { "epoch": 0.47, "learning_rate": 8.957839064804542e-06, "logits/chosen": -1.0963321924209595, "logits/rejected": -1.0555962324142456, "logps/chosen": -133.6495361328125, "logps/rejected": -147.40945434570312, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -1.141030192375183, "rewards/margins": 3.1361069679260254, "rewards/rejected": -4.277137279510498, "step": 2106 }, { "epoch": 0.47, "learning_rate": 8.9567435523082e-06, "logits/chosen": -0.8977435827255249, "logits/rejected": -0.8434890508651733, "logps/chosen": -80.4749526977539, "logps/rejected": -144.99618530273438, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": 0.20868682861328125, "rewards/margins": 3.405487060546875, "rewards/rejected": -3.1968002319335938, "step": 2107 }, { "epoch": 0.47, "learning_rate": 8.955647531379826e-06, "logits/chosen": -0.7779518365859985, "logits/rejected": -0.7299814820289612, "logps/chosen": -176.22926330566406, "logps/rejected": -178.47219848632812, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": 0.3651870787143707, "rewards/margins": 11.260910987854004, "rewards/rejected": -10.895724296569824, "step": 2108 }, { "epoch": 0.47, "learning_rate": 8.954551002160252e-06, "logits/chosen": -0.8317819237709045, "logits/rejected": -0.7998641729354858, "logps/chosen": -100.06735229492188, "logps/rejected": -256.7652587890625, "loss": 0.1003, "rewards/accuracies": 1.0, "rewards/chosen": -1.335370659828186, "rewards/margins": 4.062896251678467, "rewards/rejected": -5.398266792297363, "step": 2109 }, { "epoch": 0.47, "learning_rate": 8.95345396479038e-06, "logits/chosen": -1.159440517425537, "logits/rejected": -1.134319543838501, "logps/chosen": -137.19068908691406, "logps/rejected": -145.8187255859375, "loss": 0.8916, "rewards/accuracies": 0.0, "rewards/chosen": -3.822953939437866, "rewards/margins": -0.46852874755859375, "rewards/rejected": -3.3544251918792725, "step": 2110 }, { "epoch": 0.47, "learning_rate": 8.952356419411177e-06, "logits/chosen": -1.2488616704940796, "logits/rejected": -1.171030879020691, "logps/chosen": -169.94602966308594, "logps/rejected": -201.3761444091797, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.04079589992761612, "rewards/margins": 4.232579231262207, "rewards/rejected": -4.273375034332275, "step": 2111 }, { "epoch": 0.47, "learning_rate": 8.951258366163677e-06, "logits/chosen": -0.7498711943626404, "logits/rejected": -0.8491958379745483, "logps/chosen": -261.642333984375, "logps/rejected": -202.60745239257812, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -5.003155708312988, "rewards/margins": 2.982191562652588, "rewards/rejected": -7.985347270965576, "step": 2112 }, { "epoch": 0.47, "learning_rate": 8.950159805188973e-06, "logits/chosen": -1.233486533164978, "logits/rejected": -1.175918459892273, "logps/chosen": -75.01142883300781, "logps/rejected": -216.66778564453125, "loss": 0.2319, "rewards/accuracies": 1.0, "rewards/chosen": 0.09045105427503586, "rewards/margins": 2.1302733421325684, "rewards/rejected": -2.0398223400115967, "step": 2113 }, { "epoch": 0.47, "learning_rate": 8.949060736628233e-06, "logits/chosen": -1.022503137588501, "logits/rejected": -0.9815950989723206, "logps/chosen": -166.59210205078125, "logps/rejected": -254.8855438232422, "loss": 0.2068, "rewards/accuracies": 1.0, "rewards/chosen": 0.44834595918655396, "rewards/margins": 2.909580945968628, "rewards/rejected": -2.4612350463867188, "step": 2114 }, { "epoch": 0.47, "learning_rate": 8.94796116062268e-06, "logits/chosen": -0.9245583415031433, "logits/rejected": -0.7682022452354431, "logps/chosen": -209.8687744140625, "logps/rejected": -418.6566162109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.04579315334558487, "rewards/margins": 11.024019241333008, "rewards/rejected": -10.978225708007812, "step": 2115 }, { "epoch": 0.47, "learning_rate": 8.946861077313609e-06, "logits/chosen": -1.330664873123169, "logits/rejected": -1.3327183723449707, "logps/chosen": -66.62370300292969, "logps/rejected": -70.06076049804688, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": 0.4167488217353821, "rewards/margins": 3.806535243988037, "rewards/rejected": -3.3897864818573, "step": 2116 }, { "epoch": 0.47, "learning_rate": 8.945760486842377e-06, "logits/chosen": -1.149810791015625, "logits/rejected": -1.2716988325119019, "logps/chosen": -234.75294494628906, "logps/rejected": -194.31883239746094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.4768083095550537, "rewards/margins": 9.216011047363281, "rewards/rejected": -6.739202976226807, "step": 2117 }, { "epoch": 0.47, "learning_rate": 8.944659389350409e-06, "logits/chosen": -0.8017183542251587, "logits/rejected": -0.7506632804870605, "logps/chosen": -142.76719665527344, "logps/rejected": -209.66702270507812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.3424423336982727, "rewards/margins": 8.541851997375488, "rewards/rejected": -8.199409484863281, "step": 2118 }, { "epoch": 0.47, "learning_rate": 8.94355778497919e-06, "logits/chosen": -0.9846374988555908, "logits/rejected": -0.9644429683685303, "logps/chosen": -83.51703643798828, "logps/rejected": -119.8354263305664, "loss": 0.182, "rewards/accuracies": 1.0, "rewards/chosen": -0.4929389953613281, "rewards/margins": 0.8367652893066406, "rewards/rejected": -1.3297042846679688, "step": 2119 }, { "epoch": 0.47, "learning_rate": 8.942455673870278e-06, "logits/chosen": -0.9393030405044556, "logits/rejected": -0.9371263384819031, "logps/chosen": -198.55685424804688, "logps/rejected": -250.39678955078125, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": 0.9347381591796875, "rewards/margins": 3.5142593383789062, "rewards/rejected": -2.5795211791992188, "step": 2120 }, { "epoch": 0.47, "learning_rate": 8.941353056165288e-06, "logits/chosen": -1.2898615598678589, "logits/rejected": -1.3613568544387817, "logps/chosen": -167.7784423828125, "logps/rejected": -109.20490264892578, "loss": 0.0912, "rewards/accuracies": 1.0, "rewards/chosen": -0.8598389029502869, "rewards/margins": 1.6162497997283936, "rewards/rejected": -2.476088762283325, "step": 2121 }, { "epoch": 0.47, "learning_rate": 8.940249932005904e-06, "logits/chosen": -0.96296626329422, "logits/rejected": -0.8670703172683716, "logps/chosen": -104.648681640625, "logps/rejected": -187.052001953125, "loss": 0.2175, "rewards/accuracies": 1.0, "rewards/chosen": 0.7049247622489929, "rewards/margins": 6.689785480499268, "rewards/rejected": -5.984860897064209, "step": 2122 }, { "epoch": 0.47, "learning_rate": 8.939146301533878e-06, "logits/chosen": -1.334813117980957, "logits/rejected": -1.3216947317123413, "logps/chosen": -221.64205932617188, "logps/rejected": -152.79864501953125, "loss": 0.0616, "rewards/accuracies": 1.0, "rewards/chosen": 0.8789993524551392, "rewards/margins": 4.7171311378479, "rewards/rejected": -3.8381316661834717, "step": 2123 }, { "epoch": 0.47, "learning_rate": 8.938042164891021e-06, "logits/chosen": -1.0640082359313965, "logits/rejected": -1.0389933586120605, "logps/chosen": -80.0552978515625, "logps/rejected": -46.48141098022461, "loss": 0.1341, "rewards/accuracies": 1.0, "rewards/chosen": -0.21493148803710938, "rewards/margins": 1.8311975002288818, "rewards/rejected": -2.046128988265991, "step": 2124 }, { "epoch": 0.47, "learning_rate": 8.936937522219212e-06, "logits/chosen": -0.8448818922042847, "logits/rejected": -0.7803570032119751, "logps/chosen": -258.8945007324219, "logps/rejected": -210.42269897460938, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": 0.3969879150390625, "rewards/margins": 2.618978977203369, "rewards/rejected": -2.2219910621643066, "step": 2125 }, { "epoch": 0.47, "learning_rate": 8.935832373660397e-06, "logits/chosen": -0.9289106130599976, "logits/rejected": -0.9195422530174255, "logps/chosen": -161.16867065429688, "logps/rejected": -144.4209747314453, "loss": 0.0964, "rewards/accuracies": 1.0, "rewards/chosen": -3.892082929611206, "rewards/margins": 1.5804803371429443, "rewards/rejected": -5.47256326675415, "step": 2126 }, { "epoch": 0.47, "learning_rate": 8.934726719356582e-06, "logits/chosen": -1.2894198894500732, "logits/rejected": -1.264469027519226, "logps/chosen": -84.67745208740234, "logps/rejected": -119.92745971679688, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -0.39734575152397156, "rewards/margins": 3.1350600719451904, "rewards/rejected": -3.5324058532714844, "step": 2127 }, { "epoch": 0.47, "learning_rate": 8.933620559449842e-06, "logits/chosen": -0.9653410315513611, "logits/rejected": -0.9096702337265015, "logps/chosen": -123.23053741455078, "logps/rejected": -182.391357421875, "loss": 0.3797, "rewards/accuracies": 0.0, "rewards/chosen": -0.5367546081542969, "rewards/margins": -0.12845230102539062, "rewards/rejected": -0.40830230712890625, "step": 2128 }, { "epoch": 0.47, "learning_rate": 8.932513894082317e-06, "logits/chosen": -1.0596237182617188, "logits/rejected": -1.034450650215149, "logps/chosen": -74.86273193359375, "logps/rejected": -86.14510345458984, "loss": 0.213, "rewards/accuracies": 1.0, "rewards/chosen": 0.7206474542617798, "rewards/margins": 0.6653847098350525, "rewards/rejected": 0.05526275560259819, "step": 2129 }, { "epoch": 0.47, "learning_rate": 8.93140672339621e-06, "logits/chosen": -0.7634245753288269, "logits/rejected": -0.7578443288803101, "logps/chosen": -278.25799560546875, "logps/rejected": -198.95916748046875, "loss": 0.402, "rewards/accuracies": 0.0, "rewards/chosen": -1.4004547595977783, "rewards/margins": -0.21076667308807373, "rewards/rejected": -1.1896880865097046, "step": 2130 }, { "epoch": 0.47, "learning_rate": 8.930299047533792e-06, "logits/chosen": -1.1495234966278076, "logits/rejected": -1.0318032503128052, "logps/chosen": -104.67762756347656, "logps/rejected": -225.8932342529297, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": 0.3192299008369446, "rewards/margins": 10.189955711364746, "rewards/rejected": -9.870725631713867, "step": 2131 }, { "epoch": 0.47, "learning_rate": 8.929190866637391e-06, "logits/chosen": -1.2468278408050537, "logits/rejected": -1.226653814315796, "logps/chosen": -174.65701293945312, "logps/rejected": -166.2489776611328, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": 0.8461197018623352, "rewards/margins": 3.6990509033203125, "rewards/rejected": -2.852931261062622, "step": 2132 }, { "epoch": 0.47, "learning_rate": 8.92808218084941e-06, "logits/chosen": -0.7582056522369385, "logits/rejected": -0.6954829692840576, "logps/chosen": -128.1397705078125, "logps/rejected": -42.33151626586914, "loss": 0.1621, "rewards/accuracies": 1.0, "rewards/chosen": -0.8561660647392273, "rewards/margins": 1.2570126056671143, "rewards/rejected": -2.1131787300109863, "step": 2133 }, { "epoch": 0.47, "learning_rate": 8.926972990312314e-06, "logits/chosen": -1.0118125677108765, "logits/rejected": -1.0175819396972656, "logps/chosen": -154.83761596679688, "logps/rejected": -151.17825317382812, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.06064453348517418, "rewards/margins": 4.753282070159912, "rewards/rejected": -4.813926696777344, "step": 2134 }, { "epoch": 0.47, "learning_rate": 8.925863295168628e-06, "logits/chosen": -1.0312350988388062, "logits/rejected": -0.9315603971481323, "logps/chosen": -136.09463500976562, "logps/rejected": -338.6152038574219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.5195891857147217, "rewards/margins": 13.300946235656738, "rewards/rejected": -10.781356811523438, "step": 2135 }, { "epoch": 0.47, "learning_rate": 8.924753095560945e-06, "logits/chosen": -1.0151430368423462, "logits/rejected": -0.9165984392166138, "logps/chosen": -169.44259643554688, "logps/rejected": -232.14630126953125, "loss": 0.1921, "rewards/accuracies": 1.0, "rewards/chosen": -1.1479614973068237, "rewards/margins": 10.605937957763672, "rewards/rejected": -11.753899574279785, "step": 2136 }, { "epoch": 0.47, "learning_rate": 8.923642391631924e-06, "logits/chosen": -1.0134015083312988, "logits/rejected": -0.9521070718765259, "logps/chosen": -110.19454956054688, "logps/rejected": -232.07772827148438, "loss": 0.2838, "rewards/accuracies": 1.0, "rewards/chosen": 0.3666885495185852, "rewards/margins": 0.3483841121196747, "rewards/rejected": 0.01830444298684597, "step": 2137 }, { "epoch": 0.47, "learning_rate": 8.922531183524287e-06, "logits/chosen": -0.9071298241615295, "logits/rejected": -0.9162128567695618, "logps/chosen": -79.39300537109375, "logps/rejected": -205.84103393554688, "loss": 0.0736, "rewards/accuracies": 1.0, "rewards/chosen": -0.20521850883960724, "rewards/margins": 2.835693359375, "rewards/rejected": -3.040911912918091, "step": 2138 }, { "epoch": 0.47, "learning_rate": 8.921419471380826e-06, "logits/chosen": -1.264082908630371, "logits/rejected": -1.2507152557373047, "logps/chosen": -196.48175048828125, "logps/rejected": -215.73526000976562, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": 0.8562576174736023, "rewards/margins": 3.1803207397460938, "rewards/rejected": -2.3240630626678467, "step": 2139 }, { "epoch": 0.47, "learning_rate": 8.920307255344386e-06, "logits/chosen": -0.9119386672973633, "logits/rejected": -0.8369240164756775, "logps/chosen": -181.6981201171875, "logps/rejected": -240.99330139160156, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": 0.23916320502758026, "rewards/margins": 10.470943450927734, "rewards/rejected": -10.231780052185059, "step": 2140 }, { "epoch": 0.47, "learning_rate": 8.91919453555789e-06, "logits/chosen": -1.1400538682937622, "logits/rejected": -1.178714394569397, "logps/chosen": -241.21087646484375, "logps/rejected": -133.82101440429688, "loss": 0.1039, "rewards/accuracies": 1.0, "rewards/chosen": 0.6678863763809204, "rewards/margins": 5.622932434082031, "rewards/rejected": -4.9550461769104, "step": 2141 }, { "epoch": 0.47, "learning_rate": 8.918081312164318e-06, "logits/chosen": -0.8417640328407288, "logits/rejected": -0.8660342693328857, "logps/chosen": -180.02334594726562, "logps/rejected": -204.1009063720703, "loss": 0.2881, "rewards/accuracies": 1.0, "rewards/chosen": -1.0301223993301392, "rewards/margins": 0.9302794933319092, "rewards/rejected": -1.9604018926620483, "step": 2142 }, { "epoch": 0.47, "learning_rate": 8.916967585306715e-06, "logits/chosen": -1.0474612712860107, "logits/rejected": -1.1113605499267578, "logps/chosen": -277.0791015625, "logps/rejected": -79.23468017578125, "loss": 2.1373, "rewards/accuracies": 0.0, "rewards/chosen": -5.498938083648682, "rewards/margins": -4.259941101074219, "rewards/rejected": -1.2389968633651733, "step": 2143 }, { "epoch": 0.47, "learning_rate": 8.915853355128192e-06, "logits/chosen": -1.0318949222564697, "logits/rejected": -0.9892625212669373, "logps/chosen": -100.84439086914062, "logps/rejected": -126.25604248046875, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": -0.9849746823310852, "rewards/margins": 4.810564994812012, "rewards/rejected": -5.795539855957031, "step": 2144 }, { "epoch": 0.47, "learning_rate": 8.91473862177193e-06, "logits/chosen": -1.1381014585494995, "logits/rejected": -1.1381014585494995, "logps/chosen": -98.76116180419922, "logps/rejected": -98.76116180419922, "loss": 0.3518, "rewards/accuracies": 0.0, "rewards/chosen": -3.9424312114715576, "rewards/margins": 0.0, "rewards/rejected": -3.9424312114715576, "step": 2145 }, { "epoch": 0.47, "learning_rate": 8.913623385381163e-06, "logits/chosen": -0.8154285550117493, "logits/rejected": -0.6558290719985962, "logps/chosen": -275.3293762207031, "logps/rejected": -408.2340087890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.545440673828125, "rewards/margins": 7.941986083984375, "rewards/rejected": -8.4874267578125, "step": 2146 }, { "epoch": 0.48, "learning_rate": 8.9125076460992e-06, "logits/chosen": -1.381145715713501, "logits/rejected": -1.3687313795089722, "logps/chosen": -89.1280288696289, "logps/rejected": -119.27166748046875, "loss": 0.0632, "rewards/accuracies": 1.0, "rewards/chosen": -2.1624557971954346, "rewards/margins": 2.3668320178985596, "rewards/rejected": -4.529287815093994, "step": 2147 }, { "epoch": 0.48, "learning_rate": 8.91139140406941e-06, "logits/chosen": -1.1781916618347168, "logits/rejected": -1.133953332901001, "logps/chosen": -192.7413330078125, "logps/rejected": -179.91354370117188, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.7039932012557983, "rewards/margins": 5.003939628601074, "rewards/rejected": -6.707932949066162, "step": 2148 }, { "epoch": 0.48, "learning_rate": 8.910274659435226e-06, "logits/chosen": -1.0156939029693604, "logits/rejected": -1.0279468297958374, "logps/chosen": -208.18421936035156, "logps/rejected": -208.5704345703125, "loss": 0.3425, "rewards/accuracies": 1.0, "rewards/chosen": -2.3055496215820312, "rewards/margins": 0.01662755012512207, "rewards/rejected": -2.3221771717071533, "step": 2149 }, { "epoch": 0.48, "learning_rate": 8.90915741234015e-06, "logits/chosen": -1.1640408039093018, "logits/rejected": -1.119812250137329, "logps/chosen": -214.968994140625, "logps/rejected": -276.56732177734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.407614141702652, "rewards/margins": 6.0331220626831055, "rewards/rejected": -6.440736293792725, "step": 2150 }, { "epoch": 0.48, "learning_rate": 8.908039662927743e-06, "logits/chosen": -0.8098293542861938, "logits/rejected": -0.8625627756118774, "logps/chosen": -277.1833190917969, "logps/rejected": -114.09008026123047, "loss": 0.1108, "rewards/accuracies": 1.0, "rewards/chosen": -2.4701340198516846, "rewards/margins": 1.4959053993225098, "rewards/rejected": -3.9660394191741943, "step": 2151 }, { "epoch": 0.48, "learning_rate": 8.906921411341634e-06, "logits/chosen": -0.8255837559700012, "logits/rejected": -0.826330304145813, "logps/chosen": -188.22048950195312, "logps/rejected": -341.9079895019531, "loss": 0.1936, "rewards/accuracies": 1.0, "rewards/chosen": -2.539541721343994, "rewards/margins": 2.86431884765625, "rewards/rejected": -5.403860569000244, "step": 2152 }, { "epoch": 0.48, "learning_rate": 8.905802657725516e-06, "logits/chosen": -0.8428394198417664, "logits/rejected": -0.8336132168769836, "logps/chosen": -182.9117431640625, "logps/rejected": -195.29669189453125, "loss": 0.1336, "rewards/accuracies": 1.0, "rewards/chosen": -6.6897783279418945, "rewards/margins": 1.193511962890625, "rewards/rejected": -7.8832902908325195, "step": 2153 }, { "epoch": 0.48, "learning_rate": 8.904683402223146e-06, "logits/chosen": -1.1717442274093628, "logits/rejected": -1.1439679861068726, "logps/chosen": -111.4507064819336, "logps/rejected": -127.14321899414062, "loss": 0.1432, "rewards/accuracies": 1.0, "rewards/chosen": 0.0042739869095385075, "rewards/margins": 1.2887588739395142, "rewards/rejected": -1.28448486328125, "step": 2154 }, { "epoch": 0.48, "learning_rate": 8.903563644978346e-06, "logits/chosen": -1.3443660736083984, "logits/rejected": -1.3169288635253906, "logps/chosen": -97.54007720947266, "logps/rejected": -92.22880554199219, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -1.4532051086425781, "rewards/margins": 3.505751609802246, "rewards/rejected": -4.958956718444824, "step": 2155 }, { "epoch": 0.48, "learning_rate": 8.902443386135e-06, "logits/chosen": -1.2502599954605103, "logits/rejected": -1.2277270555496216, "logps/chosen": -85.34532165527344, "logps/rejected": -129.7851104736328, "loss": 0.0934, "rewards/accuracies": 1.0, "rewards/chosen": 0.3264938294887543, "rewards/margins": 1.5826889276504517, "rewards/rejected": -1.256195068359375, "step": 2156 }, { "epoch": 0.48, "learning_rate": 8.90132262583706e-06, "logits/chosen": -1.1081792116165161, "logits/rejected": -1.072723150253296, "logps/chosen": -214.8120574951172, "logps/rejected": -194.05242919921875, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.548609972000122, "rewards/margins": 8.16248893737793, "rewards/rejected": -9.711098670959473, "step": 2157 }, { "epoch": 0.48, "learning_rate": 8.900201364228542e-06, "logits/chosen": -0.9157078862190247, "logits/rejected": -0.8767436742782593, "logps/chosen": -140.4714813232422, "logps/rejected": -136.1603546142578, "loss": 0.0906, "rewards/accuracies": 1.0, "rewards/chosen": -4.818662166595459, "rewards/margins": 1.618307113647461, "rewards/rejected": -6.43696928024292, "step": 2158 }, { "epoch": 0.48, "learning_rate": 8.899079601453524e-06, "logits/chosen": -1.2254652976989746, "logits/rejected": -1.1361501216888428, "logps/chosen": -98.33143615722656, "logps/rejected": -186.70550537109375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 1.3831192255020142, "rewards/margins": 8.720672607421875, "rewards/rejected": -7.33755350112915, "step": 2159 }, { "epoch": 0.48, "learning_rate": 8.897957337656151e-06, "logits/chosen": -1.0149015188217163, "logits/rejected": -1.0601332187652588, "logps/chosen": -167.36489868164062, "logps/rejected": -330.49932861328125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 1.0471848249435425, "rewards/margins": 19.517404556274414, "rewards/rejected": -18.4702205657959, "step": 2160 }, { "epoch": 0.48, "learning_rate": 8.89683457298063e-06, "logits/chosen": -1.2937959432601929, "logits/rejected": -1.2937959432601929, "logps/chosen": -220.84658813476562, "logps/rejected": -220.84658813476562, "loss": 0.3713, "rewards/accuracies": 0.0, "rewards/chosen": -10.34268856048584, "rewards/margins": 0.0, "rewards/rejected": -10.34268856048584, "step": 2161 }, { "epoch": 0.48, "learning_rate": 8.895711307571235e-06, "logits/chosen": -0.9930126070976257, "logits/rejected": -1.012817144393921, "logps/chosen": -259.3963317871094, "logps/rejected": -211.40057373046875, "loss": 2.5889, "rewards/accuracies": 1.0, "rewards/chosen": -3.0166778564453125, "rewards/margins": 1.3478364944458008, "rewards/rejected": -4.364514350891113, "step": 2162 }, { "epoch": 0.48, "learning_rate": 8.894587541572301e-06, "logits/chosen": -1.2522062063217163, "logits/rejected": -1.2057807445526123, "logps/chosen": -126.95123291015625, "logps/rejected": -202.98004150390625, "loss": 0.1705, "rewards/accuracies": 1.0, "rewards/chosen": -2.4779975414276123, "rewards/margins": 0.9021813869476318, "rewards/rejected": -3.380178928375244, "step": 2163 }, { "epoch": 0.48, "learning_rate": 8.89346327512823e-06, "logits/chosen": -1.2362703084945679, "logits/rejected": -1.2017521858215332, "logps/chosen": -125.71430969238281, "logps/rejected": -119.82444763183594, "loss": 0.2687, "rewards/accuracies": 1.0, "rewards/chosen": -2.3531646728515625, "rewards/margins": 0.4151465892791748, "rewards/rejected": -2.7683112621307373, "step": 2164 }, { "epoch": 0.48, "learning_rate": 8.89233850838349e-06, "logits/chosen": -1.037827491760254, "logits/rejected": -0.9848918914794922, "logps/chosen": -109.36312866210938, "logps/rejected": -192.21282958984375, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": 0.4871933162212372, "rewards/margins": 6.077049255371094, "rewards/rejected": -5.589856147766113, "step": 2165 }, { "epoch": 0.48, "learning_rate": 8.891213241482606e-06, "logits/chosen": -0.928023099899292, "logits/rejected": -0.8529560565948486, "logps/chosen": -249.1031494140625, "logps/rejected": -283.49566650390625, "loss": 1.1867, "rewards/accuracies": 1.0, "rewards/chosen": 0.32651063799858093, "rewards/margins": 5.100717067718506, "rewards/rejected": -4.774206638336182, "step": 2166 }, { "epoch": 0.48, "learning_rate": 8.890087474570174e-06, "logits/chosen": -0.9458035230636597, "logits/rejected": -0.9968706965446472, "logps/chosen": -209.00071716308594, "logps/rejected": -184.673583984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8672928214073181, "rewards/margins": 9.229642868041992, "rewards/rejected": -10.096935272216797, "step": 2167 }, { "epoch": 0.48, "learning_rate": 8.888961207790856e-06, "logits/chosen": -0.8729757070541382, "logits/rejected": -0.8595852255821228, "logps/chosen": -155.38710021972656, "logps/rejected": -119.06240844726562, "loss": 0.3562, "rewards/accuracies": 0.0, "rewards/chosen": -4.355908393859863, "rewards/margins": -0.037706851959228516, "rewards/rejected": -4.318201541900635, "step": 2168 }, { "epoch": 0.48, "learning_rate": 8.887834441289369e-06, "logits/chosen": -1.20747709274292, "logits/rejected": -1.1636377573013306, "logps/chosen": -122.02894592285156, "logps/rejected": -224.1312713623047, "loss": 0.2201, "rewards/accuracies": 1.0, "rewards/chosen": -3.3244340419769287, "rewards/margins": 0.5925140380859375, "rewards/rejected": -3.916948080062866, "step": 2169 }, { "epoch": 0.48, "learning_rate": 8.886707175210503e-06, "logits/chosen": -1.1015896797180176, "logits/rejected": -1.060428261756897, "logps/chosen": -230.98741149902344, "logps/rejected": -183.10958862304688, "loss": 1.9185, "rewards/accuracies": 1.0, "rewards/chosen": 2.609736680984497, "rewards/margins": 9.941232681274414, "rewards/rejected": -7.331495761871338, "step": 2170 }, { "epoch": 0.48, "learning_rate": 8.88557940969911e-06, "logits/chosen": -1.3073465824127197, "logits/rejected": -1.4133260250091553, "logps/chosen": -187.92190551757812, "logps/rejected": -62.33384323120117, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -1.0902191400527954, "rewards/margins": 2.4949889183044434, "rewards/rejected": -3.5852081775665283, "step": 2171 }, { "epoch": 0.48, "learning_rate": 8.884451144900104e-06, "logits/chosen": -1.3155298233032227, "logits/rejected": -1.4559522867202759, "logps/chosen": -190.4709930419922, "logps/rejected": -85.99053955078125, "loss": 0.0627, "rewards/accuracies": 1.0, "rewards/chosen": 2.2059648036956787, "rewards/margins": 6.684541702270508, "rewards/rejected": -4.47857666015625, "step": 2172 }, { "epoch": 0.48, "learning_rate": 8.88332238095846e-06, "logits/chosen": -1.028999924659729, "logits/rejected": -1.0165350437164307, "logps/chosen": -129.47244262695312, "logps/rejected": -180.7825927734375, "loss": 0.143, "rewards/accuracies": 1.0, "rewards/chosen": -1.447657823562622, "rewards/margins": 1.1224656105041504, "rewards/rejected": -2.5701234340667725, "step": 2173 }, { "epoch": 0.48, "learning_rate": 8.882193118019229e-06, "logits/chosen": -0.8790521025657654, "logits/rejected": -0.8653423190116882, "logps/chosen": -172.05398559570312, "logps/rejected": -224.83238220214844, "loss": 0.8208, "rewards/accuracies": 0.0, "rewards/chosen": -4.352464199066162, "rewards/margins": -1.3584868907928467, "rewards/rejected": -2.9939773082733154, "step": 2174 }, { "epoch": 0.48, "learning_rate": 8.881063356227513e-06, "logits/chosen": -1.0625349283218384, "logits/rejected": -0.9850937724113464, "logps/chosen": -242.6457977294922, "logps/rejected": -355.6451110839844, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 1.9398025274276733, "rewards/margins": 8.680112838745117, "rewards/rejected": -6.7403106689453125, "step": 2175 }, { "epoch": 0.48, "learning_rate": 8.879933095728485e-06, "logits/chosen": -1.3770484924316406, "logits/rejected": -1.3973876237869263, "logps/chosen": -191.0415496826172, "logps/rejected": -123.57133483886719, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.6852035522460938, "rewards/margins": 8.119756698608398, "rewards/rejected": -6.434553623199463, "step": 2176 }, { "epoch": 0.48, "learning_rate": 8.878802336667384e-06, "logits/chosen": -0.8962565064430237, "logits/rejected": -0.85398268699646, "logps/chosen": -136.78500366210938, "logps/rejected": -157.4105682373047, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -0.17663879692554474, "rewards/margins": 4.4824299812316895, "rewards/rejected": -4.659068584442139, "step": 2177 }, { "epoch": 0.48, "learning_rate": 8.877671079189505e-06, "logits/chosen": -0.7823190093040466, "logits/rejected": -0.7845962643623352, "logps/chosen": -292.6024475097656, "logps/rejected": -483.06341552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.1015777587890625, "rewards/margins": 16.410598754882812, "rewards/rejected": -15.30902099609375, "step": 2178 }, { "epoch": 0.48, "learning_rate": 8.876539323440214e-06, "logits/chosen": -0.9679965972900391, "logits/rejected": -0.9854009747505188, "logps/chosen": -135.39431762695312, "logps/rejected": -187.3349609375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.03043518029153347, "rewards/margins": 5.696374416351318, "rewards/rejected": -5.6659393310546875, "step": 2179 }, { "epoch": 0.48, "learning_rate": 8.87540706956494e-06, "logits/chosen": -0.9109187126159668, "logits/rejected": -0.8136699199676514, "logps/chosen": -195.31883239746094, "logps/rejected": -416.2647705078125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.0668747425079346, "rewards/margins": 11.977581024169922, "rewards/rejected": -13.044455528259277, "step": 2180 }, { "epoch": 0.48, "learning_rate": 8.874274317709173e-06, "logits/chosen": -0.9495620131492615, "logits/rejected": -0.9235426783561707, "logps/chosen": -58.91773223876953, "logps/rejected": -163.55264282226562, "loss": 0.0926, "rewards/accuracies": 1.0, "rewards/chosen": -0.9113067984580994, "rewards/margins": 3.1949386596679688, "rewards/rejected": -4.106245517730713, "step": 2181 }, { "epoch": 0.48, "learning_rate": 8.873141068018469e-06, "logits/chosen": -1.1405221223831177, "logits/rejected": -1.1039611101150513, "logps/chosen": -124.70494079589844, "logps/rejected": -217.73748779296875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.9556190967559814, "rewards/margins": 8.821903228759766, "rewards/rejected": -10.777522087097168, "step": 2182 }, { "epoch": 0.48, "learning_rate": 8.872007320638449e-06, "logits/chosen": -0.807436466217041, "logits/rejected": -0.8270366787910461, "logps/chosen": -137.62152099609375, "logps/rejected": -104.21391296386719, "loss": 0.2862, "rewards/accuracies": 1.0, "rewards/chosen": 0.002378845354542136, "rewards/margins": 0.2612564265727997, "rewards/rejected": -0.25887757539749146, "step": 2183 }, { "epoch": 0.48, "learning_rate": 8.870873075714797e-06, "logits/chosen": -1.1043850183486938, "logits/rejected": -1.0981066226959229, "logps/chosen": -49.02512741088867, "logps/rejected": -61.56690979003906, "loss": 0.1147, "rewards/accuracies": 1.0, "rewards/chosen": -1.5338554382324219, "rewards/margins": 1.518294095993042, "rewards/rejected": -3.052149534225464, "step": 2184 }, { "epoch": 0.48, "learning_rate": 8.86973833339326e-06, "logits/chosen": -1.1297041177749634, "logits/rejected": -1.0733722448349, "logps/chosen": -61.09065246582031, "logps/rejected": -148.12200927734375, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -1.7032803297042847, "rewards/margins": 4.416262149810791, "rewards/rejected": -6.119542598724365, "step": 2185 }, { "epoch": 0.48, "learning_rate": 8.86860309381965e-06, "logits/chosen": -0.5618042349815369, "logits/rejected": -0.5211545825004578, "logps/chosen": -74.40603637695312, "logps/rejected": -172.14369201660156, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -0.09714355319738388, "rewards/margins": 5.593131065368652, "rewards/rejected": -5.690274715423584, "step": 2186 }, { "epoch": 0.48, "learning_rate": 8.867467357139842e-06, "logits/chosen": -0.7573710083961487, "logits/rejected": -0.7478727698326111, "logps/chosen": -167.25100708007812, "logps/rejected": -60.1083984375, "loss": 2.4274, "rewards/accuracies": 0.0, "rewards/chosen": -7.405032634735107, "rewards/margins": -4.456140518188477, "rewards/rejected": -2.948892116546631, "step": 2187 }, { "epoch": 0.48, "learning_rate": 8.866331123499775e-06, "logits/chosen": -0.664645254611969, "logits/rejected": -0.6057469844818115, "logps/chosen": -135.658935546875, "logps/rejected": -351.4613037109375, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": -2.4568779468536377, "rewards/margins": 4.711938858032227, "rewards/rejected": -7.168817043304443, "step": 2188 }, { "epoch": 0.48, "learning_rate": 8.865194393045452e-06, "logits/chosen": -0.8855703473091125, "logits/rejected": -0.867773175239563, "logps/chosen": -115.10420227050781, "logps/rejected": -72.97618103027344, "loss": 0.396, "rewards/accuracies": 1.0, "rewards/chosen": -1.7754974365234375, "rewards/margins": 0.05260467529296875, "rewards/rejected": -1.8281021118164062, "step": 2189 }, { "epoch": 0.48, "learning_rate": 8.864057165922944e-06, "logits/chosen": -1.1645127534866333, "logits/rejected": -1.2531709671020508, "logps/chosen": -161.2191619873047, "logps/rejected": -106.57427215576172, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": 1.2382034063339233, "rewards/margins": 2.9501852989196777, "rewards/rejected": -1.711982011795044, "step": 2190 }, { "epoch": 0.48, "learning_rate": 8.862919442278379e-06, "logits/chosen": -1.2197202444076538, "logits/rejected": -1.3005619049072266, "logps/chosen": -127.41145324707031, "logps/rejected": -123.52989196777344, "loss": 0.427, "rewards/accuracies": 0.0, "rewards/chosen": -0.8545921444892883, "rewards/margins": -0.1198837161064148, "rewards/rejected": -0.7347084283828735, "step": 2191 }, { "epoch": 0.49, "learning_rate": 8.86178122225795e-06, "logits/chosen": -0.7596229314804077, "logits/rejected": -0.7860090732574463, "logps/chosen": -96.04547119140625, "logps/rejected": -53.8065185546875, "loss": 0.1674, "rewards/accuracies": 1.0, "rewards/chosen": 0.17385636270046234, "rewards/margins": 0.9224659204483032, "rewards/rejected": -0.7486095428466797, "step": 2192 }, { "epoch": 0.49, "learning_rate": 8.860642506007919e-06, "logits/chosen": -1.0615237951278687, "logits/rejected": -1.0430124998092651, "logps/chosen": -156.78456115722656, "logps/rejected": -286.65179443359375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.5747238397598267, "rewards/margins": 4.7803850173950195, "rewards/rejected": -5.355108737945557, "step": 2193 }, { "epoch": 0.49, "learning_rate": 8.859503293674605e-06, "logits/chosen": -0.913332462310791, "logits/rejected": -0.8928229808807373, "logps/chosen": -62.62939453125, "logps/rejected": -53.00364303588867, "loss": 0.1765, "rewards/accuracies": 1.0, "rewards/chosen": -1.0557053089141846, "rewards/margins": 2.0530197620391846, "rewards/rejected": -3.108725070953369, "step": 2194 }, { "epoch": 0.49, "learning_rate": 8.858363585404397e-06, "logits/chosen": -0.9192654490470886, "logits/rejected": -0.9428521394729614, "logps/chosen": -177.25289916992188, "logps/rejected": -167.11285400390625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.9110840559005737, "rewards/margins": 4.4468255043029785, "rewards/rejected": -6.357909679412842, "step": 2195 }, { "epoch": 0.49, "learning_rate": 8.857223381343742e-06, "logits/chosen": -1.0043200254440308, "logits/rejected": -0.9546745419502258, "logps/chosen": -137.22955322265625, "logps/rejected": -259.65643310546875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.3734420835971832, "rewards/margins": 7.096426486968994, "rewards/rejected": -7.4698686599731445, "step": 2196 }, { "epoch": 0.49, "learning_rate": 8.856082681639158e-06, "logits/chosen": -0.7499445676803589, "logits/rejected": -0.7445911169052124, "logps/chosen": -141.468505859375, "logps/rejected": -578.2529296875, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -2.464311361312866, "rewards/margins": 41.323204040527344, "rewards/rejected": -43.787513732910156, "step": 2197 }, { "epoch": 0.49, "learning_rate": 8.854941486437216e-06, "logits/chosen": -1.2147578001022339, "logits/rejected": -1.1916780471801758, "logps/chosen": -157.34930419921875, "logps/rejected": -254.61439514160156, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.13798370957374573, "rewards/margins": 7.4070024490356445, "rewards/rejected": -7.544986248016357, "step": 2198 }, { "epoch": 0.49, "learning_rate": 8.853799795884562e-06, "logits/chosen": -0.8291383981704712, "logits/rejected": -0.7877541184425354, "logps/chosen": -102.79923248291016, "logps/rejected": -96.63159942626953, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": -0.8048286437988281, "rewards/margins": 3.0281522274017334, "rewards/rejected": -3.8329808712005615, "step": 2199 }, { "epoch": 0.49, "learning_rate": 8.852657610127898e-06, "logits/chosen": -0.8519537448883057, "logits/rejected": -0.857304036617279, "logps/chosen": -189.49664306640625, "logps/rejected": -149.8249053955078, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": 0.09354553371667862, "rewards/margins": 3.0341758728027344, "rewards/rejected": -2.9406304359436035, "step": 2200 }, { "epoch": 0.49, "learning_rate": 8.851514929313992e-06, "logits/chosen": -0.9869183301925659, "logits/rejected": -0.8535881638526917, "logps/chosen": -216.24398803710938, "logps/rejected": -369.52099609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 1.0195801258087158, "rewards/margins": 12.503506660461426, "rewards/rejected": -11.483926773071289, "step": 2201 }, { "epoch": 0.49, "learning_rate": 8.850371753589677e-06, "logits/chosen": -1.0652801990509033, "logits/rejected": -1.0221341848373413, "logps/chosen": -159.40155029296875, "logps/rejected": -123.91590118408203, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.02897338941693306, "rewards/margins": 4.983190059661865, "rewards/rejected": -5.0121636390686035, "step": 2202 }, { "epoch": 0.49, "learning_rate": 8.849228083101847e-06, "logits/chosen": -0.8438990712165833, "logits/rejected": -0.7812676429748535, "logps/chosen": -220.40985107421875, "logps/rejected": -234.2177276611328, "loss": 0.0452, "rewards/accuracies": 1.0, "rewards/chosen": 0.14927367866039276, "rewards/margins": 2.3657517433166504, "rewards/rejected": -2.216478109359741, "step": 2203 }, { "epoch": 0.49, "learning_rate": 8.848083917997463e-06, "logits/chosen": -0.8352607488632202, "logits/rejected": -0.8168402314186096, "logps/chosen": -197.79794311523438, "logps/rejected": -228.51095581054688, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.4477859735488892, "rewards/margins": 7.778769016265869, "rewards/rejected": -9.226554870605469, "step": 2204 }, { "epoch": 0.49, "learning_rate": 8.846939258423545e-06, "logits/chosen": -0.8826858401298523, "logits/rejected": -0.7013657689094543, "logps/chosen": -183.717041015625, "logps/rejected": -527.2296142578125, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": 0.9316986203193665, "rewards/margins": 17.947595596313477, "rewards/rejected": -17.015897750854492, "step": 2205 }, { "epoch": 0.49, "learning_rate": 8.84579410452718e-06, "logits/chosen": -1.0979748964309692, "logits/rejected": -1.0805671215057373, "logps/chosen": -128.12762451171875, "logps/rejected": -119.43726348876953, "loss": 0.1169, "rewards/accuracies": 1.0, "rewards/chosen": -0.7109748721122742, "rewards/margins": 3.314978837966919, "rewards/rejected": -4.025953769683838, "step": 2206 }, { "epoch": 0.49, "learning_rate": 8.844648456455518e-06, "logits/chosen": -0.8086032271385193, "logits/rejected": -0.8474110960960388, "logps/chosen": -151.67799377441406, "logps/rejected": -106.6324691772461, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": 0.26213836669921875, "rewards/margins": 2.6589698791503906, "rewards/rejected": -2.396831512451172, "step": 2207 }, { "epoch": 0.49, "learning_rate": 8.843502314355771e-06, "logits/chosen": -1.1308923959732056, "logits/rejected": -1.132439136505127, "logps/chosen": -43.60435485839844, "logps/rejected": -36.68525695800781, "loss": 0.3095, "rewards/accuracies": 1.0, "rewards/chosen": -2.4746181964874268, "rewards/margins": 0.15530157089233398, "rewards/rejected": -2.6299197673797607, "step": 2208 }, { "epoch": 0.49, "learning_rate": 8.842355678375217e-06, "logits/chosen": -0.9471035599708557, "logits/rejected": -0.935287594795227, "logps/chosen": -91.86471557617188, "logps/rejected": -174.5355224609375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.5856132507324219, "rewards/margins": 6.09676456451416, "rewards/rejected": -7.682377815246582, "step": 2209 }, { "epoch": 0.49, "learning_rate": 8.841208548661195e-06, "logits/chosen": -0.7002760171890259, "logits/rejected": -0.714706540107727, "logps/chosen": -103.54534912109375, "logps/rejected": -171.82278442382812, "loss": 0.4447, "rewards/accuracies": 0.0, "rewards/chosen": 0.6161819696426392, "rewards/margins": -0.21208035945892334, "rewards/rejected": 0.8282623291015625, "step": 2210 }, { "epoch": 0.49, "learning_rate": 8.840060925361109e-06, "logits/chosen": -0.8928663730621338, "logits/rejected": -0.8589866757392883, "logps/chosen": -105.27728271484375, "logps/rejected": -189.96780395507812, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": -3.4112184047698975, "rewards/margins": 3.467272996902466, "rewards/rejected": -6.878491401672363, "step": 2211 }, { "epoch": 0.49, "learning_rate": 8.838912808622424e-06, "logits/chosen": -0.7844634056091309, "logits/rejected": -0.6518073678016663, "logps/chosen": -196.88409423828125, "logps/rejected": -320.203857421875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 0.9489822387695312, "rewards/margins": 7.635389804840088, "rewards/rejected": -6.686407566070557, "step": 2212 }, { "epoch": 0.49, "learning_rate": 8.837764198592672e-06, "logits/chosen": -0.90062415599823, "logits/rejected": -0.9207250475883484, "logps/chosen": -123.62472534179688, "logps/rejected": -35.078773498535156, "loss": 0.5517, "rewards/accuracies": 0.0, "rewards/chosen": -2.3974647521972656, "rewards/margins": -0.6982017755508423, "rewards/rejected": -1.6992629766464233, "step": 2213 }, { "epoch": 0.49, "learning_rate": 8.836615095419448e-06, "logits/chosen": -1.1748743057250977, "logits/rejected": -1.1240310668945312, "logps/chosen": -104.05695343017578, "logps/rejected": -163.52279663085938, "loss": 0.1176, "rewards/accuracies": 1.0, "rewards/chosen": -1.343103051185608, "rewards/margins": 1.3270598649978638, "rewards/rejected": -2.6701629161834717, "step": 2214 }, { "epoch": 0.49, "learning_rate": 8.835465499250404e-06, "logits/chosen": -1.2628979682922363, "logits/rejected": -1.4436924457550049, "logps/chosen": -236.86636352539062, "logps/rejected": -161.1175079345703, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.461895704269409, "rewards/margins": 5.109738349914551, "rewards/rejected": -7.571633815765381, "step": 2215 }, { "epoch": 0.49, "learning_rate": 8.834315410233264e-06, "logits/chosen": -0.7003766894340515, "logits/rejected": -0.7003766894340515, "logps/chosen": -117.2953109741211, "logps/rejected": -117.2953109741211, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.912143230438232, "rewards/margins": 0.0, "rewards/rejected": -5.912143230438232, "step": 2216 }, { "epoch": 0.49, "learning_rate": 8.833164828515815e-06, "logits/chosen": -0.9375154376029968, "logits/rejected": -0.5628450512886047, "logps/chosen": -217.1318817138672, "logps/rejected": -587.73291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7148239016532898, "rewards/margins": 48.458152770996094, "rewards/rejected": -47.74332809448242, "step": 2217 }, { "epoch": 0.49, "learning_rate": 8.832013754245895e-06, "logits/chosen": -1.0620945692062378, "logits/rejected": -1.0083339214324951, "logps/chosen": -85.59133911132812, "logps/rejected": -148.45407104492188, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -1.4053146839141846, "rewards/margins": 3.6121580600738525, "rewards/rejected": -5.017472743988037, "step": 2218 }, { "epoch": 0.49, "learning_rate": 8.830862187571423e-06, "logits/chosen": -1.1779892444610596, "logits/rejected": -1.053155541419983, "logps/chosen": -164.53662109375, "logps/rejected": -298.29547119140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.04219665750861168, "rewards/margins": 7.549041748046875, "rewards/rejected": -7.591238498687744, "step": 2219 }, { "epoch": 0.49, "learning_rate": 8.829710128640368e-06, "logits/chosen": -1.4769983291625977, "logits/rejected": -1.4524335861206055, "logps/chosen": -105.50819396972656, "logps/rejected": -94.57659912109375, "loss": 0.3459, "rewards/accuracies": 1.0, "rewards/chosen": -0.20760880410671234, "rewards/margins": 0.07410125434398651, "rewards/rejected": -0.28171005845069885, "step": 2220 }, { "epoch": 0.49, "learning_rate": 8.828557577600769e-06, "logits/chosen": -1.1053158044815063, "logits/rejected": -1.0259268283843994, "logps/chosen": -198.2144775390625, "logps/rejected": -252.2393798828125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 2.066415548324585, "rewards/margins": 8.6983642578125, "rewards/rejected": -6.631948947906494, "step": 2221 }, { "epoch": 0.49, "learning_rate": 8.827404534600723e-06, "logits/chosen": -1.2419707775115967, "logits/rejected": -1.2193692922592163, "logps/chosen": -93.55868530273438, "logps/rejected": -107.44502258300781, "loss": 0.3527, "rewards/accuracies": 0.0, "rewards/chosen": -1.8590621948242188, "rewards/margins": -0.02430880069732666, "rewards/rejected": -1.834753394126892, "step": 2222 }, { "epoch": 0.49, "learning_rate": 8.826250999788397e-06, "logits/chosen": -1.2748007774353027, "logits/rejected": -1.2590889930725098, "logps/chosen": -167.88873291015625, "logps/rejected": -152.5410614013672, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": -1.7441192865371704, "rewards/margins": 2.9152069091796875, "rewards/rejected": -4.659326076507568, "step": 2223 }, { "epoch": 0.49, "learning_rate": 8.825096973312014e-06, "logits/chosen": -0.9849393367767334, "logits/rejected": -0.862098217010498, "logps/chosen": -113.35236358642578, "logps/rejected": -415.545654296875, "loss": 0.0977, "rewards/accuracies": 1.0, "rewards/chosen": -2.688929796218872, "rewards/margins": 1.6785171031951904, "rewards/rejected": -4.3674468994140625, "step": 2224 }, { "epoch": 0.49, "learning_rate": 8.823942455319866e-06, "logits/chosen": -1.0731019973754883, "logits/rejected": -1.074637770652771, "logps/chosen": -118.10208892822266, "logps/rejected": -203.01788330078125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -4.748171329498291, "rewards/margins": 4.5781474113464355, "rewards/rejected": -9.326318740844727, "step": 2225 }, { "epoch": 0.49, "learning_rate": 8.822787445960303e-06, "logits/chosen": -0.6681097745895386, "logits/rejected": -0.5369283556938171, "logps/chosen": -136.99505615234375, "logps/rejected": -139.01222229003906, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -1.5499733686447144, "rewards/margins": 3.7419357299804688, "rewards/rejected": -5.291909217834473, "step": 2226 }, { "epoch": 0.49, "learning_rate": 8.821631945381746e-06, "logits/chosen": -1.0423862934112549, "logits/rejected": -1.0415900945663452, "logps/chosen": -209.43072509765625, "logps/rejected": -173.69346618652344, "loss": 1.0534, "rewards/accuracies": 1.0, "rewards/chosen": 1.2223999500274658, "rewards/margins": 4.927268981933594, "rewards/rejected": -3.704869031906128, "step": 2227 }, { "epoch": 0.49, "learning_rate": 8.82047595373267e-06, "logits/chosen": -0.8515899777412415, "logits/rejected": -0.660466194152832, "logps/chosen": -193.8778076171875, "logps/rejected": -453.6297607421875, "loss": 0.3472, "rewards/accuracies": 1.0, "rewards/chosen": -0.737274169921875, "rewards/margins": 6.623773097991943, "rewards/rejected": -7.361047267913818, "step": 2228 }, { "epoch": 0.49, "learning_rate": 8.819319471161617e-06, "logits/chosen": -0.902955174446106, "logits/rejected": -0.8437867760658264, "logps/chosen": -110.39878845214844, "logps/rejected": -202.648681640625, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": -3.744158983230591, "rewards/margins": 5.630814552307129, "rewards/rejected": -9.37497329711914, "step": 2229 }, { "epoch": 0.49, "learning_rate": 8.818162497817195e-06, "logits/chosen": -0.7485556602478027, "logits/rejected": -0.7147785425186157, "logps/chosen": -349.177001953125, "logps/rejected": -408.65216064453125, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -8.197278022766113, "rewards/margins": 7.994006156921387, "rewards/rejected": -16.1912841796875, "step": 2230 }, { "epoch": 0.49, "learning_rate": 8.81700503384807e-06, "logits/chosen": -0.966524064540863, "logits/rejected": -1.062308669090271, "logps/chosen": -214.778564453125, "logps/rejected": -83.49465942382812, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": 0.2785186767578125, "rewards/margins": 2.86236572265625, "rewards/rejected": -2.5838470458984375, "step": 2231 }, { "epoch": 0.49, "learning_rate": 8.815847079402972e-06, "logits/chosen": -1.0184086561203003, "logits/rejected": -1.0184086561203003, "logps/chosen": -83.635986328125, "logps/rejected": -83.635986328125, "loss": 0.5703, "rewards/accuracies": 0.0, "rewards/chosen": -4.082473278045654, "rewards/margins": 0.0, "rewards/rejected": -4.082473278045654, "step": 2232 }, { "epoch": 0.49, "learning_rate": 8.814688634630699e-06, "logits/chosen": -0.7755127549171448, "logits/rejected": -0.7776795625686646, "logps/chosen": -238.53619384765625, "logps/rejected": -285.7845458984375, "loss": 0.4295, "rewards/accuracies": 0.0, "rewards/chosen": -1.6323364973068237, "rewards/margins": -0.30784308910369873, "rewards/rejected": -1.324493408203125, "step": 2233 }, { "epoch": 0.49, "learning_rate": 8.813529699680108e-06, "logits/chosen": -0.9561697840690613, "logits/rejected": -0.9063423275947571, "logps/chosen": -139.2415313720703, "logps/rejected": -171.32351684570312, "loss": 0.2414, "rewards/accuracies": 1.0, "rewards/chosen": -2.498243808746338, "rewards/margins": 0.5360548496246338, "rewards/rejected": -3.0342986583709717, "step": 2234 }, { "epoch": 0.49, "learning_rate": 8.812370274700117e-06, "logits/chosen": -0.8725546002388, "logits/rejected": -0.8468323945999146, "logps/chosen": -91.6771011352539, "logps/rejected": -213.10311889648438, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": 0.8643531799316406, "rewards/margins": 4.51763391494751, "rewards/rejected": -3.653280735015869, "step": 2235 }, { "epoch": 0.49, "learning_rate": 8.81121035983971e-06, "logits/chosen": -1.089033842086792, "logits/rejected": -1.0789765119552612, "logps/chosen": -117.25318908691406, "logps/rejected": -140.25437927246094, "loss": 1.2584, "rewards/accuracies": 1.0, "rewards/chosen": -2.0863053798675537, "rewards/margins": 0.6907210350036621, "rewards/rejected": -2.777026414871216, "step": 2236 }, { "epoch": 0.5, "learning_rate": 8.810049955247933e-06, "logits/chosen": -1.0205823183059692, "logits/rejected": -1.0803213119506836, "logps/chosen": -164.4119415283203, "logps/rejected": -86.04537963867188, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -0.6048874258995056, "rewards/margins": 3.276750087738037, "rewards/rejected": -3.8816375732421875, "step": 2237 }, { "epoch": 0.5, "learning_rate": 8.808889061073897e-06, "logits/chosen": -1.2887307405471802, "logits/rejected": -1.2887307405471802, "logps/chosen": -130.25064086914062, "logps/rejected": -130.25064086914062, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -4.505357265472412, "rewards/margins": 0.0, "rewards/rejected": -4.505357265472412, "step": 2238 }, { "epoch": 0.5, "learning_rate": 8.807727677466773e-06, "logits/chosen": -0.9846950769424438, "logits/rejected": -0.9877102971076965, "logps/chosen": -108.47789001464844, "logps/rejected": -65.53040313720703, "loss": 0.0645, "rewards/accuracies": 1.0, "rewards/chosen": -0.5652489066123962, "rewards/margins": 2.5262300968170166, "rewards/rejected": -3.0914790630340576, "step": 2239 }, { "epoch": 0.5, "learning_rate": 8.806565804575796e-06, "logits/chosen": -1.036400556564331, "logits/rejected": -0.9281176924705505, "logps/chosen": -154.5428009033203, "logps/rejected": -269.92034912109375, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": -5.784975528717041, "rewards/margins": 13.979572296142578, "rewards/rejected": -19.76454734802246, "step": 2240 }, { "epoch": 0.5, "learning_rate": 8.805403442550261e-06, "logits/chosen": -0.9595648050308228, "logits/rejected": -0.9350975155830383, "logps/chosen": -226.38577270507812, "logps/rejected": -195.19671630859375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 1.2249726057052612, "rewards/margins": 6.141800403594971, "rewards/rejected": -4.91682767868042, "step": 2241 }, { "epoch": 0.5, "learning_rate": 8.804240591539537e-06, "logits/chosen": -0.9945959448814392, "logits/rejected": -0.9779550433158875, "logps/chosen": -81.02510070800781, "logps/rejected": -70.40564727783203, "loss": 0.1286, "rewards/accuracies": 1.0, "rewards/chosen": -2.1358420848846436, "rewards/margins": 1.2275886535644531, "rewards/rejected": -3.3634307384490967, "step": 2242 }, { "epoch": 0.5, "learning_rate": 8.80307725169304e-06, "logits/chosen": -0.9293320775032043, "logits/rejected": -0.9293320775032043, "logps/chosen": -73.91012573242188, "logps/rejected": -73.91012573242188, "loss": 0.4355, "rewards/accuracies": 0.0, "rewards/chosen": -3.299689531326294, "rewards/margins": 0.0, "rewards/rejected": -3.299689531326294, "step": 2243 }, { "epoch": 0.5, "learning_rate": 8.801913423160256e-06, "logits/chosen": -0.6574356555938721, "logits/rejected": -0.40856674313545227, "logps/chosen": -52.03871154785156, "logps/rejected": -249.22137451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4240589141845703, "rewards/margins": 16.289615631103516, "rewards/rejected": -16.713674545288086, "step": 2244 }, { "epoch": 0.5, "learning_rate": 8.800749106090739e-06, "logits/chosen": -0.8776576519012451, "logits/rejected": -0.8969627022743225, "logps/chosen": -147.78765869140625, "logps/rejected": -94.87417602539062, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 0.3265624940395355, "rewards/margins": 4.660338401794434, "rewards/rejected": -4.333775997161865, "step": 2245 }, { "epoch": 0.5, "learning_rate": 8.799584300634096e-06, "logits/chosen": -0.8129732012748718, "logits/rejected": -0.8114697337150574, "logps/chosen": -98.3245849609375, "logps/rejected": -123.07178497314453, "loss": 0.176, "rewards/accuracies": 1.0, "rewards/chosen": -1.5850387811660767, "rewards/margins": 0.8657470941543579, "rewards/rejected": -2.4507858753204346, "step": 2246 }, { "epoch": 0.5, "learning_rate": 8.798419006940008e-06, "logits/chosen": -0.9314588308334351, "logits/rejected": -1.0502837896347046, "logps/chosen": -248.56875610351562, "logps/rejected": -321.382080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.5893921256065369, "rewards/margins": 17.79849624633789, "rewards/rejected": -17.209104537963867, "step": 2247 }, { "epoch": 0.5, "learning_rate": 8.797253225158206e-06, "logits/chosen": -1.162580132484436, "logits/rejected": -1.1238549947738647, "logps/chosen": -182.53663635253906, "logps/rejected": -217.741943359375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.7926254272460938, "rewards/margins": 9.689767837524414, "rewards/rejected": -8.89714241027832, "step": 2248 }, { "epoch": 0.5, "learning_rate": 8.796086955438494e-06, "logits/chosen": -0.9783844351768494, "logits/rejected": -0.9832172393798828, "logps/chosen": -120.40948486328125, "logps/rejected": -138.191162109375, "loss": 0.1464, "rewards/accuracies": 1.0, "rewards/chosen": -1.1469849348068237, "rewards/margins": 1.127282738685608, "rewards/rejected": -2.2742676734924316, "step": 2249 }, { "epoch": 0.5, "learning_rate": 8.794920197930735e-06, "logits/chosen": -0.8632843494415283, "logits/rejected": -0.6560879945755005, "logps/chosen": -170.32516479492188, "logps/rejected": -599.9521484375, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -1.0781983137130737, "rewards/margins": 48.07282257080078, "rewards/rejected": -49.15102005004883, "step": 2250 }, { "epoch": 0.5, "learning_rate": 8.79375295278485e-06, "logits/chosen": -1.1222084760665894, "logits/rejected": -1.091947078704834, "logps/chosen": -63.00590896606445, "logps/rejected": -139.73069763183594, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.05620002746582031, "rewards/margins": 6.517955303192139, "rewards/rejected": -6.461755275726318, "step": 2251 }, { "epoch": 0.5, "learning_rate": 8.792585220150834e-06, "logits/chosen": -0.6814525723457336, "logits/rejected": -0.6694599986076355, "logps/chosen": -209.9542236328125, "logps/rejected": -187.5823974609375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 2.354222059249878, "rewards/margins": 5.607383728027344, "rewards/rejected": -3.253161668777466, "step": 2252 }, { "epoch": 0.5, "learning_rate": 8.791417000178732e-06, "logits/chosen": -1.0214632749557495, "logits/rejected": -1.010114073753357, "logps/chosen": -198.878662109375, "logps/rejected": -126.90216064453125, "loss": 0.2111, "rewards/accuracies": 1.0, "rewards/chosen": 0.6662842035293579, "rewards/margins": 3.1934714317321777, "rewards/rejected": -2.5271873474121094, "step": 2253 }, { "epoch": 0.5, "learning_rate": 8.790248293018662e-06, "logits/chosen": -1.0886222124099731, "logits/rejected": -1.0671297311782837, "logps/chosen": -85.49601745605469, "logps/rejected": -96.00920867919922, "loss": 0.573, "rewards/accuracies": 1.0, "rewards/chosen": -1.026788353919983, "rewards/margins": 1.6377097368240356, "rewards/rejected": -2.6644980907440186, "step": 2254 }, { "epoch": 0.5, "learning_rate": 8.789079098820796e-06, "logits/chosen": -0.9458792209625244, "logits/rejected": -0.8582085967063904, "logps/chosen": -116.93402099609375, "logps/rejected": -327.2735595703125, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.4773872494697571, "rewards/margins": 6.9379143714904785, "rewards/rejected": -7.41530179977417, "step": 2255 }, { "epoch": 0.5, "learning_rate": 8.787909417735374e-06, "logits/chosen": -1.2997136116027832, "logits/rejected": -1.2963911294937134, "logps/chosen": -85.96017456054688, "logps/rejected": -83.03645324707031, "loss": 0.1531, "rewards/accuracies": 1.0, "rewards/chosen": -1.2429367303848267, "rewards/margins": 1.0286518335342407, "rewards/rejected": -2.2715885639190674, "step": 2256 }, { "epoch": 0.5, "learning_rate": 8.7867392499127e-06, "logits/chosen": -0.891333281993866, "logits/rejected": -0.8915683627128601, "logps/chosen": -144.87008666992188, "logps/rejected": -129.69151306152344, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": 0.2874999940395355, "rewards/margins": 2.438772439956665, "rewards/rejected": -2.1512725353240967, "step": 2257 }, { "epoch": 0.5, "learning_rate": 8.785568595503134e-06, "logits/chosen": -0.899665117263794, "logits/rejected": -0.928402304649353, "logps/chosen": -232.98324584960938, "logps/rejected": -212.09173583984375, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.8914123773574829, "rewards/margins": 4.163547039031982, "rewards/rejected": -5.054959297180176, "step": 2258 }, { "epoch": 0.5, "learning_rate": 8.784397454657103e-06, "logits/chosen": -0.9763545393943787, "logits/rejected": -1.0860122442245483, "logps/chosen": -166.39515686035156, "logps/rejected": -97.87107849121094, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": 0.787384033203125, "rewards/margins": 4.72464656829834, "rewards/rejected": -3.9372622966766357, "step": 2259 }, { "epoch": 0.5, "learning_rate": 8.783225827525098e-06, "logits/chosen": -1.284865379333496, "logits/rejected": -1.2164236307144165, "logps/chosen": -176.2985382080078, "logps/rejected": -232.71188354492188, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": 1.06297767162323, "rewards/margins": 7.191739082336426, "rewards/rejected": -6.128761291503906, "step": 2260 }, { "epoch": 0.5, "learning_rate": 8.782053714257668e-06, "logits/chosen": -0.7624838948249817, "logits/rejected": -0.5476186871528625, "logps/chosen": -63.35112380981445, "logps/rejected": -312.8011474609375, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -3.4954986572265625, "rewards/margins": 17.185266494750977, "rewards/rejected": -20.68076515197754, "step": 2261 }, { "epoch": 0.5, "learning_rate": 8.780881115005428e-06, "logits/chosen": -0.9760846495628357, "logits/rejected": -0.9109044671058655, "logps/chosen": -86.6523666381836, "logps/rejected": -56.12644958496094, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": -0.10626220703125, "rewards/margins": 2.1632754802703857, "rewards/rejected": -2.2695376873016357, "step": 2262 }, { "epoch": 0.5, "learning_rate": 8.779708029919054e-06, "logits/chosen": -0.8346952795982361, "logits/rejected": -0.7461239695549011, "logps/chosen": -144.67575073242188, "logps/rejected": -267.2655944824219, "loss": 0.0623, "rewards/accuracies": 1.0, "rewards/chosen": -1.853907823562622, "rewards/margins": 2.0251846313476562, "rewards/rejected": -3.8790924549102783, "step": 2263 }, { "epoch": 0.5, "learning_rate": 8.778534459149283e-06, "logits/chosen": -1.0712412595748901, "logits/rejected": -0.9888576865196228, "logps/chosen": -92.42706298828125, "logps/rejected": -228.84786987304688, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -1.6507049798965454, "rewards/margins": 3.699545383453369, "rewards/rejected": -5.350250244140625, "step": 2264 }, { "epoch": 0.5, "learning_rate": 8.777360402846919e-06, "logits/chosen": -0.8208603262901306, "logits/rejected": -0.841436505317688, "logps/chosen": -114.77139282226562, "logps/rejected": -102.54307556152344, "loss": 0.7889, "rewards/accuracies": 0.0, "rewards/chosen": -1.1132911443710327, "rewards/margins": -1.3465019464492798, "rewards/rejected": 0.2332107573747635, "step": 2265 }, { "epoch": 0.5, "learning_rate": 8.776185861162822e-06, "logits/chosen": -1.0389851331710815, "logits/rejected": -1.1064127683639526, "logps/chosen": -190.72032165527344, "logps/rejected": -152.52378845214844, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.5646454095840454, "rewards/margins": 6.383563995361328, "rewards/rejected": -5.818918704986572, "step": 2266 }, { "epoch": 0.5, "learning_rate": 8.77501083424792e-06, "logits/chosen": -0.5862007141113281, "logits/rejected": -0.5787820219993591, "logps/chosen": -179.26654052734375, "logps/rejected": -100.80329132080078, "loss": 0.0677, "rewards/accuracies": 1.0, "rewards/chosen": 1.4130127429962158, "rewards/margins": 1.9304115772247314, "rewards/rejected": -0.5173988342285156, "step": 2267 }, { "epoch": 0.5, "learning_rate": 8.773835322253202e-06, "logits/chosen": -0.8180334568023682, "logits/rejected": -0.7719306349754333, "logps/chosen": -148.8347930908203, "logps/rejected": -336.3265686035156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.9984634518623352, "rewards/margins": 11.016209602355957, "rewards/rejected": -10.017745971679688, "step": 2268 }, { "epoch": 0.5, "learning_rate": 8.772659325329717e-06, "logits/chosen": -1.213221788406372, "logits/rejected": -1.2997530698776245, "logps/chosen": -133.48504638671875, "logps/rejected": -91.28402709960938, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.21566620469093323, "rewards/margins": 4.889326572418213, "rewards/rejected": -5.104992866516113, "step": 2269 }, { "epoch": 0.5, "learning_rate": 8.771482843628576e-06, "logits/chosen": -1.0477384328842163, "logits/rejected": -0.6843245029449463, "logps/chosen": -166.07546997070312, "logps/rejected": -785.469482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5728821158409119, "rewards/margins": 54.877994537353516, "rewards/rejected": -55.45087814331055, "step": 2270 }, { "epoch": 0.5, "learning_rate": 8.770305877300958e-06, "logits/chosen": -0.8754441142082214, "logits/rejected": -1.0211834907531738, "logps/chosen": -227.733154296875, "logps/rejected": -101.34872436523438, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -0.17241822183132172, "rewards/margins": 3.4116196632385254, "rewards/rejected": -3.5840377807617188, "step": 2271 }, { "epoch": 0.5, "learning_rate": 8.769128426498098e-06, "logits/chosen": -0.9659862518310547, "logits/rejected": -0.8436830639839172, "logps/chosen": -150.53115844726562, "logps/rejected": -298.90264892578125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.9972962141036987, "rewards/margins": 7.3373918533325195, "rewards/rejected": -9.334688186645508, "step": 2272 }, { "epoch": 0.5, "learning_rate": 8.767950491371295e-06, "logits/chosen": -0.9467195272445679, "logits/rejected": -0.9467195272445679, "logps/chosen": -190.2909698486328, "logps/rejected": -190.2909698486328, "loss": 0.3479, "rewards/accuracies": 0.0, "rewards/chosen": -3.6957015991210938, "rewards/margins": 0.0, "rewards/rejected": -3.6957015991210938, "step": 2273 }, { "epoch": 0.5, "learning_rate": 8.766772072071911e-06, "logits/chosen": -0.7058947086334229, "logits/rejected": -0.6984391808509827, "logps/chosen": -80.03385162353516, "logps/rejected": -100.2127456665039, "loss": 0.1585, "rewards/accuracies": 1.0, "rewards/chosen": -2.380861759185791, "rewards/margins": 0.9862105846405029, "rewards/rejected": -3.367072343826294, "step": 2274 }, { "epoch": 0.5, "learning_rate": 8.765593168751373e-06, "logits/chosen": -1.0398627519607544, "logits/rejected": -1.0699703693389893, "logps/chosen": -155.3622589111328, "logps/rejected": -200.7878875732422, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.4321487545967102, "rewards/margins": 4.401113986968994, "rewards/rejected": -4.833262920379639, "step": 2275 }, { "epoch": 0.5, "learning_rate": 8.764413781561164e-06, "logits/chosen": -1.0031213760375977, "logits/rejected": -0.9918469190597534, "logps/chosen": -147.2574462890625, "logps/rejected": -129.12281799316406, "loss": 0.3356, "rewards/accuracies": 1.0, "rewards/chosen": -2.998044729232788, "rewards/margins": 0.055066585540771484, "rewards/rejected": -3.0531113147735596, "step": 2276 }, { "epoch": 0.5, "learning_rate": 8.763233910652833e-06, "logits/chosen": -1.3968626260757446, "logits/rejected": -1.4409797191619873, "logps/chosen": -85.40156555175781, "logps/rejected": -68.3101577758789, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": 1.2899048328399658, "rewards/margins": 3.7513973712921143, "rewards/rejected": -2.4614925384521484, "step": 2277 }, { "epoch": 0.5, "learning_rate": 8.762053556177991e-06, "logits/chosen": -0.7601858973503113, "logits/rejected": -0.7073786854743958, "logps/chosen": -116.52434539794922, "logps/rejected": -102.86082458496094, "loss": 0.0392, "rewards/accuracies": 1.0, "rewards/chosen": 1.3368186950683594, "rewards/margins": 2.701115608215332, "rewards/rejected": -1.364296793937683, "step": 2278 }, { "epoch": 0.5, "learning_rate": 8.760872718288311e-06, "logits/chosen": -0.8807072043418884, "logits/rejected": -0.8580039739608765, "logps/chosen": -158.93399047851562, "logps/rejected": -147.9556427001953, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.623516857624054, "rewards/margins": 5.533661365509033, "rewards/rejected": -4.910144329071045, "step": 2279 }, { "epoch": 0.5, "learning_rate": 8.759691397135528e-06, "logits/chosen": -0.8478806614875793, "logits/rejected": -0.8610211610794067, "logps/chosen": -93.77767944335938, "logps/rejected": -126.8328628540039, "loss": 0.0481, "rewards/accuracies": 1.0, "rewards/chosen": -0.4041328430175781, "rewards/margins": 2.292759656906128, "rewards/rejected": -2.696892499923706, "step": 2280 }, { "epoch": 0.5, "learning_rate": 8.758509592871439e-06, "logits/chosen": -0.7983552813529968, "logits/rejected": -0.7001619338989258, "logps/chosen": -191.06858825683594, "logps/rejected": -311.60888671875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 1.5191971063613892, "rewards/margins": 17.900192260742188, "rewards/rejected": -16.38099479675293, "step": 2281 }, { "epoch": 0.51, "learning_rate": 8.7573273056479e-06, "logits/chosen": -1.1442227363586426, "logits/rejected": -1.186428189277649, "logps/chosen": -102.17887878417969, "logps/rejected": -59.127532958984375, "loss": 0.1178, "rewards/accuracies": 1.0, "rewards/chosen": -0.7648414969444275, "rewards/margins": 1.328704833984375, "rewards/rejected": -2.0935463905334473, "step": 2282 }, { "epoch": 0.51, "learning_rate": 8.756144535616838e-06, "logits/chosen": -0.7713157534599304, "logits/rejected": -0.7332088947296143, "logps/chosen": -75.62130737304688, "logps/rejected": -36.269832611083984, "loss": 0.0921, "rewards/accuracies": 1.0, "rewards/chosen": -0.5697898864746094, "rewards/margins": 1.6144921779632568, "rewards/rejected": -2.184282064437866, "step": 2283 }, { "epoch": 0.51, "learning_rate": 8.754961282930231e-06, "logits/chosen": -1.0613914728164673, "logits/rejected": -1.1168245077133179, "logps/chosen": -91.2200698852539, "logps/rejected": -84.29676055908203, "loss": 0.1146, "rewards/accuracies": 1.0, "rewards/chosen": -0.9238548278808594, "rewards/margins": 2.5486810207366943, "rewards/rejected": -3.4725358486175537, "step": 2284 }, { "epoch": 0.51, "learning_rate": 8.753777547740126e-06, "logits/chosen": -0.9016061425209045, "logits/rejected": -0.8721147775650024, "logps/chosen": -77.75961303710938, "logps/rejected": -128.10693359375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016502380603924394, "rewards/margins": 5.287406921386719, "rewards/rejected": -5.28905725479126, "step": 2285 }, { "epoch": 0.51, "learning_rate": 8.752593330198631e-06, "logits/chosen": -0.8744432926177979, "logits/rejected": -0.8744432926177979, "logps/chosen": -76.94874572753906, "logps/rejected": -76.94874572753906, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.981751918792725, "rewards/margins": 0.0, "rewards/rejected": -4.981751918792725, "step": 2286 }, { "epoch": 0.51, "learning_rate": 8.751408630457911e-06, "logits/chosen": -0.9312167167663574, "logits/rejected": -0.975462019443512, "logps/chosen": -185.149169921875, "logps/rejected": -208.96563720703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.06539001315832138, "rewards/margins": 8.618955612182617, "rewards/rejected": -8.684345245361328, "step": 2287 }, { "epoch": 0.51, "learning_rate": 8.750223448670204e-06, "logits/chosen": -0.8987873196601868, "logits/rejected": -0.8830553889274597, "logps/chosen": -137.46176147460938, "logps/rejected": -142.10211181640625, "loss": 0.1717, "rewards/accuracies": 1.0, "rewards/chosen": 0.7019409537315369, "rewards/margins": 0.8935425281524658, "rewards/rejected": -0.19160155951976776, "step": 2288 }, { "epoch": 0.51, "learning_rate": 8.749037784987797e-06, "logits/chosen": -0.8690345883369446, "logits/rejected": -0.8522481918334961, "logps/chosen": -119.73812103271484, "logps/rejected": -125.31230926513672, "loss": 0.0674, "rewards/accuracies": 1.0, "rewards/chosen": -0.8949653506278992, "rewards/margins": 2.0179641246795654, "rewards/rejected": -2.9129295349121094, "step": 2289 }, { "epoch": 0.51, "learning_rate": 8.747851639563048e-06, "logits/chosen": -1.331343173980713, "logits/rejected": -1.3573358058929443, "logps/chosen": -122.0322036743164, "logps/rejected": -135.97537231445312, "loss": 0.0497, "rewards/accuracies": 1.0, "rewards/chosen": -1.3440834283828735, "rewards/margins": 2.272512912750244, "rewards/rejected": -3.616596221923828, "step": 2290 }, { "epoch": 0.51, "learning_rate": 8.746665012548373e-06, "logits/chosen": -1.093762993812561, "logits/rejected": -1.0765604972839355, "logps/chosen": -140.19732666015625, "logps/rejected": -50.373779296875, "loss": 0.5762, "rewards/accuracies": 0.0, "rewards/chosen": -2.9598405361175537, "rewards/margins": -0.7726602554321289, "rewards/rejected": -2.187180280685425, "step": 2291 }, { "epoch": 0.51, "learning_rate": 8.745477904096247e-06, "logits/chosen": -1.2268074750900269, "logits/rejected": -1.2404682636260986, "logps/chosen": -137.06460571289062, "logps/rejected": -182.97264099121094, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": 0.5563705563545227, "rewards/margins": 3.370230197906494, "rewards/rejected": -2.813859701156616, "step": 2292 }, { "epoch": 0.51, "learning_rate": 8.744290314359219e-06, "logits/chosen": -1.2445346117019653, "logits/rejected": -1.206534504890442, "logps/chosen": -92.73074340820312, "logps/rejected": -141.67327880859375, "loss": 0.0719, "rewards/accuracies": 1.0, "rewards/chosen": -1.748024821281433, "rewards/margins": 1.892255425453186, "rewards/rejected": -3.640280246734619, "step": 2293 }, { "epoch": 0.51, "learning_rate": 8.743102243489885e-06, "logits/chosen": -0.7757341861724854, "logits/rejected": -0.7636095285415649, "logps/chosen": -155.68858337402344, "logps/rejected": -138.10391235351562, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 2.09844970703125, "rewards/margins": 7.3833513259887695, "rewards/rejected": -5.2849016189575195, "step": 2294 }, { "epoch": 0.51, "learning_rate": 8.74191369164091e-06, "logits/chosen": -0.8028556704521179, "logits/rejected": -0.8033847808837891, "logps/chosen": -110.0333023071289, "logps/rejected": -151.4002227783203, "loss": 0.679, "rewards/accuracies": 0.0, "rewards/chosen": -0.10934829711914062, "rewards/margins": -1.0054435729980469, "rewards/rejected": 0.8960952758789062, "step": 2295 }, { "epoch": 0.51, "learning_rate": 8.74072465896502e-06, "logits/chosen": -1.2799036502838135, "logits/rejected": -1.246598482131958, "logps/chosen": -92.18551635742188, "logps/rejected": -160.65237426757812, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 0.92315673828125, "rewards/margins": 6.680963039398193, "rewards/rejected": -5.757806301116943, "step": 2296 }, { "epoch": 0.51, "learning_rate": 8.739535145615005e-06, "logits/chosen": -1.1640894412994385, "logits/rejected": -1.2708269357681274, "logps/chosen": -185.01182556152344, "logps/rejected": -70.59637451171875, "loss": 0.3866, "rewards/accuracies": 1.0, "rewards/chosen": -1.204670786857605, "rewards/margins": 2.4162468910217285, "rewards/rejected": -3.620917558670044, "step": 2297 }, { "epoch": 0.51, "learning_rate": 8.738345151743715e-06, "logits/chosen": -1.1326732635498047, "logits/rejected": -1.2118173837661743, "logps/chosen": -275.3625793457031, "logps/rejected": -97.46827697753906, "loss": 0.4687, "rewards/accuracies": 1.0, "rewards/chosen": 0.481820672750473, "rewards/margins": 5.1524786949157715, "rewards/rejected": -4.670658111572266, "step": 2298 }, { "epoch": 0.51, "learning_rate": 8.737154677504059e-06, "logits/chosen": -1.0591708421707153, "logits/rejected": -1.0591708421707153, "logps/chosen": -41.64436340332031, "logps/rejected": -41.64436340332031, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -1.6041412353515625, "rewards/margins": 0.0, "rewards/rejected": -1.6041412353515625, "step": 2299 }, { "epoch": 0.51, "learning_rate": 8.73596372304901e-06, "logits/chosen": -0.8741565942764282, "logits/rejected": -0.8609160780906677, "logps/chosen": -267.2374572753906, "logps/rejected": -244.19691467285156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.368499755859375, "rewards/margins": 10.13495922088623, "rewards/rejected": -9.766459465026855, "step": 2300 }, { "epoch": 0.51, "learning_rate": 8.734772288531604e-06, "logits/chosen": -1.339777946472168, "logits/rejected": -1.3144068717956543, "logps/chosen": -150.0816650390625, "logps/rejected": -228.75306701660156, "loss": 0.0594, "rewards/accuracies": 1.0, "rewards/chosen": -1.2857818603515625, "rewards/margins": 2.107708692550659, "rewards/rejected": -3.3934905529022217, "step": 2301 }, { "epoch": 0.51, "learning_rate": 8.733580374104936e-06, "logits/chosen": -0.9240483045578003, "logits/rejected": -0.920810878276825, "logps/chosen": -91.88533782958984, "logps/rejected": -94.14950561523438, "loss": 0.0703, "rewards/accuracies": 1.0, "rewards/chosen": -0.4421852231025696, "rewards/margins": 1.9077141284942627, "rewards/rejected": -2.3498992919921875, "step": 2302 }, { "epoch": 0.51, "learning_rate": 8.732387979922167e-06, "logits/chosen": -1.055469036102295, "logits/rejected": -1.0142772197723389, "logps/chosen": -80.18639373779297, "logps/rejected": -105.89190673828125, "loss": 0.085, "rewards/accuracies": 1.0, "rewards/chosen": -0.9965614676475525, "rewards/margins": 1.6899864673614502, "rewards/rejected": -2.6865479946136475, "step": 2303 }, { "epoch": 0.51, "learning_rate": 8.731195106136515e-06, "logits/chosen": -1.1367762088775635, "logits/rejected": -1.0784791707992554, "logps/chosen": -111.22061920166016, "logps/rejected": -170.73989868164062, "loss": 0.0488, "rewards/accuracies": 1.0, "rewards/chosen": -0.37642669677734375, "rewards/margins": 2.4158692359924316, "rewards/rejected": -2.7922959327697754, "step": 2304 }, { "epoch": 0.51, "learning_rate": 8.730001752901258e-06, "logits/chosen": -1.327785611152649, "logits/rejected": -1.3350577354431152, "logps/chosen": -177.8195343017578, "logps/rejected": -244.16839599609375, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": 1.4728683233261108, "rewards/margins": 3.4995040893554688, "rewards/rejected": -2.0266358852386475, "step": 2305 }, { "epoch": 0.51, "learning_rate": 8.728807920369747e-06, "logits/chosen": -1.1590665578842163, "logits/rejected": -1.0853638648986816, "logps/chosen": -232.689453125, "logps/rejected": -192.33270263671875, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": 0.45215150713920593, "rewards/margins": 3.1030335426330566, "rewards/rejected": -2.6508820056915283, "step": 2306 }, { "epoch": 0.51, "learning_rate": 8.727613608695379e-06, "logits/chosen": -1.1468700170516968, "logits/rejected": -1.1706347465515137, "logps/chosen": -118.25965881347656, "logps/rejected": -164.030029296875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.9203209280967712, "rewards/margins": 4.836237907409668, "rewards/rejected": -5.756558895111084, "step": 2307 }, { "epoch": 0.51, "learning_rate": 8.726418818031623e-06, "logits/chosen": -1.264997959136963, "logits/rejected": -1.2617253065109253, "logps/chosen": -113.99594116210938, "logps/rejected": -179.1824493408203, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.48173293471336365, "rewards/margins": 8.717989921569824, "rewards/rejected": -8.23625659942627, "step": 2308 }, { "epoch": 0.51, "learning_rate": 8.72522354853201e-06, "logits/chosen": -0.8020301461219788, "logits/rejected": -0.7908036708831787, "logps/chosen": -90.40350341796875, "logps/rejected": -151.6717529296875, "loss": 0.355, "rewards/accuracies": 1.0, "rewards/chosen": 0.6662773489952087, "rewards/margins": 4.069709777832031, "rewards/rejected": -3.403432607650757, "step": 2309 }, { "epoch": 0.51, "learning_rate": 8.724027800350123e-06, "logits/chosen": -0.7793018817901611, "logits/rejected": -0.4212316572666168, "logps/chosen": -55.1728515625, "logps/rejected": -321.58935546875, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -2.2050769329071045, "rewards/margins": 20.11220359802246, "rewards/rejected": -22.317279815673828, "step": 2310 }, { "epoch": 0.51, "learning_rate": 8.722831573639618e-06, "logits/chosen": -1.1056547164916992, "logits/rejected": -1.0786190032958984, "logps/chosen": -85.11074829101562, "logps/rejected": -221.93167114257812, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.5438438653945923, "rewards/margins": 6.140589237213135, "rewards/rejected": -6.6844329833984375, "step": 2311 }, { "epoch": 0.51, "learning_rate": 8.721634868554204e-06, "logits/chosen": -0.8913766145706177, "logits/rejected": -0.9132147431373596, "logps/chosen": -135.60885620117188, "logps/rejected": -158.8927764892578, "loss": 0.0673, "rewards/accuracies": 1.0, "rewards/chosen": -0.7665908932685852, "rewards/margins": 1.9926819801330566, "rewards/rejected": -2.759272813796997, "step": 2312 }, { "epoch": 0.51, "learning_rate": 8.720437685247657e-06, "logits/chosen": -1.3902162313461304, "logits/rejected": -1.4175786972045898, "logps/chosen": -244.01535034179688, "logps/rejected": -183.05270385742188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.11717529594898224, "rewards/margins": 6.182889461517334, "rewards/rejected": -6.065714359283447, "step": 2313 }, { "epoch": 0.51, "learning_rate": 8.719240023873809e-06, "logits/chosen": -1.0204607248306274, "logits/rejected": -0.5133060812950134, "logps/chosen": -109.36111450195312, "logps/rejected": -467.49554443359375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.8133697509765625, "rewards/margins": 23.644840240478516, "rewards/rejected": -24.458209991455078, "step": 2314 }, { "epoch": 0.51, "learning_rate": 8.71804188458656e-06, "logits/chosen": -0.9768987894058228, "logits/rejected": -0.9472089409828186, "logps/chosen": -69.61968231201172, "logps/rejected": -132.25253295898438, "loss": 0.5632, "rewards/accuracies": 1.0, "rewards/chosen": -1.0011364221572876, "rewards/margins": 2.693202495574951, "rewards/rejected": -3.6943390369415283, "step": 2315 }, { "epoch": 0.51, "learning_rate": 8.716843267539868e-06, "logits/chosen": -0.8235899806022644, "logits/rejected": -0.8110122680664062, "logps/chosen": -64.28597259521484, "logps/rejected": -68.8061752319336, "loss": 0.3059, "rewards/accuracies": 1.0, "rewards/chosen": -1.1543327569961548, "rewards/margins": 0.17153775691986084, "rewards/rejected": -1.3258705139160156, "step": 2316 }, { "epoch": 0.51, "learning_rate": 8.715644172887751e-06, "logits/chosen": -0.899674654006958, "logits/rejected": -0.8649895787239075, "logps/chosen": -75.39948272705078, "logps/rejected": -70.23678588867188, "loss": 0.0513, "rewards/accuracies": 1.0, "rewards/chosen": -0.9810013175010681, "rewards/margins": 2.2394843101501465, "rewards/rejected": -3.2204856872558594, "step": 2317 }, { "epoch": 0.51, "learning_rate": 8.714444600784289e-06, "logits/chosen": -1.1006357669830322, "logits/rejected": -1.1254287958145142, "logps/chosen": -72.94257354736328, "logps/rejected": -51.9700927734375, "loss": 0.2244, "rewards/accuracies": 1.0, "rewards/chosen": -1.8863896131515503, "rewards/margins": 1.0156055688858032, "rewards/rejected": -2.9019951820373535, "step": 2318 }, { "epoch": 0.51, "learning_rate": 8.713244551383626e-06, "logits/chosen": -1.1010773181915283, "logits/rejected": -1.087446928024292, "logps/chosen": -65.68571472167969, "logps/rejected": -55.40445327758789, "loss": 0.4424, "rewards/accuracies": 1.0, "rewards/chosen": -1.0430443286895752, "rewards/margins": 0.01377105712890625, "rewards/rejected": -1.0568153858184814, "step": 2319 }, { "epoch": 0.51, "learning_rate": 8.712044024839962e-06, "logits/chosen": -0.8534281253814697, "logits/rejected": -0.8534281253814697, "logps/chosen": -158.39881896972656, "logps/rejected": -158.39881896972656, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -5.676825046539307, "rewards/margins": 0.0, "rewards/rejected": -5.676825046539307, "step": 2320 }, { "epoch": 0.51, "learning_rate": 8.710843021307567e-06, "logits/chosen": -0.8511500954627991, "logits/rejected": -0.44810208678245544, "logps/chosen": -183.50033569335938, "logps/rejected": -552.1702880859375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.2073853015899658, "rewards/margins": 17.36029052734375, "rewards/rejected": -18.567676544189453, "step": 2321 }, { "epoch": 0.51, "learning_rate": 8.709641540940764e-06, "logits/chosen": -1.2369087934494019, "logits/rejected": -1.3775943517684937, "logps/chosen": -179.13516235351562, "logps/rejected": -130.5255889892578, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 1.3972686529159546, "rewards/margins": 4.6273193359375, "rewards/rejected": -3.230050802230835, "step": 2322 }, { "epoch": 0.51, "learning_rate": 8.70843958389394e-06, "logits/chosen": -0.7876622080802917, "logits/rejected": -0.7148398160934448, "logps/chosen": -108.1329116821289, "logps/rejected": -526.4678955078125, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -3.0116546154022217, "rewards/margins": 42.896026611328125, "rewards/rejected": -45.90768051147461, "step": 2323 }, { "epoch": 0.51, "learning_rate": 8.707237150321544e-06, "logits/chosen": -1.0383238792419434, "logits/rejected": -1.0257103443145752, "logps/chosen": -119.24079895019531, "logps/rejected": -108.52035522460938, "loss": 0.0536, "rewards/accuracies": 1.0, "rewards/chosen": -0.016628265380859375, "rewards/margins": 2.234255313873291, "rewards/rejected": -2.2508835792541504, "step": 2324 }, { "epoch": 0.51, "learning_rate": 8.706034240378087e-06, "logits/chosen": -1.2348359823226929, "logits/rejected": -1.199055552482605, "logps/chosen": -100.01569366455078, "logps/rejected": -84.78323364257812, "loss": 0.283, "rewards/accuracies": 1.0, "rewards/chosen": 0.9404441714286804, "rewards/margins": 3.163235664367676, "rewards/rejected": -2.2227914333343506, "step": 2325 }, { "epoch": 0.51, "learning_rate": 8.704830854218138e-06, "logits/chosen": -1.0294631719589233, "logits/rejected": -0.9735859632492065, "logps/chosen": -92.7666015625, "logps/rejected": -238.38238525390625, "loss": 0.1967, "rewards/accuracies": 1.0, "rewards/chosen": -0.10248260945081711, "rewards/margins": 0.7320343255996704, "rewards/rejected": -0.8345169425010681, "step": 2326 }, { "epoch": 0.52, "learning_rate": 8.703626991996333e-06, "logits/chosen": -0.8897609114646912, "logits/rejected": -0.9176815152168274, "logps/chosen": -188.54437255859375, "logps/rejected": -87.83241271972656, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": 1.678155541419983, "rewards/margins": 2.8848929405212402, "rewards/rejected": -1.2067375183105469, "step": 2327 }, { "epoch": 0.52, "learning_rate": 8.70242265386736e-06, "logits/chosen": -1.086737036705017, "logits/rejected": -1.0376144647598267, "logps/chosen": -210.22427368164062, "logps/rejected": -366.7825927734375, "loss": 1.0978, "rewards/accuracies": 0.0, "rewards/chosen": -0.21250610053539276, "rewards/margins": -2.0773561000823975, "rewards/rejected": 1.8648499250411987, "step": 2328 }, { "epoch": 0.52, "learning_rate": 8.701217839985978e-06, "logits/chosen": -0.8909323811531067, "logits/rejected": -0.8998079299926758, "logps/chosen": -95.02405548095703, "logps/rejected": -129.14877319335938, "loss": 0.1014, "rewards/accuracies": 1.0, "rewards/chosen": -3.0662286281585693, "rewards/margins": 1.5260260105133057, "rewards/rejected": -4.592254638671875, "step": 2329 }, { "epoch": 0.52, "learning_rate": 8.700012550507e-06, "logits/chosen": -1.1692969799041748, "logits/rejected": -1.17471182346344, "logps/chosen": -235.57522583007812, "logps/rejected": -407.51885986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2569931745529175, "rewards/margins": 12.013310432434082, "rewards/rejected": -13.270303726196289, "step": 2330 }, { "epoch": 0.52, "learning_rate": 8.698806785585305e-06, "logits/chosen": -1.3165616989135742, "logits/rejected": -1.2950820922851562, "logps/chosen": -147.95535278320312, "logps/rejected": -182.17977905273438, "loss": 2.4361, "rewards/accuracies": 0.0, "rewards/chosen": -3.559809923171997, "rewards/margins": -4.4927520751953125, "rewards/rejected": 0.9329422116279602, "step": 2331 }, { "epoch": 0.52, "learning_rate": 8.697600545375829e-06, "logits/chosen": -1.2084219455718994, "logits/rejected": -1.1896800994873047, "logps/chosen": -44.083763122558594, "logps/rejected": -54.82249450683594, "loss": 0.6191, "rewards/accuracies": 0.0, "rewards/chosen": -2.148264169692993, "rewards/margins": -0.8061544895172119, "rewards/rejected": -1.3421096801757812, "step": 2332 }, { "epoch": 0.52, "learning_rate": 8.696393830033571e-06, "logits/chosen": -1.3533118963241577, "logits/rejected": -1.3468761444091797, "logps/chosen": -112.12989807128906, "logps/rejected": -108.73548126220703, "loss": 0.1328, "rewards/accuracies": 1.0, "rewards/chosen": -1.9626740217208862, "rewards/margins": 1.2338463068008423, "rewards/rejected": -3.1965203285217285, "step": 2333 }, { "epoch": 0.52, "learning_rate": 8.695186639713593e-06, "logits/chosen": -1.1799776554107666, "logits/rejected": -1.1916264295578003, "logps/chosen": -150.85910034179688, "logps/rejected": -133.891357421875, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 1.288049340248108, "rewards/margins": 3.659689426422119, "rewards/rejected": -2.3716399669647217, "step": 2334 }, { "epoch": 0.52, "learning_rate": 8.693978974571013e-06, "logits/chosen": -1.1546136140823364, "logits/rejected": -1.1313273906707764, "logps/chosen": -64.56378173828125, "logps/rejected": -101.49200439453125, "loss": 0.567, "rewards/accuracies": 1.0, "rewards/chosen": -0.4231460690498352, "rewards/margins": 2.90110182762146, "rewards/rejected": -3.3242478370666504, "step": 2335 }, { "epoch": 0.52, "learning_rate": 8.692770834761017e-06, "logits/chosen": -1.05684494972229, "logits/rejected": -1.0692031383514404, "logps/chosen": -208.25064086914062, "logps/rejected": -140.3352508544922, "loss": 1.0041, "rewards/accuracies": 0.0, "rewards/chosen": -6.513858318328857, "rewards/margins": -0.2735004425048828, "rewards/rejected": -6.240357875823975, "step": 2336 }, { "epoch": 0.52, "learning_rate": 8.691562220438845e-06, "logits/chosen": -0.9872081875801086, "logits/rejected": -0.9922647476196289, "logps/chosen": -67.4435806274414, "logps/rejected": -70.93688201904297, "loss": 0.4218, "rewards/accuracies": 0.0, "rewards/chosen": -1.9835853576660156, "rewards/margins": -0.2799251079559326, "rewards/rejected": -1.703660249710083, "step": 2337 }, { "epoch": 0.52, "learning_rate": 8.690353131759802e-06, "logits/chosen": -0.9460452198982239, "logits/rejected": -0.9342738389968872, "logps/chosen": -158.90554809570312, "logps/rejected": -81.10616302490234, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 1.070002794265747, "rewards/margins": 4.9983415603637695, "rewards/rejected": -3.9283390045166016, "step": 2338 }, { "epoch": 0.52, "learning_rate": 8.689143568879252e-06, "logits/chosen": -1.1787662506103516, "logits/rejected": -1.0883346796035767, "logps/chosen": -144.208251953125, "logps/rejected": -292.9070739746094, "loss": 1.3466, "rewards/accuracies": 1.0, "rewards/chosen": 1.7165939807891846, "rewards/margins": 8.344929695129395, "rewards/rejected": -6.628335475921631, "step": 2339 }, { "epoch": 0.52, "learning_rate": 8.687933531952624e-06, "logits/chosen": -0.8122148513793945, "logits/rejected": -0.8279527425765991, "logps/chosen": -272.4601745605469, "logps/rejected": -188.10272216796875, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -0.30999755859375, "rewards/margins": 3.5593369007110596, "rewards/rejected": -3.8693344593048096, "step": 2340 }, { "epoch": 0.52, "learning_rate": 8.686723021135402e-06, "logits/chosen": -1.004002571105957, "logits/rejected": -0.5908324718475342, "logps/chosen": -101.6795425415039, "logps/rejected": -542.72119140625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 1.1019691228866577, "rewards/margins": 37.24867248535156, "rewards/rejected": -36.14670181274414, "step": 2341 }, { "epoch": 0.52, "learning_rate": 8.685512036583132e-06, "logits/chosen": -0.9188616871833801, "logits/rejected": -0.9608725309371948, "logps/chosen": -84.22964477539062, "logps/rejected": -53.10095977783203, "loss": 0.4375, "rewards/accuracies": 0.0, "rewards/chosen": -2.5571541786193848, "rewards/margins": -0.33557868003845215, "rewards/rejected": -2.2215754985809326, "step": 2342 }, { "epoch": 0.52, "learning_rate": 8.684300578451428e-06, "logits/chosen": -0.8355237245559692, "logits/rejected": -0.8803823590278625, "logps/chosen": -95.44114685058594, "logps/rejected": -147.31675720214844, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": 0.5618835687637329, "rewards/margins": 3.6353759765625, "rewards/rejected": -3.0734925270080566, "step": 2343 }, { "epoch": 0.52, "learning_rate": 8.683088646895955e-06, "logits/chosen": -1.1491730213165283, "logits/rejected": -1.0580699443817139, "logps/chosen": -151.37860107421875, "logps/rejected": -175.00274658203125, "loss": 0.1056, "rewards/accuracies": 1.0, "rewards/chosen": 0.32102662324905396, "rewards/margins": 1.4472947120666504, "rewards/rejected": -1.1262680292129517, "step": 2344 }, { "epoch": 0.52, "learning_rate": 8.681876242072445e-06, "logits/chosen": -0.8742769360542297, "logits/rejected": -0.6075637936592102, "logps/chosen": -262.636474609375, "logps/rejected": -265.61785888671875, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -0.04543457180261612, "rewards/margins": 12.686508178710938, "rewards/rejected": -12.731943130493164, "step": 2345 }, { "epoch": 0.52, "learning_rate": 8.68066336413669e-06, "logits/chosen": -1.1532379388809204, "logits/rejected": -1.1749534606933594, "logps/chosen": -107.81034851074219, "logps/rejected": -56.96064376831055, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": 0.3385719358921051, "rewards/margins": 3.8245644569396973, "rewards/rejected": -3.485992431640625, "step": 2346 }, { "epoch": 0.52, "learning_rate": 8.67945001324454e-06, "logits/chosen": -0.8284748196601868, "logits/rejected": -0.8280697464942932, "logps/chosen": -124.17196655273438, "logps/rejected": -74.37130737304688, "loss": 0.3528, "rewards/accuracies": 1.0, "rewards/chosen": -0.32014694809913635, "rewards/margins": 0.015621185302734375, "rewards/rejected": -0.3357681334018707, "step": 2347 }, { "epoch": 0.52, "learning_rate": 8.678236189551907e-06, "logits/chosen": -0.7095308899879456, "logits/rejected": -0.658035933971405, "logps/chosen": -76.19636535644531, "logps/rejected": -184.79623413085938, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.2141388654708862, "rewards/margins": 5.6489338874816895, "rewards/rejected": -6.863072872161865, "step": 2348 }, { "epoch": 0.52, "learning_rate": 8.677021893214768e-06, "logits/chosen": -0.9256380796432495, "logits/rejected": -0.914645791053772, "logps/chosen": -79.9253921508789, "logps/rejected": -119.80801391601562, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -1.3336037397384644, "rewards/margins": 3.0728402137756348, "rewards/rejected": -4.406444072723389, "step": 2349 }, { "epoch": 0.52, "learning_rate": 8.675807124389153e-06, "logits/chosen": -1.1226520538330078, "logits/rejected": -0.7492058873176575, "logps/chosen": -111.78346252441406, "logps/rejected": -455.0960693359375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.5010010004043579, "rewards/margins": 33.14265823364258, "rewards/rejected": -32.641658782958984, "step": 2350 }, { "epoch": 0.52, "learning_rate": 8.67459188323116e-06, "logits/chosen": -1.2731003761291504, "logits/rejected": -1.2203105688095093, "logps/chosen": -132.50653076171875, "logps/rejected": -205.29757690429688, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.2631378173828125, "rewards/margins": 5.412884712219238, "rewards/rejected": -5.676022529602051, "step": 2351 }, { "epoch": 0.52, "learning_rate": 8.673376169896944e-06, "logits/chosen": -1.1082037687301636, "logits/rejected": -1.1082037687301636, "logps/chosen": -187.105224609375, "logps/rejected": -187.105224609375, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -6.470452308654785, "rewards/margins": 0.0, "rewards/rejected": -6.470452308654785, "step": 2352 }, { "epoch": 0.52, "learning_rate": 8.672159984542721e-06, "logits/chosen": -0.8438760042190552, "logits/rejected": -0.8463222980499268, "logps/chosen": -105.72634887695312, "logps/rejected": -110.940673828125, "loss": 0.5346, "rewards/accuracies": 0.0, "rewards/chosen": -2.8361268043518066, "rewards/margins": -0.6410644054412842, "rewards/rejected": -2.1950623989105225, "step": 2353 }, { "epoch": 0.52, "learning_rate": 8.670943327324767e-06, "logits/chosen": -0.9122311472892761, "logits/rejected": -0.8993138074874878, "logps/chosen": -184.65301513671875, "logps/rejected": -77.07778930664062, "loss": 0.2108, "rewards/accuracies": 1.0, "rewards/chosen": -2.958120822906494, "rewards/margins": 0.6676065921783447, "rewards/rejected": -3.625727415084839, "step": 2354 }, { "epoch": 0.52, "learning_rate": 8.66972619839942e-06, "logits/chosen": -1.0290217399597168, "logits/rejected": -1.0341922044754028, "logps/chosen": -74.14335632324219, "logps/rejected": -52.048072814941406, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": -0.8372581601142883, "rewards/margins": 2.373934745788574, "rewards/rejected": -3.2111928462982178, "step": 2355 }, { "epoch": 0.52, "learning_rate": 8.668508597923077e-06, "logits/chosen": -1.0927612781524658, "logits/rejected": -1.0133652687072754, "logps/chosen": -89.72823333740234, "logps/rejected": -185.68231201171875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 0.28067323565483093, "rewards/margins": 7.722270965576172, "rewards/rejected": -7.441597938537598, "step": 2356 }, { "epoch": 0.52, "learning_rate": 8.6672905260522e-06, "logits/chosen": -0.9366231560707092, "logits/rejected": -0.9256092309951782, "logps/chosen": -227.3480224609375, "logps/rejected": -292.98345947265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 2.330021619796753, "rewards/margins": 6.663846015930176, "rewards/rejected": -4.333824157714844, "step": 2357 }, { "epoch": 0.52, "learning_rate": 8.666071982943306e-06, "logits/chosen": -0.9601565003395081, "logits/rejected": -0.9447962045669556, "logps/chosen": -82.88069152832031, "logps/rejected": -75.7437515258789, "loss": 0.1083, "rewards/accuracies": 1.0, "rewards/chosen": -0.21819305419921875, "rewards/margins": 2.0524826049804688, "rewards/rejected": -2.2706756591796875, "step": 2358 }, { "epoch": 0.52, "learning_rate": 8.664852968752975e-06, "logits/chosen": -0.8420873284339905, "logits/rejected": -0.841926634311676, "logps/chosen": -290.6655578613281, "logps/rejected": -145.67941284179688, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": 1.6293976306915283, "rewards/margins": 6.9260358810424805, "rewards/rejected": -5.296638488769531, "step": 2359 }, { "epoch": 0.52, "learning_rate": 8.663633483637847e-06, "logits/chosen": -1.2299599647521973, "logits/rejected": -1.183652639389038, "logps/chosen": -102.78065490722656, "logps/rejected": -217.9583740234375, "loss": 0.5635, "rewards/accuracies": 0.0, "rewards/chosen": -0.9509124755859375, "rewards/margins": -0.7169952392578125, "rewards/rejected": -0.233917236328125, "step": 2360 }, { "epoch": 0.52, "learning_rate": 8.662413527754624e-06, "logits/chosen": -1.138782262802124, "logits/rejected": -1.1643210649490356, "logps/chosen": -142.6605987548828, "logps/rejected": -150.196533203125, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": 3.437269687652588, "rewards/margins": 9.086164474487305, "rewards/rejected": -5.648895263671875, "step": 2361 }, { "epoch": 0.52, "learning_rate": 8.661193101260067e-06, "logits/chosen": -0.7706160545349121, "logits/rejected": -0.7854061126708984, "logps/chosen": -221.89532470703125, "logps/rejected": -161.47837829589844, "loss": 0.0489, "rewards/accuracies": 1.0, "rewards/chosen": 0.09416504204273224, "rewards/margins": 3.346942186355591, "rewards/rejected": -3.252777099609375, "step": 2362 }, { "epoch": 0.52, "learning_rate": 8.659972204310998e-06, "logits/chosen": -0.9108432531356812, "logits/rejected": -0.9253827333450317, "logps/chosen": -87.47733306884766, "logps/rejected": -141.3205108642578, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -0.9377037286758423, "rewards/margins": 3.6552934646606445, "rewards/rejected": -4.592997074127197, "step": 2363 }, { "epoch": 0.52, "learning_rate": 8.658750837064299e-06, "logits/chosen": -0.9257111549377441, "logits/rejected": -0.9149910807609558, "logps/chosen": -108.52456665039062, "logps/rejected": -128.25350952148438, "loss": 0.0596, "rewards/accuracies": 1.0, "rewards/chosen": -0.478790283203125, "rewards/margins": 2.755098819732666, "rewards/rejected": -3.233889102935791, "step": 2364 }, { "epoch": 0.52, "learning_rate": 8.657528999676912e-06, "logits/chosen": -0.9017645120620728, "logits/rejected": -0.883191704750061, "logps/chosen": -82.42594146728516, "logps/rejected": -144.9439697265625, "loss": 0.1177, "rewards/accuracies": 1.0, "rewards/chosen": -0.5093345642089844, "rewards/margins": 1.8778076171875, "rewards/rejected": -2.3871421813964844, "step": 2365 }, { "epoch": 0.52, "learning_rate": 8.65630669230584e-06, "logits/chosen": -1.0279637575149536, "logits/rejected": -1.030423879623413, "logps/chosen": -124.23243713378906, "logps/rejected": -114.95767211914062, "loss": 0.1758, "rewards/accuracies": 1.0, "rewards/chosen": -2.4543075561523438, "rewards/margins": 0.868539571762085, "rewards/rejected": -3.3228471279144287, "step": 2366 }, { "epoch": 0.52, "learning_rate": 8.65508391510815e-06, "logits/chosen": -0.8912652730941772, "logits/rejected": -0.8975019454956055, "logps/chosen": -136.73574829101562, "logps/rejected": -110.918212890625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": 1.4804718494415283, "rewards/margins": 5.28457498550415, "rewards/rejected": -3.804103136062622, "step": 2367 }, { "epoch": 0.52, "learning_rate": 8.653860668240963e-06, "logits/chosen": -1.1266891956329346, "logits/rejected": -1.1044111251831055, "logps/chosen": -78.60363006591797, "logps/rejected": -137.14231872558594, "loss": 0.397, "rewards/accuracies": 0.0, "rewards/chosen": -1.8469024896621704, "rewards/margins": -0.16837310791015625, "rewards/rejected": -1.6785293817520142, "step": 2368 }, { "epoch": 0.52, "learning_rate": 8.652636951861463e-06, "logits/chosen": -1.1528369188308716, "logits/rejected": -1.1434890031814575, "logps/chosen": -79.20246124267578, "logps/rejected": -85.44095611572266, "loss": 0.0713, "rewards/accuracies": 1.0, "rewards/chosen": 0.13134078681468964, "rewards/margins": 1.955130696296692, "rewards/rejected": -1.8237899541854858, "step": 2369 }, { "epoch": 0.52, "learning_rate": 8.651412766126896e-06, "logits/chosen": -1.2492560148239136, "logits/rejected": -1.2498242855072021, "logps/chosen": -180.22767639160156, "logps/rejected": -126.81287384033203, "loss": 0.1446, "rewards/accuracies": 1.0, "rewards/chosen": -1.0258530378341675, "rewards/margins": 1.3051894903182983, "rewards/rejected": -2.331042528152466, "step": 2370 }, { "epoch": 0.52, "learning_rate": 8.650188111194565e-06, "logits/chosen": -1.1371010541915894, "logits/rejected": -1.2661799192428589, "logps/chosen": -219.799072265625, "logps/rejected": -91.33171844482422, "loss": 0.0804, "rewards/accuracies": 1.0, "rewards/chosen": 0.5698150992393494, "rewards/margins": 1.8735313415527344, "rewards/rejected": -1.3037163019180298, "step": 2371 }, { "epoch": 0.53, "learning_rate": 8.648962987221837e-06, "logits/chosen": -0.7748421430587769, "logits/rejected": -0.8044822812080383, "logps/chosen": -183.20343017578125, "logps/rejected": -188.91416931152344, "loss": 0.4583, "rewards/accuracies": 1.0, "rewards/chosen": 1.3423432111740112, "rewards/margins": 1.307917833328247, "rewards/rejected": 0.03442535549402237, "step": 2372 }, { "epoch": 0.53, "learning_rate": 8.647737394366138e-06, "logits/chosen": -0.9802128672599792, "logits/rejected": -0.9796620607376099, "logps/chosen": -186.25189208984375, "logps/rejected": -148.60914611816406, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 1.423376441001892, "rewards/margins": 4.3279008865356445, "rewards/rejected": -2.904524326324463, "step": 2373 }, { "epoch": 0.53, "learning_rate": 8.646511332784953e-06, "logits/chosen": -1.0875298976898193, "logits/rejected": -1.0948212146759033, "logps/chosen": -103.890625, "logps/rejected": -105.74745178222656, "loss": 0.3836, "rewards/accuracies": 0.0, "rewards/chosen": -0.9525443911552429, "rewards/margins": -0.1426239013671875, "rewards/rejected": -0.8099204897880554, "step": 2374 }, { "epoch": 0.53, "learning_rate": 8.645284802635827e-06, "logits/chosen": -1.18871009349823, "logits/rejected": -1.1805716753005981, "logps/chosen": -119.90724182128906, "logps/rejected": -103.95466613769531, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": 0.3724838197231293, "rewards/margins": 3.096466064453125, "rewards/rejected": -2.723982334136963, "step": 2375 }, { "epoch": 0.53, "learning_rate": 8.644057804076367e-06, "logits/chosen": -0.9366753101348877, "logits/rejected": -0.948706865310669, "logps/chosen": -168.74118041992188, "logps/rejected": -83.03837585449219, "loss": 0.4618, "rewards/accuracies": 0.0, "rewards/chosen": -2.3067002296447754, "rewards/margins": -0.41660165786743164, "rewards/rejected": -1.8900985717773438, "step": 2376 }, { "epoch": 0.53, "learning_rate": 8.642830337264239e-06, "logits/chosen": -0.9663749933242798, "logits/rejected": -0.9116693139076233, "logps/chosen": -84.94296264648438, "logps/rejected": -65.20832061767578, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 0.4251693785190582, "rewards/margins": 4.289278030395508, "rewards/rejected": -3.8641088008880615, "step": 2377 }, { "epoch": 0.53, "learning_rate": 8.641602402357168e-06, "logits/chosen": -0.9329606294631958, "logits/rejected": -0.9036335349082947, "logps/chosen": -148.97659301757812, "logps/rejected": -265.2847900390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.8387512564659119, "rewards/margins": 8.998896598815918, "rewards/rejected": -8.16014575958252, "step": 2378 }, { "epoch": 0.53, "learning_rate": 8.640373999512946e-06, "logits/chosen": -0.8801233768463135, "logits/rejected": -0.8776159286499023, "logps/chosen": -92.67555236816406, "logps/rejected": -126.22978973388672, "loss": 0.0446, "rewards/accuracies": 1.0, "rewards/chosen": 1.0792999267578125, "rewards/margins": 5.281487464904785, "rewards/rejected": -4.202187538146973, "step": 2379 }, { "epoch": 0.53, "learning_rate": 8.639145128889415e-06, "logits/chosen": -0.9316551089286804, "logits/rejected": -0.5571624636650085, "logps/chosen": -139.9491729736328, "logps/rejected": -681.4273071289062, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.2166458368301392, "rewards/margins": 49.538604736328125, "rewards/rejected": -48.32196044921875, "step": 2380 }, { "epoch": 0.53, "learning_rate": 8.637915790644482e-06, "logits/chosen": -1.206066370010376, "logits/rejected": -1.2650913000106812, "logps/chosen": -266.2428894042969, "logps/rejected": -109.77644348144531, "loss": 0.0429, "rewards/accuracies": 1.0, "rewards/chosen": 1.887243628501892, "rewards/margins": 4.638001918792725, "rewards/rejected": -2.750758409500122, "step": 2381 }, { "epoch": 0.53, "learning_rate": 8.636685984936115e-06, "logits/chosen": -0.8398855924606323, "logits/rejected": -0.8253188133239746, "logps/chosen": -173.33987426757812, "logps/rejected": -160.1971435546875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 2.9118804931640625, "rewards/margins": 8.43856430053711, "rewards/rejected": -5.526683330535889, "step": 2382 }, { "epoch": 0.53, "learning_rate": 8.635455711922343e-06, "logits/chosen": -0.8828617334365845, "logits/rejected": -0.8690106272697449, "logps/chosen": -131.5974884033203, "logps/rejected": -106.08058166503906, "loss": 0.0745, "rewards/accuracies": 1.0, "rewards/chosen": 1.5984222888946533, "rewards/margins": 1.831298828125, "rewards/rejected": -0.23287658393383026, "step": 2383 }, { "epoch": 0.53, "learning_rate": 8.634224971761251e-06, "logits/chosen": -1.0223684310913086, "logits/rejected": -1.0394948720932007, "logps/chosen": -86.65156555175781, "logps/rejected": -170.60862731933594, "loss": 0.3477, "rewards/accuracies": 1.0, "rewards/chosen": 1.6377280950546265, "rewards/margins": 6.085615634918213, "rewards/rejected": -4.447887420654297, "step": 2384 }, { "epoch": 0.53, "learning_rate": 8.632993764610986e-06, "logits/chosen": -0.7224667072296143, "logits/rejected": -0.45595911145210266, "logps/chosen": -226.9744415283203, "logps/rejected": -344.019287109375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.3950760066509247, "rewards/margins": 22.376361846923828, "rewards/rejected": -22.771438598632812, "step": 2385 }, { "epoch": 0.53, "learning_rate": 8.631762090629756e-06, "logits/chosen": -0.858877956867218, "logits/rejected": -0.8843910098075867, "logps/chosen": -166.7564697265625, "logps/rejected": -56.524505615234375, "loss": 1.9607, "rewards/accuracies": 0.0, "rewards/chosen": -3.4031479358673096, "rewards/margins": -0.8112912178039551, "rewards/rejected": -2.5918567180633545, "step": 2386 }, { "epoch": 0.53, "learning_rate": 8.630529949975828e-06, "logits/chosen": -0.7072466611862183, "logits/rejected": -0.8181447386741638, "logps/chosen": -176.00955200195312, "logps/rejected": -53.7069206237793, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 2.40283203125, "rewards/margins": 5.912961959838867, "rewards/rejected": -3.510129690170288, "step": 2387 }, { "epoch": 0.53, "learning_rate": 8.629297342807528e-06, "logits/chosen": -0.790614902973175, "logits/rejected": -0.7305377721786499, "logps/chosen": -160.07823181152344, "logps/rejected": -171.24148559570312, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": 2.3438949584960938, "rewards/margins": 5.247686862945557, "rewards/rejected": -2.903791904449463, "step": 2388 }, { "epoch": 0.53, "learning_rate": 8.628064269283246e-06, "logits/chosen": -0.6544767022132874, "logits/rejected": -0.6032353639602661, "logps/chosen": -202.5590057373047, "logps/rejected": -688.6666259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.254237413406372, "rewards/margins": 50.75248718261719, "rewards/rejected": -54.0067253112793, "step": 2389 }, { "epoch": 0.53, "learning_rate": 8.626830729561426e-06, "logits/chosen": -0.8151291608810425, "logits/rejected": -0.897485077381134, "logps/chosen": -261.82452392578125, "logps/rejected": -167.521484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 2.096112012863159, "rewards/margins": 6.023266792297363, "rewards/rejected": -3.927154541015625, "step": 2390 }, { "epoch": 0.53, "learning_rate": 8.625596723800575e-06, "logits/chosen": -1.1173608303070068, "logits/rejected": -0.618481457233429, "logps/chosen": -193.5281982421875, "logps/rejected": -715.2509155273438, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -2.0452423095703125, "rewards/margins": 56.817386627197266, "rewards/rejected": -58.86262893676758, "step": 2391 }, { "epoch": 0.53, "learning_rate": 8.624362252159262e-06, "logits/chosen": -1.4184919595718384, "logits/rejected": -1.4375535249710083, "logps/chosen": -121.47232055664062, "logps/rejected": -139.4397735595703, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 1.1351302862167358, "rewards/margins": 3.9296646118164062, "rewards/rejected": -2.79453444480896, "step": 2392 }, { "epoch": 0.53, "learning_rate": 8.623127314796111e-06, "logits/chosen": -1.2822185754776, "logits/rejected": -1.2539455890655518, "logps/chosen": -107.61747741699219, "logps/rejected": -57.3204345703125, "loss": 0.0655, "rewards/accuracies": 1.0, "rewards/chosen": -0.9674659967422485, "rewards/margins": 1.979209065437317, "rewards/rejected": -2.9466750621795654, "step": 2393 }, { "epoch": 0.53, "learning_rate": 8.621891911869811e-06, "logits/chosen": -0.9007457494735718, "logits/rejected": -0.6971325278282166, "logps/chosen": -129.83639526367188, "logps/rejected": -547.7815551757812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.311325788497925, "rewards/margins": 22.45450210571289, "rewards/rejected": -25.765827178955078, "step": 2394 }, { "epoch": 0.53, "learning_rate": 8.620656043539106e-06, "logits/chosen": -1.1693698167800903, "logits/rejected": -1.145826816558838, "logps/chosen": -112.12701416015625, "logps/rejected": -140.18768310546875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 1.1038764715194702, "rewards/margins": 5.389543533325195, "rewards/rejected": -4.2856669425964355, "step": 2395 }, { "epoch": 0.53, "learning_rate": 8.619419709962804e-06, "logits/chosen": -0.8133704662322998, "logits/rejected": -0.7894793152809143, "logps/chosen": -93.75155639648438, "logps/rejected": -206.2813720703125, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": 0.054840087890625, "rewards/margins": 3.6587753295898438, "rewards/rejected": -3.6039352416992188, "step": 2396 }, { "epoch": 0.53, "learning_rate": 8.61818291129977e-06, "logits/chosen": -0.7966776490211487, "logits/rejected": -0.8009090423583984, "logps/chosen": -76.79310607910156, "logps/rejected": -121.10771179199219, "loss": 0.2411, "rewards/accuracies": 1.0, "rewards/chosen": 0.9338394403457642, "rewards/margins": 0.5446532964706421, "rewards/rejected": 0.3891861140727997, "step": 2397 }, { "epoch": 0.53, "learning_rate": 8.61694564770893e-06, "logits/chosen": -1.1915919780731201, "logits/rejected": -1.2150421142578125, "logps/chosen": -183.02236938476562, "logps/rejected": -186.17340087890625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.304251104593277, "rewards/margins": 5.484012126922607, "rewards/rejected": -5.179760932922363, "step": 2398 }, { "epoch": 0.53, "learning_rate": 8.61570791934927e-06, "logits/chosen": -1.1953188180923462, "logits/rejected": -1.2252920866012573, "logps/chosen": -172.3750762939453, "logps/rejected": -66.88436889648438, "loss": 0.0935, "rewards/accuracies": 1.0, "rewards/chosen": 2.858442783355713, "rewards/margins": 2.240264892578125, "rewards/rejected": 0.6181778311729431, "step": 2399 }, { "epoch": 0.53, "learning_rate": 8.614469726379833e-06, "logits/chosen": -0.9982643723487854, "logits/rejected": -1.0040475130081177, "logps/chosen": -176.77574157714844, "logps/rejected": -156.5275421142578, "loss": 0.0504, "rewards/accuracies": 1.0, "rewards/chosen": 2.4676437377929688, "rewards/margins": 2.2447280883789062, "rewards/rejected": 0.2229156494140625, "step": 2400 }, { "epoch": 0.53, "learning_rate": 8.613231068959726e-06, "logits/chosen": -1.0807801485061646, "logits/rejected": -0.9640422463417053, "logps/chosen": -206.05889892578125, "logps/rejected": -330.6947021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.4510133266448975, "rewards/margins": 17.302486419677734, "rewards/rejected": -14.851472854614258, "step": 2401 }, { "epoch": 0.53, "learning_rate": 8.61199194724811e-06, "logits/chosen": -1.0430511236190796, "logits/rejected": -1.0186152458190918, "logps/chosen": -125.96034240722656, "logps/rejected": -97.71653747558594, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": 1.7215499877929688, "rewards/margins": 2.8654556274414062, "rewards/rejected": -1.1439056396484375, "step": 2402 }, { "epoch": 0.53, "learning_rate": 8.610752361404216e-06, "logits/chosen": -1.2580136060714722, "logits/rejected": -1.2269493341445923, "logps/chosen": -143.6472930908203, "logps/rejected": -169.32582092285156, "loss": 2.6179, "rewards/accuracies": 0.0, "rewards/chosen": -3.508059024810791, "rewards/margins": -5.2197136878967285, "rewards/rejected": 1.7116546630859375, "step": 2403 }, { "epoch": 0.53, "learning_rate": 8.60951231158732e-06, "logits/chosen": -1.2553300857543945, "logits/rejected": -1.3600484132766724, "logps/chosen": -481.45916748046875, "logps/rejected": -88.46215057373047, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": 0.09984741359949112, "rewards/margins": 4.7976908683776855, "rewards/rejected": -4.697843551635742, "step": 2404 }, { "epoch": 0.53, "learning_rate": 8.60827179795677e-06, "logits/chosen": -1.0705640316009521, "logits/rejected": -1.0439229011535645, "logps/chosen": -68.4959716796875, "logps/rejected": -84.48014831542969, "loss": 0.0746, "rewards/accuracies": 1.0, "rewards/chosen": 0.30333635210990906, "rewards/margins": 1.8602662086486816, "rewards/rejected": -1.5569298267364502, "step": 2405 }, { "epoch": 0.53, "learning_rate": 8.607030820671969e-06, "logits/chosen": -1.0300343036651611, "logits/rejected": -1.0300343036651611, "logps/chosen": -73.04655456542969, "logps/rejected": -73.04655456542969, "loss": 1.2622, "rewards/accuracies": 0.0, "rewards/chosen": 1.2180061340332031, "rewards/margins": 0.0, "rewards/rejected": 1.2180061340332031, "step": 2406 }, { "epoch": 0.53, "learning_rate": 8.605789379892378e-06, "logits/chosen": -1.083052396774292, "logits/rejected": -1.0457500219345093, "logps/chosen": -197.39266967773438, "logps/rejected": -199.16287231445312, "loss": 0.1947, "rewards/accuracies": 1.0, "rewards/chosen": -0.218434140086174, "rewards/margins": 0.8907471299171448, "rewards/rejected": -1.10918128490448, "step": 2407 }, { "epoch": 0.53, "learning_rate": 8.60454747577752e-06, "logits/chosen": -0.8778342008590698, "logits/rejected": -0.7519529461860657, "logps/chosen": -211.39942932128906, "logps/rejected": -259.9104309082031, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": -0.5857269167900085, "rewards/margins": 2.9284820556640625, "rewards/rejected": -3.514209032058716, "step": 2408 }, { "epoch": 0.53, "learning_rate": 8.603305108486975e-06, "logits/chosen": -0.9063424468040466, "logits/rejected": -0.8962228894233704, "logps/chosen": -245.30560302734375, "logps/rejected": -79.90872192382812, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -0.6622512936592102, "rewards/margins": 3.230172872543335, "rewards/rejected": -3.8924241065979004, "step": 2409 }, { "epoch": 0.53, "learning_rate": 8.602062278180388e-06, "logits/chosen": -1.0954614877700806, "logits/rejected": -1.042179822921753, "logps/chosen": -158.47140502929688, "logps/rejected": -214.6212615966797, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": 0.2466888427734375, "rewards/margins": 4.255059719085693, "rewards/rejected": -4.008370876312256, "step": 2410 }, { "epoch": 0.53, "learning_rate": 8.600818985017457e-06, "logits/chosen": -0.9991039037704468, "logits/rejected": -0.9636289477348328, "logps/chosen": -166.1179962158203, "logps/rejected": -214.50732421875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.8530105948448181, "rewards/margins": 5.6642961502075195, "rewards/rejected": -4.811285495758057, "step": 2411 }, { "epoch": 0.53, "learning_rate": 8.59957522915794e-06, "logits/chosen": -0.7443692684173584, "logits/rejected": -0.6819597482681274, "logps/chosen": -91.0701675415039, "logps/rejected": -103.22248840332031, "loss": 0.0689, "rewards/accuracies": 1.0, "rewards/chosen": -1.327460527420044, "rewards/margins": 2.059272050857544, "rewards/rejected": -3.386732578277588, "step": 2412 }, { "epoch": 0.53, "learning_rate": 8.598331010761662e-06, "logits/chosen": -0.8018062114715576, "logits/rejected": -0.7141103744506836, "logps/chosen": -114.21185302734375, "logps/rejected": -224.54519653320312, "loss": 0.7371, "rewards/accuracies": 0.0, "rewards/chosen": -1.6993744373321533, "rewards/margins": -1.2102752923965454, "rewards/rejected": -0.4890991151332855, "step": 2413 }, { "epoch": 0.53, "learning_rate": 8.597086329988498e-06, "logits/chosen": -1.1650370359420776, "logits/rejected": -1.158752679824829, "logps/chosen": -94.6701889038086, "logps/rejected": -95.42010498046875, "loss": 0.0681, "rewards/accuracies": 1.0, "rewards/chosen": 0.4664299190044403, "rewards/margins": 1.9269837141036987, "rewards/rejected": -1.460553765296936, "step": 2414 }, { "epoch": 0.53, "learning_rate": 8.595841186998388e-06, "logits/chosen": -1.1760342121124268, "logits/rejected": -0.5983084440231323, "logps/chosen": -229.62802124023438, "logps/rejected": -477.4497375488281, "loss": 0.0518, "rewards/accuracies": 1.0, "rewards/chosen": 0.7082687616348267, "rewards/margins": 39.77628707885742, "rewards/rejected": -39.06801986694336, "step": 2415 }, { "epoch": 0.53, "learning_rate": 8.594595581951329e-06, "logits/chosen": -1.1453461647033691, "logits/rejected": -1.1209608316421509, "logps/chosen": -135.32546997070312, "logps/rejected": -64.39280700683594, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": -0.04549407958984375, "rewards/margins": 3.4598946571350098, "rewards/rejected": -3.5053887367248535, "step": 2416 }, { "epoch": 0.53, "learning_rate": 8.593349515007379e-06, "logits/chosen": -0.6872362494468689, "logits/rejected": -0.6383944749832153, "logps/chosen": -247.779296875, "logps/rejected": -337.51043701171875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 1.410882592201233, "rewards/margins": 6.642370700836182, "rewards/rejected": -5.231488227844238, "step": 2417 }, { "epoch": 0.54, "learning_rate": 8.592102986326656e-06, "logits/chosen": -0.9966183304786682, "logits/rejected": -0.9782132506370544, "logps/chosen": -94.91554260253906, "logps/rejected": -195.2123260498047, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 1.5476967096328735, "rewards/margins": 6.691725730895996, "rewards/rejected": -5.144029140472412, "step": 2418 }, { "epoch": 0.54, "learning_rate": 8.590855996069334e-06, "logits/chosen": -0.980029284954071, "logits/rejected": -0.9896584749221802, "logps/chosen": -35.277503967285156, "logps/rejected": -18.570449829101562, "loss": 0.7974, "rewards/accuracies": 0.0, "rewards/chosen": -2.1705596446990967, "rewards/margins": -1.367133617401123, "rewards/rejected": -0.8034259676933289, "step": 2419 }, { "epoch": 0.54, "learning_rate": 8.589608544395646e-06, "logits/chosen": -1.16587233543396, "logits/rejected": -1.1544032096862793, "logps/chosen": -112.43960571289062, "logps/rejected": -75.46543884277344, "loss": 0.3423, "rewards/accuracies": 1.0, "rewards/chosen": 0.9061325192451477, "rewards/margins": 0.02015841007232666, "rewards/rejected": 0.885974109172821, "step": 2420 }, { "epoch": 0.54, "learning_rate": 8.588360631465893e-06, "logits/chosen": -1.0744118690490723, "logits/rejected": -1.0465304851531982, "logps/chosen": -208.90306091308594, "logps/rejected": -161.7653045654297, "loss": 0.3021, "rewards/accuracies": 1.0, "rewards/chosen": 0.318716436624527, "rewards/margins": 0.194285586476326, "rewards/rejected": 0.12443085014820099, "step": 2421 }, { "epoch": 0.54, "learning_rate": 8.587112257440422e-06, "logits/chosen": -1.1327418088912964, "logits/rejected": -1.199993371963501, "logps/chosen": -225.30233764648438, "logps/rejected": -158.5736846923828, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.4350753724575043, "rewards/margins": 4.198202610015869, "rewards/rejected": -4.633277893066406, "step": 2422 }, { "epoch": 0.54, "learning_rate": 8.585863422479652e-06, "logits/chosen": -1.0349174737930298, "logits/rejected": -1.0349174737930298, "logps/chosen": -102.05638122558594, "logps/rejected": -102.05638122558594, "loss": 0.5784, "rewards/accuracies": 0.0, "rewards/chosen": -4.829497814178467, "rewards/margins": 0.0, "rewards/rejected": -4.829497814178467, "step": 2423 }, { "epoch": 0.54, "learning_rate": 8.584614126744051e-06, "logits/chosen": -0.8326981663703918, "logits/rejected": -0.8877103328704834, "logps/chosen": -94.63377380371094, "logps/rejected": -60.766300201416016, "loss": 0.6614, "rewards/accuracies": 1.0, "rewards/chosen": -1.3358818292617798, "rewards/margins": 1.5126816034317017, "rewards/rejected": -2.8485634326934814, "step": 2424 }, { "epoch": 0.54, "learning_rate": 8.583364370394152e-06, "logits/chosen": -1.0418682098388672, "logits/rejected": -1.0163073539733887, "logps/chosen": -191.55960083007812, "logps/rejected": -139.1779327392578, "loss": 0.0707, "rewards/accuracies": 1.0, "rewards/chosen": 0.40806275606155396, "rewards/margins": 6.298845291137695, "rewards/rejected": -5.890782356262207, "step": 2425 }, { "epoch": 0.54, "learning_rate": 8.582114153590543e-06, "logits/chosen": -0.6643697023391724, "logits/rejected": -0.6479665637016296, "logps/chosen": -100.3747787475586, "logps/rejected": -168.21615600585938, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.07849808037281036, "rewards/margins": 5.434637069702148, "rewards/rejected": -5.513134956359863, "step": 2426 }, { "epoch": 0.54, "learning_rate": 8.58086347649388e-06, "logits/chosen": -1.2090423107147217, "logits/rejected": -1.1943975687026978, "logps/chosen": -81.51571655273438, "logps/rejected": -120.50498962402344, "loss": 0.1259, "rewards/accuracies": 1.0, "rewards/chosen": 0.25381776690483093, "rewards/margins": 2.5947952270507812, "rewards/rejected": -2.340977430343628, "step": 2427 }, { "epoch": 0.54, "learning_rate": 8.579612339264867e-06, "logits/chosen": -0.5316941142082214, "logits/rejected": -0.573308527469635, "logps/chosen": -91.4156723022461, "logps/rejected": -105.31966400146484, "loss": 0.91, "rewards/accuracies": 1.0, "rewards/chosen": -1.1982421875, "rewards/margins": 1.7737877368927002, "rewards/rejected": -2.9720299243927, "step": 2428 }, { "epoch": 0.54, "learning_rate": 8.578360742064274e-06, "logits/chosen": -0.9897222518920898, "logits/rejected": -0.9897222518920898, "logps/chosen": -99.63182067871094, "logps/rejected": -99.63182067871094, "loss": 0.588, "rewards/accuracies": 0.0, "rewards/chosen": -0.21301423013210297, "rewards/margins": 0.0, "rewards/rejected": -0.21301423013210297, "step": 2429 }, { "epoch": 0.54, "learning_rate": 8.577108685052927e-06, "logits/chosen": -1.034520149230957, "logits/rejected": -1.1033862829208374, "logps/chosen": -207.45242309570312, "logps/rejected": -211.35308837890625, "loss": 0.2753, "rewards/accuracies": 1.0, "rewards/chosen": -2.478231906890869, "rewards/margins": 0.3090667724609375, "rewards/rejected": -2.7872986793518066, "step": 2430 }, { "epoch": 0.54, "learning_rate": 8.575856168391714e-06, "logits/chosen": -1.129568338394165, "logits/rejected": -1.1565722227096558, "logps/chosen": -169.70864868164062, "logps/rejected": -193.85476684570312, "loss": 0.0962, "rewards/accuracies": 1.0, "rewards/chosen": -0.13684692978858948, "rewards/margins": 4.868914604187012, "rewards/rejected": -5.005761623382568, "step": 2431 }, { "epoch": 0.54, "learning_rate": 8.57460319224158e-06, "logits/chosen": -1.251854419708252, "logits/rejected": -0.7523075938224792, "logps/chosen": -201.25515747070312, "logps/rejected": -594.73046875, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -0.7669525146484375, "rewards/margins": 46.12398910522461, "rewards/rejected": -46.89094161987305, "step": 2432 }, { "epoch": 0.54, "learning_rate": 8.573349756763527e-06, "logits/chosen": -0.854987382888794, "logits/rejected": -0.854987382888794, "logps/chosen": -86.78242492675781, "logps/rejected": -86.78242492675781, "loss": 0.3475, "rewards/accuracies": 0.0, "rewards/chosen": -2.2175309658050537, "rewards/margins": 0.0, "rewards/rejected": -2.2175309658050537, "step": 2433 }, { "epoch": 0.54, "learning_rate": 8.572095862118621e-06, "logits/chosen": -1.125357985496521, "logits/rejected": -1.0750675201416016, "logps/chosen": -52.182167053222656, "logps/rejected": -132.7535858154297, "loss": 0.0483, "rewards/accuracies": 1.0, "rewards/chosen": 0.267194002866745, "rewards/margins": 2.2928638458251953, "rewards/rejected": -2.025669813156128, "step": 2434 }, { "epoch": 0.54, "learning_rate": 8.570841508467984e-06, "logits/chosen": -1.2893222570419312, "logits/rejected": -1.30177640914917, "logps/chosen": -77.8174819946289, "logps/rejected": -161.6413116455078, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/chosen": -0.18688736855983734, "rewards/margins": 2.8034310340881348, "rewards/rejected": -2.9903182983398438, "step": 2435 }, { "epoch": 0.54, "learning_rate": 8.569586695972798e-06, "logits/chosen": -1.3290811777114868, "logits/rejected": -1.3690216541290283, "logps/chosen": -114.23934173583984, "logps/rejected": -85.4486083984375, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 0.7204864621162415, "rewards/margins": 4.416874885559082, "rewards/rejected": -3.6963882446289062, "step": 2436 }, { "epoch": 0.54, "learning_rate": 8.568331424794301e-06, "logits/chosen": -1.1113920211791992, "logits/rejected": -1.0878633260726929, "logps/chosen": -162.57862854003906, "logps/rejected": -222.80979919433594, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": 0.7988815307617188, "rewards/margins": 4.857293605804443, "rewards/rejected": -4.058412075042725, "step": 2437 }, { "epoch": 0.54, "learning_rate": 8.567075695093796e-06, "logits/chosen": -0.8689420223236084, "logits/rejected": -0.8522729873657227, "logps/chosen": -111.55271911621094, "logps/rejected": -136.87274169921875, "loss": 0.0422, "rewards/accuracies": 1.0, "rewards/chosen": -2.1359169483184814, "rewards/margins": 2.4373457431793213, "rewards/rejected": -4.573262691497803, "step": 2438 }, { "epoch": 0.54, "learning_rate": 8.565819507032637e-06, "logits/chosen": -0.7243385910987854, "logits/rejected": -0.4328625500202179, "logps/chosen": -259.6927490234375, "logps/rejected": -616.4403686523438, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": 1.9714539051055908, "rewards/margins": 41.03968811035156, "rewards/rejected": -39.068233489990234, "step": 2439 }, { "epoch": 0.54, "learning_rate": 8.564562860772246e-06, "logits/chosen": -1.0333703756332397, "logits/rejected": -1.0361838340759277, "logps/chosen": -158.8603515625, "logps/rejected": -165.3679656982422, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": 0.3149551451206207, "rewards/margins": 3.3631792068481445, "rewards/rejected": -3.0482239723205566, "step": 2440 }, { "epoch": 0.54, "learning_rate": 8.563305756474094e-06, "logits/chosen": -1.0846360921859741, "logits/rejected": -1.0846360921859741, "logps/chosen": -186.32130432128906, "logps/rejected": -186.32130432128906, "loss": 0.3864, "rewards/accuracies": 0.0, "rewards/chosen": -8.085741996765137, "rewards/margins": 0.0, "rewards/rejected": -8.085741996765137, "step": 2441 }, { "epoch": 0.54, "learning_rate": 8.562048194299719e-06, "logits/chosen": -1.2213627099990845, "logits/rejected": -1.174038290977478, "logps/chosen": -134.5661163330078, "logps/rejected": -136.42465209960938, "loss": 0.4251, "rewards/accuracies": 1.0, "rewards/chosen": 0.7966507077217102, "rewards/margins": 1.575352430343628, "rewards/rejected": -0.7787017822265625, "step": 2442 }, { "epoch": 0.54, "learning_rate": 8.560790174410713e-06, "logits/chosen": -1.3483221530914307, "logits/rejected": -1.350027084350586, "logps/chosen": -121.7352294921875, "logps/rejected": -200.96202087402344, "loss": 0.2034, "rewards/accuracies": 1.0, "rewards/chosen": 0.22056809067726135, "rewards/margins": 6.354714393615723, "rewards/rejected": -6.134146213531494, "step": 2443 }, { "epoch": 0.54, "learning_rate": 8.559531696968733e-06, "logits/chosen": -0.8556979894638062, "logits/rejected": -0.8856785297393799, "logps/chosen": -225.8393096923828, "logps/rejected": -261.72406005859375, "loss": 0.0749, "rewards/accuracies": 1.0, "rewards/chosen": -2.4582366943359375, "rewards/margins": 1.822357177734375, "rewards/rejected": -4.2805938720703125, "step": 2444 }, { "epoch": 0.54, "learning_rate": 8.558272762135483e-06, "logits/chosen": -1.0245436429977417, "logits/rejected": -0.5098512768745422, "logps/chosen": -186.38906860351562, "logps/rejected": -482.36859130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24053040146827698, "rewards/margins": 30.005794525146484, "rewards/rejected": -29.7652645111084, "step": 2445 }, { "epoch": 0.54, "learning_rate": 8.557013370072737e-06, "logits/chosen": -0.7665660381317139, "logits/rejected": -0.6645752191543579, "logps/chosen": -71.15501403808594, "logps/rejected": -235.89585876464844, "loss": 0.0933, "rewards/accuracies": 1.0, "rewards/chosen": 0.05891723558306694, "rewards/margins": 1.6480988264083862, "rewards/rejected": -1.5891815423965454, "step": 2446 }, { "epoch": 0.54, "learning_rate": 8.555753520942327e-06, "logits/chosen": -1.066377878189087, "logits/rejected": -1.054070234298706, "logps/chosen": -199.39793395996094, "logps/rejected": -197.83766174316406, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -3.0837905406951904, "rewards/margins": 3.154205560684204, "rewards/rejected": -6.2379961013793945, "step": 2447 }, { "epoch": 0.54, "learning_rate": 8.554493214906135e-06, "logits/chosen": -1.1618351936340332, "logits/rejected": -1.1542491912841797, "logps/chosen": -213.58538818359375, "logps/rejected": -131.4701385498047, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.21912994980812073, "rewards/margins": 6.566668510437012, "rewards/rejected": -6.7857985496521, "step": 2448 }, { "epoch": 0.54, "learning_rate": 8.55323245212611e-06, "logits/chosen": -0.9669326543807983, "logits/rejected": -0.8997090458869934, "logps/chosen": -95.2010498046875, "logps/rejected": -206.57083129882812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.3184013366699219, "rewards/margins": 6.055910587310791, "rewards/rejected": -5.737509250640869, "step": 2449 }, { "epoch": 0.54, "learning_rate": 8.551971232764255e-06, "logits/chosen": -0.8731074333190918, "logits/rejected": -0.8673935532569885, "logps/chosen": -183.54739379882812, "logps/rejected": -212.30101013183594, "loss": 0.617, "rewards/accuracies": 1.0, "rewards/chosen": -2.537733554840088, "rewards/margins": 5.041924953460693, "rewards/rejected": -7.579658508300781, "step": 2450 }, { "epoch": 0.54, "learning_rate": 8.550709556982637e-06, "logits/chosen": -1.0446398258209229, "logits/rejected": -1.0162564516067505, "logps/chosen": -97.839599609375, "logps/rejected": -92.19706726074219, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": 0.9473007321357727, "rewards/margins": 5.946228504180908, "rewards/rejected": -4.998927593231201, "step": 2451 }, { "epoch": 0.54, "learning_rate": 8.549447424943379e-06, "logits/chosen": -0.799270510673523, "logits/rejected": -0.7115304470062256, "logps/chosen": -155.86398315429688, "logps/rejected": -258.12957763671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.6027191281318665, "rewards/margins": 8.110528945922852, "rewards/rejected": -7.507809638977051, "step": 2452 }, { "epoch": 0.54, "learning_rate": 8.548184836808657e-06, "logits/chosen": -0.7203463315963745, "logits/rejected": -0.7489423155784607, "logps/chosen": -255.61392211914062, "logps/rejected": -226.41494750976562, "loss": 0.126, "rewards/accuracies": 1.0, "rewards/chosen": 0.8277435302734375, "rewards/margins": 1.2496337890625, "rewards/rejected": -0.4218902587890625, "step": 2453 }, { "epoch": 0.54, "learning_rate": 8.546921792740712e-06, "logits/chosen": -0.8595602512359619, "logits/rejected": -0.8022067546844482, "logps/chosen": -176.47152709960938, "logps/rejected": -228.75405883789062, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.832855224609375, "rewards/margins": 5.069389343261719, "rewards/rejected": -7.902244567871094, "step": 2454 }, { "epoch": 0.54, "learning_rate": 8.545658292901844e-06, "logits/chosen": -1.312343955039978, "logits/rejected": -1.3837840557098389, "logps/chosen": -186.32699584960938, "logps/rejected": -267.1596374511719, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": 2.9722611904144287, "rewards/margins": 12.502472877502441, "rewards/rejected": -9.530211448669434, "step": 2455 }, { "epoch": 0.54, "learning_rate": 8.544394337454409e-06, "logits/chosen": -0.9329442381858826, "logits/rejected": -0.8970975279808044, "logps/chosen": -131.81881713867188, "logps/rejected": -182.6634521484375, "loss": 0.5079, "rewards/accuracies": 0.0, "rewards/chosen": -2.2640931606292725, "rewards/margins": -0.5618776082992554, "rewards/rejected": -1.702215552330017, "step": 2456 }, { "epoch": 0.54, "learning_rate": 8.543129926560822e-06, "logits/chosen": -1.2107423543930054, "logits/rejected": -1.217739224433899, "logps/chosen": -115.01760864257812, "logps/rejected": -112.33949279785156, "loss": 0.1015, "rewards/accuracies": 1.0, "rewards/chosen": 0.3370567262172699, "rewards/margins": 1.6218620538711548, "rewards/rejected": -1.2848052978515625, "step": 2457 }, { "epoch": 0.54, "learning_rate": 8.541865060383559e-06, "logits/chosen": -0.9057106375694275, "logits/rejected": -0.8771876096725464, "logps/chosen": -121.35569763183594, "logps/rejected": -249.52117919921875, "loss": 1.5848, "rewards/accuracies": 1.0, "rewards/chosen": -0.44550323486328125, "rewards/margins": 1.4263076782226562, "rewards/rejected": -1.8718109130859375, "step": 2458 }, { "epoch": 0.54, "learning_rate": 8.540599739085147e-06, "logits/chosen": -1.1226285696029663, "logits/rejected": -1.1199021339416504, "logps/chosen": -111.15235137939453, "logps/rejected": -151.70822143554688, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 0.31227341294288635, "rewards/margins": 4.956849098205566, "rewards/rejected": -4.644575595855713, "step": 2459 }, { "epoch": 0.54, "learning_rate": 8.539333962828182e-06, "logits/chosen": -1.1968657970428467, "logits/rejected": -1.2366918325424194, "logps/chosen": -153.99636840820312, "logps/rejected": -77.76670837402344, "loss": 0.4947, "rewards/accuracies": 0.0, "rewards/chosen": -4.494494915008545, "rewards/margins": -0.4332122802734375, "rewards/rejected": -4.061282634735107, "step": 2460 }, { "epoch": 0.54, "learning_rate": 8.53806773177531e-06, "logits/chosen": -0.9429258704185486, "logits/rejected": -0.9528979063034058, "logps/chosen": -196.496826171875, "logps/rejected": -134.90232849121094, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 1.5279206037521362, "rewards/margins": 6.594383716583252, "rewards/rejected": -5.066462993621826, "step": 2461 }, { "epoch": 0.54, "learning_rate": 8.53680104608924e-06, "logits/chosen": -0.7787856459617615, "logits/rejected": -0.7313287258148193, "logps/chosen": -160.92445373535156, "logps/rejected": -199.12306213378906, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": 1.4622513055801392, "rewards/margins": 3.410905361175537, "rewards/rejected": -1.9486541748046875, "step": 2462 }, { "epoch": 0.55, "learning_rate": 8.535533905932739e-06, "logits/chosen": -0.8992987275123596, "logits/rejected": -0.8992987275123596, "logps/chosen": -140.20594787597656, "logps/rejected": -140.20594787597656, "loss": 0.3844, "rewards/accuracies": 0.0, "rewards/chosen": -5.411357879638672, "rewards/margins": 0.0, "rewards/rejected": -5.411357879638672, "step": 2463 }, { "epoch": 0.55, "learning_rate": 8.534266311468629e-06, "logits/chosen": -1.0828940868377686, "logits/rejected": -1.0455654859542847, "logps/chosen": -74.40319061279297, "logps/rejected": -218.3023681640625, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": -0.34376296401023865, "rewards/margins": 3.799083709716797, "rewards/rejected": -4.142846584320068, "step": 2464 }, { "epoch": 0.55, "learning_rate": 8.532998262859794e-06, "logits/chosen": -0.9475114941596985, "logits/rejected": -0.970403790473938, "logps/chosen": -102.15630340576172, "logps/rejected": -96.80928039550781, "loss": 0.1286, "rewards/accuracies": 1.0, "rewards/chosen": -0.2445060759782791, "rewards/margins": 1.22748863697052, "rewards/rejected": -1.4719947576522827, "step": 2465 }, { "epoch": 0.55, "learning_rate": 8.531729760269176e-06, "logits/chosen": -1.137169361114502, "logits/rejected": -1.1589269638061523, "logps/chosen": -75.03890228271484, "logps/rejected": -102.6427001953125, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -0.033318329602479935, "rewards/margins": 2.6508705615997314, "rewards/rejected": -2.6841888427734375, "step": 2466 }, { "epoch": 0.55, "learning_rate": 8.530460803859772e-06, "logits/chosen": -0.7964272499084473, "logits/rejected": -0.7778327465057373, "logps/chosen": -222.06771850585938, "logps/rejected": -262.1575622558594, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -0.726489245891571, "rewards/margins": 3.783348321914673, "rewards/rejected": -4.509837627410889, "step": 2467 }, { "epoch": 0.55, "learning_rate": 8.529191393794645e-06, "logits/chosen": -0.903925359249115, "logits/rejected": -0.903925359249115, "logps/chosen": -145.88723754882812, "logps/rejected": -145.88723754882812, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -4.338035583496094, "rewards/margins": 0.0, "rewards/rejected": -4.338035583496094, "step": 2468 }, { "epoch": 0.55, "learning_rate": 8.527921530236905e-06, "logits/chosen": -0.9336152672767639, "logits/rejected": -0.9487460255622864, "logps/chosen": -101.35394287109375, "logps/rejected": -137.71226501464844, "loss": 0.0996, "rewards/accuracies": 1.0, "rewards/chosen": 0.8397102355957031, "rewards/margins": 1.5712897777557373, "rewards/rejected": -0.731579601764679, "step": 2469 }, { "epoch": 0.55, "learning_rate": 8.52665121334973e-06, "logits/chosen": -0.6441364288330078, "logits/rejected": -0.6997255682945251, "logps/chosen": -123.61264038085938, "logps/rejected": -88.92129516601562, "loss": 0.392, "rewards/accuracies": 0.0, "rewards/chosen": 0.15375518798828125, "rewards/margins": -0.12483522295951843, "rewards/rejected": 0.2785904109477997, "step": 2470 }, { "epoch": 0.55, "learning_rate": 8.525380443296353e-06, "logits/chosen": -0.84068763256073, "logits/rejected": -0.8251217007637024, "logps/chosen": -190.724853515625, "logps/rejected": -320.92047119140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.8810486197471619, "rewards/margins": 7.863733291625977, "rewards/rejected": -6.98268461227417, "step": 2471 }, { "epoch": 0.55, "learning_rate": 8.524109220240064e-06, "logits/chosen": -0.9602106213569641, "logits/rejected": -0.9149899482727051, "logps/chosen": -72.40451049804688, "logps/rejected": -139.7071990966797, "loss": 0.2054, "rewards/accuracies": 1.0, "rewards/chosen": 0.02683563344180584, "rewards/margins": 2.3891663551330566, "rewards/rejected": -2.362330675125122, "step": 2472 }, { "epoch": 0.55, "learning_rate": 8.52283754434421e-06, "logits/chosen": -0.9601749181747437, "logits/rejected": -0.923541247844696, "logps/chosen": -138.3404541015625, "logps/rejected": -216.94268798828125, "loss": 0.6345, "rewards/accuracies": 1.0, "rewards/chosen": -3.1777665615081787, "rewards/margins": 0.07645702362060547, "rewards/rejected": -3.254223585128784, "step": 2473 }, { "epoch": 0.55, "learning_rate": 8.521565415772201e-06, "logits/chosen": -1.0033968687057495, "logits/rejected": -0.9657952189445496, "logps/chosen": -98.61386108398438, "logps/rejected": -115.2996597290039, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": 1.769464135169983, "rewards/margins": 4.274410247802734, "rewards/rejected": -2.504946231842041, "step": 2474 }, { "epoch": 0.55, "learning_rate": 8.520292834687503e-06, "logits/chosen": -0.619968056678772, "logits/rejected": -0.5811911225318909, "logps/chosen": -65.85768127441406, "logps/rejected": -106.48905944824219, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": 0.16654816269874573, "rewards/margins": 3.507037401199341, "rewards/rejected": -3.340489149093628, "step": 2475 }, { "epoch": 0.55, "learning_rate": 8.519019801253637e-06, "logits/chosen": -0.9677891731262207, "logits/rejected": -0.9711766839027405, "logps/chosen": -158.75094604492188, "logps/rejected": -190.11508178710938, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -0.5534164309501648, "rewards/margins": 5.184375286102295, "rewards/rejected": -5.737791538238525, "step": 2476 }, { "epoch": 0.55, "learning_rate": 8.517746315634186e-06, "logits/chosen": -1.1907955408096313, "logits/rejected": -1.1869654655456543, "logps/chosen": -76.33171844482422, "logps/rejected": -140.49974060058594, "loss": 0.3864, "rewards/accuracies": 1.0, "rewards/chosen": 0.4240921139717102, "rewards/margins": 4.127404689788818, "rewards/rejected": -3.703312635421753, "step": 2477 }, { "epoch": 0.55, "learning_rate": 8.51647237799279e-06, "logits/chosen": -1.2384642362594604, "logits/rejected": -1.1929720640182495, "logps/chosen": -94.32841491699219, "logps/rejected": -120.56681823730469, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -0.7541580200195312, "rewards/margins": 2.9421868324279785, "rewards/rejected": -3.6963448524475098, "step": 2478 }, { "epoch": 0.55, "learning_rate": 8.515197988493146e-06, "logits/chosen": -0.9666956663131714, "logits/rejected": -1.0091180801391602, "logps/chosen": -177.70562744140625, "logps/rejected": -158.3726806640625, "loss": 0.2895, "rewards/accuracies": 1.0, "rewards/chosen": 0.5848144888877869, "rewards/margins": 0.29676517844200134, "rewards/rejected": 0.2880493104457855, "step": 2479 }, { "epoch": 0.55, "learning_rate": 8.513923147299012e-06, "logits/chosen": -0.8321347832679749, "logits/rejected": -0.8145999908447266, "logps/chosen": -92.92310333251953, "logps/rejected": -170.01791381835938, "loss": 0.0506, "rewards/accuracies": 1.0, "rewards/chosen": -0.2317802459001541, "rewards/margins": 2.919867753982544, "rewards/rejected": -3.1516480445861816, "step": 2480 }, { "epoch": 0.55, "learning_rate": 8.512647854574201e-06, "logits/chosen": -1.1067402362823486, "logits/rejected": -1.1435730457305908, "logps/chosen": -182.91542053222656, "logps/rejected": -144.45167541503906, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 1.6859359741210938, "rewards/margins": 5.6475629806518555, "rewards/rejected": -3.961627244949341, "step": 2481 }, { "epoch": 0.55, "learning_rate": 8.511372110482583e-06, "logits/chosen": -0.8684159517288208, "logits/rejected": -0.8271386027336121, "logps/chosen": -86.87666320800781, "logps/rejected": -190.15509033203125, "loss": 0.6564, "rewards/accuracies": 0.0, "rewards/chosen": -1.660082221031189, "rewards/margins": -0.9994575381278992, "rewards/rejected": -0.6606246829032898, "step": 2482 }, { "epoch": 0.55, "learning_rate": 8.510095915188093e-06, "logits/chosen": -1.2088799476623535, "logits/rejected": -0.9166426062583923, "logps/chosen": -252.40402221679688, "logps/rejected": -527.9639282226562, "loss": 0.2445, "rewards/accuracies": 1.0, "rewards/chosen": -1.1511459350585938, "rewards/margins": 35.71785354614258, "rewards/rejected": -36.86899948120117, "step": 2483 }, { "epoch": 0.55, "learning_rate": 8.508819268854713e-06, "logits/chosen": -1.0026874542236328, "logits/rejected": -1.008799433708191, "logps/chosen": -93.07699584960938, "logps/rejected": -91.56609344482422, "loss": 0.7921, "rewards/accuracies": 0.0, "rewards/chosen": -0.9106689691543579, "rewards/margins": -0.10147249698638916, "rewards/rejected": -0.8091964721679688, "step": 2484 }, { "epoch": 0.55, "learning_rate": 8.507542171646493e-06, "logits/chosen": -1.149744987487793, "logits/rejected": -1.2300628423690796, "logps/chosen": -111.57328796386719, "logps/rejected": -66.99528503417969, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -0.757440984249115, "rewards/margins": 2.9367830753326416, "rewards/rejected": -3.6942241191864014, "step": 2485 }, { "epoch": 0.55, "learning_rate": 8.506264623727536e-06, "logits/chosen": -1.0833337306976318, "logits/rejected": -1.0963060855865479, "logps/chosen": -115.32081604003906, "logps/rejected": -60.47391891479492, "loss": 0.0527, "rewards/accuracies": 1.0, "rewards/chosen": -1.9652146100997925, "rewards/margins": 2.196016788482666, "rewards/rejected": -4.161231517791748, "step": 2486 }, { "epoch": 0.55, "learning_rate": 8.504986625262004e-06, "logits/chosen": -1.3817576169967651, "logits/rejected": -1.3915802240371704, "logps/chosen": -3.2640552520751953, "logps/rejected": -36.47798538208008, "loss": 0.1111, "rewards/accuracies": 1.0, "rewards/chosen": 0.07557354122400284, "rewards/margins": 1.4065256118774414, "rewards/rejected": -1.3309520483016968, "step": 2487 }, { "epoch": 0.55, "learning_rate": 8.503708176414115e-06, "logits/chosen": -1.191351294517517, "logits/rejected": -1.2395902872085571, "logps/chosen": -124.67121124267578, "logps/rejected": -191.6263427734375, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": 0.17906418442726135, "rewards/margins": 7.580319404602051, "rewards/rejected": -7.401255130767822, "step": 2488 }, { "epoch": 0.55, "learning_rate": 8.50242927734815e-06, "logits/chosen": -0.8762567639350891, "logits/rejected": -0.8421147465705872, "logps/chosen": -114.4942626953125, "logps/rejected": -161.23580932617188, "loss": 0.3563, "rewards/accuracies": 1.0, "rewards/chosen": -0.09334564208984375, "rewards/margins": 5.152139186859131, "rewards/rejected": -5.245484828948975, "step": 2489 }, { "epoch": 0.55, "learning_rate": 8.501149928228441e-06, "logits/chosen": -1.2519500255584717, "logits/rejected": -1.2840731143951416, "logps/chosen": -105.01509094238281, "logps/rejected": -115.2764663696289, "loss": 0.2606, "rewards/accuracies": 1.0, "rewards/chosen": 0.41659852862358093, "rewards/margins": 4.109214782714844, "rewards/rejected": -3.6926162242889404, "step": 2490 }, { "epoch": 0.55, "learning_rate": 8.499870129219383e-06, "logits/chosen": -1.2063833475112915, "logits/rejected": -1.2349292039871216, "logps/chosen": -106.61734008789062, "logps/rejected": -70.34223937988281, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": 0.061647798866033554, "rewards/margins": 3.7204689979553223, "rewards/rejected": -3.6588211059570312, "step": 2491 }, { "epoch": 0.55, "learning_rate": 8.498589880485428e-06, "logits/chosen": -0.9607129693031311, "logits/rejected": -0.9863570332527161, "logps/chosen": -224.7421875, "logps/rejected": -117.44645690917969, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -1.0444061756134033, "rewards/margins": 3.0960123538970947, "rewards/rejected": -4.140418529510498, "step": 2492 }, { "epoch": 0.55, "learning_rate": 8.497309182191082e-06, "logits/chosen": -0.8325178027153015, "logits/rejected": -0.8088169097900391, "logps/chosen": -115.59613037109375, "logps/rejected": -50.007259368896484, "loss": 0.2677, "rewards/accuracies": 1.0, "rewards/chosen": 0.14473114907741547, "rewards/margins": 2.6208138465881348, "rewards/rejected": -2.4760828018188477, "step": 2493 }, { "epoch": 0.55, "learning_rate": 8.496028034500914e-06, "logits/chosen": -1.1055161952972412, "logits/rejected": -1.1535987854003906, "logps/chosen": -218.30545043945312, "logps/rejected": -132.08157348632812, "loss": 0.1011, "rewards/accuracies": 1.0, "rewards/chosen": 0.5048874020576477, "rewards/margins": 1.4960136413574219, "rewards/rejected": -0.9911262392997742, "step": 2494 }, { "epoch": 0.55, "learning_rate": 8.49474643757955e-06, "logits/chosen": -0.678878664970398, "logits/rejected": -0.7140251994132996, "logps/chosen": -185.86361694335938, "logps/rejected": -201.7120819091797, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": 2.035174608230591, "rewards/margins": 3.394796848297119, "rewards/rejected": -1.3596222400665283, "step": 2495 }, { "epoch": 0.55, "learning_rate": 8.493464391591665e-06, "logits/chosen": -1.2565940618515015, "logits/rejected": -1.1473402976989746, "logps/chosen": -106.62056732177734, "logps/rejected": -220.42665100097656, "loss": 0.069, "rewards/accuracies": 1.0, "rewards/chosen": -1.0712028741836548, "rewards/margins": 1.9596794843673706, "rewards/rejected": -3.0308823585510254, "step": 2496 }, { "epoch": 0.55, "learning_rate": 8.492181896702008e-06, "logits/chosen": -0.7781355381011963, "logits/rejected": -0.7393007874488831, "logps/chosen": -96.408447265625, "logps/rejected": -100.29871368408203, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": -1.4806091785430908, "rewards/margins": 3.0522873401641846, "rewards/rejected": -4.532896518707275, "step": 2497 }, { "epoch": 0.55, "learning_rate": 8.49089895307537e-06, "logits/chosen": -1.268712043762207, "logits/rejected": -1.2770001888275146, "logps/chosen": -81.60615539550781, "logps/rejected": -97.015869140625, "loss": 0.0768, "rewards/accuracies": 1.0, "rewards/chosen": 0.4965759217739105, "rewards/margins": 1.8012688159942627, "rewards/rejected": -1.3046928644180298, "step": 2498 }, { "epoch": 0.55, "learning_rate": 8.48961556087661e-06, "logits/chosen": -1.1314289569854736, "logits/rejected": -1.1244580745697021, "logps/chosen": -145.58917236328125, "logps/rejected": -181.6343536376953, "loss": 0.0751, "rewards/accuracies": 1.0, "rewards/chosen": -2.0803849697113037, "rewards/margins": 4.102675437927246, "rewards/rejected": -6.183060646057129, "step": 2499 }, { "epoch": 0.55, "learning_rate": 8.48833172027064e-06, "logits/chosen": -1.1833456754684448, "logits/rejected": -1.1639851331710815, "logps/chosen": -147.70892333984375, "logps/rejected": -144.46327209472656, "loss": 0.2138, "rewards/accuracies": 1.0, "rewards/chosen": -2.6090095043182373, "rewards/margins": 1.4912559986114502, "rewards/rejected": -4.1002655029296875, "step": 2500 }, { "epoch": 0.55, "learning_rate": 8.487047431422426e-06, "logits/chosen": -1.0784332752227783, "logits/rejected": -1.0497182607650757, "logps/chosen": -90.30546569824219, "logps/rejected": -181.99209594726562, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": 0.5130699276924133, "rewards/margins": 4.222153663635254, "rewards/rejected": -3.7090835571289062, "step": 2501 }, { "epoch": 0.55, "learning_rate": 8.485762694497001e-06, "logits/chosen": -1.3500584363937378, "logits/rejected": -0.6004295945167542, "logps/chosen": -134.02471923828125, "logps/rejected": -667.3411254882812, "loss": 0.3386, "rewards/accuracies": 1.0, "rewards/chosen": -3.4753189086914062, "rewards/margins": 48.531986236572266, "rewards/rejected": -52.00730514526367, "step": 2502 }, { "epoch": 0.55, "learning_rate": 8.484477509659452e-06, "logits/chosen": -0.9938164949417114, "logits/rejected": -0.9653587341308594, "logps/chosen": -151.66494750976562, "logps/rejected": -178.84222412109375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 1.9301742315292358, "rewards/margins": 4.783700466156006, "rewards/rejected": -2.8535263538360596, "step": 2503 }, { "epoch": 0.55, "learning_rate": 8.483191877074916e-06, "logits/chosen": -0.8647158741950989, "logits/rejected": -0.865362823009491, "logps/chosen": -127.59950256347656, "logps/rejected": -115.5623779296875, "loss": 0.7132, "rewards/accuracies": 0.0, "rewards/chosen": -3.7847282886505127, "rewards/margins": -1.1515374183654785, "rewards/rejected": -2.633190870285034, "step": 2504 }, { "epoch": 0.55, "learning_rate": 8.4819057969086e-06, "logits/chosen": -1.1803526878356934, "logits/rejected": -1.123990774154663, "logps/chosen": -178.00308227539062, "logps/rejected": -322.20709228515625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 3.485736131668091, "rewards/margins": 12.371208190917969, "rewards/rejected": -8.885472297668457, "step": 2505 }, { "epoch": 0.55, "learning_rate": 8.480619269325759e-06, "logits/chosen": -1.0490003824234009, "logits/rejected": -1.026113510131836, "logps/chosen": -126.14727783203125, "logps/rejected": -189.73155212402344, "loss": 0.3356, "rewards/accuracies": 1.0, "rewards/chosen": -0.4026687741279602, "rewards/margins": 0.36610716581344604, "rewards/rejected": -0.7687759399414062, "step": 2506 }, { "epoch": 0.55, "learning_rate": 8.479332294491707e-06, "logits/chosen": -1.0853958129882812, "logits/rejected": -1.1159154176712036, "logps/chosen": -167.18276977539062, "logps/rejected": -180.52349853515625, "loss": 0.0739, "rewards/accuracies": 1.0, "rewards/chosen": 0.8030593991279602, "rewards/margins": 1.8369920253753662, "rewards/rejected": -1.0339325666427612, "step": 2507 }, { "epoch": 0.56, "learning_rate": 8.47804487257182e-06, "logits/chosen": -0.9576209187507629, "logits/rejected": -0.955702006816864, "logps/chosen": -168.90289306640625, "logps/rejected": -125.56201171875, "loss": 1.4649, "rewards/accuracies": 1.0, "rewards/chosen": -0.633068859577179, "rewards/margins": 1.6283693313598633, "rewards/rejected": -2.2614381313323975, "step": 2508 }, { "epoch": 0.56, "learning_rate": 8.47675700373153e-06, "logits/chosen": -1.3614929914474487, "logits/rejected": -1.3063937425613403, "logps/chosen": -79.7270278930664, "logps/rejected": -190.11029052734375, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -1.1487327814102173, "rewards/margins": 3.6080408096313477, "rewards/rejected": -4.756773471832275, "step": 2509 }, { "epoch": 0.56, "learning_rate": 8.475468688136322e-06, "logits/chosen": -1.1161750555038452, "logits/rejected": -1.1123486757278442, "logps/chosen": -211.65919494628906, "logps/rejected": -228.43814086914062, "loss": 0.263, "rewards/accuracies": 1.0, "rewards/chosen": 0.12196197360754013, "rewards/margins": 0.36801910400390625, "rewards/rejected": -0.24605713784694672, "step": 2510 }, { "epoch": 0.56, "learning_rate": 8.47417992595174e-06, "logits/chosen": -1.0425735712051392, "logits/rejected": -0.8499607443809509, "logps/chosen": -96.18659973144531, "logps/rejected": -315.245361328125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.6329269409179688, "rewards/margins": 5.494908332824707, "rewards/rejected": -4.861981391906738, "step": 2511 }, { "epoch": 0.56, "learning_rate": 8.472890717343391e-06, "logits/chosen": -1.0136966705322266, "logits/rejected": -1.0659217834472656, "logps/chosen": -260.89837646484375, "logps/rejected": -149.27059936523438, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 2.8330323696136475, "rewards/margins": 6.91805362701416, "rewards/rejected": -4.085021495819092, "step": 2512 }, { "epoch": 0.56, "learning_rate": 8.471601062476933e-06, "logits/chosen": -0.9861100316047668, "logits/rejected": -0.9861100316047668, "logps/chosen": -196.3040771484375, "logps/rejected": -196.3040771484375, "loss": 0.3821, "rewards/accuracies": 0.0, "rewards/chosen": -1.516595482826233, "rewards/margins": 0.0, "rewards/rejected": -1.516595482826233, "step": 2513 }, { "epoch": 0.56, "learning_rate": 8.470310961518085e-06, "logits/chosen": -0.8497636914253235, "logits/rejected": -0.838080644607544, "logps/chosen": -150.05690002441406, "logps/rejected": -121.45195007324219, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": -1.6873397827148438, "rewards/margins": 2.800861358642578, "rewards/rejected": -4.488201141357422, "step": 2514 }, { "epoch": 0.56, "learning_rate": 8.469020414632619e-06, "logits/chosen": -0.9898430109024048, "logits/rejected": -0.9458275437355042, "logps/chosen": -162.97549438476562, "logps/rejected": -257.6556091308594, "loss": 2.0142, "rewards/accuracies": 0.0, "rewards/chosen": -2.814701795578003, "rewards/margins": -4.007420539855957, "rewards/rejected": 1.192718505859375, "step": 2515 }, { "epoch": 0.56, "learning_rate": 8.467729421986371e-06, "logits/chosen": -0.9645001292228699, "logits/rejected": -0.9120919704437256, "logps/chosen": -137.50698852539062, "logps/rejected": -190.13265991210938, "loss": 0.0443, "rewards/accuracies": 1.0, "rewards/chosen": -0.9962021112442017, "rewards/margins": 2.3808531761169434, "rewards/rejected": -3.3770554065704346, "step": 2516 }, { "epoch": 0.56, "learning_rate": 8.466437983745227e-06, "logits/chosen": -1.044758677482605, "logits/rejected": -1.0350697040557861, "logps/chosen": -75.53607940673828, "logps/rejected": -84.75505828857422, "loss": 0.544, "rewards/accuracies": 0.0, "rewards/chosen": 0.18583908677101135, "rewards/margins": -0.036133572459220886, "rewards/rejected": 0.22197265923023224, "step": 2517 }, { "epoch": 0.56, "learning_rate": 8.465146100075136e-06, "logits/chosen": -0.970148503780365, "logits/rejected": -1.0160537958145142, "logps/chosen": -148.42041015625, "logps/rejected": -196.38064575195312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.320364475250244, "rewards/margins": 10.245737075805664, "rewards/rejected": -6.92537260055542, "step": 2518 }, { "epoch": 0.56, "learning_rate": 8.4638537711421e-06, "logits/chosen": -1.242257833480835, "logits/rejected": -1.2255275249481201, "logps/chosen": -132.59625244140625, "logps/rejected": -173.67938232421875, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": 0.5024017691612244, "rewards/margins": 5.192588806152344, "rewards/rejected": -4.690186977386475, "step": 2519 }, { "epoch": 0.56, "learning_rate": 8.462560997112184e-06, "logits/chosen": -0.8700136542320251, "logits/rejected": -0.8569374084472656, "logps/chosen": -211.89599609375, "logps/rejected": -241.67466735839844, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": 0.218109130859375, "rewards/margins": 4.4307451248168945, "rewards/rejected": -4.2126359939575195, "step": 2520 }, { "epoch": 0.56, "learning_rate": 8.4612677781515e-06, "logits/chosen": -1.160853624343872, "logits/rejected": -1.1524155139923096, "logps/chosen": -62.109745025634766, "logps/rejected": -79.65950012207031, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.9353908896446228, "rewards/margins": 2.1614041328430176, "rewards/rejected": -1.22601318359375, "step": 2521 }, { "epoch": 0.56, "learning_rate": 8.45997411442623e-06, "logits/chosen": -0.7204445004463196, "logits/rejected": -0.7528255581855774, "logps/chosen": -89.56587982177734, "logps/rejected": -100.77223205566406, "loss": 0.6066, "rewards/accuracies": 0.0, "rewards/chosen": -2.381990909576416, "rewards/margins": -0.8561692237854004, "rewards/rejected": -1.5258216857910156, "step": 2522 }, { "epoch": 0.56, "learning_rate": 8.458680006102602e-06, "logits/chosen": -1.2433687448501587, "logits/rejected": -0.5532752871513367, "logps/chosen": -33.12374496459961, "logps/rejected": -360.6954345703125, "loss": 0.5971, "rewards/accuracies": 1.0, "rewards/chosen": -2.511950969696045, "rewards/margins": 8.379043579101562, "rewards/rejected": -10.89099407196045, "step": 2523 }, { "epoch": 0.56, "learning_rate": 8.45738545334691e-06, "logits/chosen": -0.7237722873687744, "logits/rejected": -0.640440821647644, "logps/chosen": -94.5619125366211, "logps/rejected": -196.86688232421875, "loss": 0.0827, "rewards/accuracies": 1.0, "rewards/chosen": 0.28820496797561646, "rewards/margins": 1.731044054031372, "rewards/rejected": -1.4428390264511108, "step": 2524 }, { "epoch": 0.56, "learning_rate": 8.456090456325496e-06, "logits/chosen": -0.9828616976737976, "logits/rejected": -0.9828616976737976, "logps/chosen": -81.62040710449219, "logps/rejected": -81.62040710449219, "loss": 0.3469, "rewards/accuracies": 0.0, "rewards/chosen": -3.638920307159424, "rewards/margins": 0.0, "rewards/rejected": -3.638920307159424, "step": 2525 }, { "epoch": 0.56, "learning_rate": 8.454795015204767e-06, "logits/chosen": -1.1196845769882202, "logits/rejected": -1.1196845769882202, "logps/chosen": -52.07359313964844, "logps/rejected": -52.07359313964844, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -0.8056305050849915, "rewards/margins": 0.0, "rewards/rejected": -0.8056305050849915, "step": 2526 }, { "epoch": 0.56, "learning_rate": 8.453499130151183e-06, "logits/chosen": -1.4304968118667603, "logits/rejected": -1.4922502040863037, "logps/chosen": -131.5599365234375, "logps/rejected": -147.74462890625, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": 0.8786987662315369, "rewards/margins": 3.41361403465271, "rewards/rejected": -2.5349152088165283, "step": 2527 }, { "epoch": 0.56, "learning_rate": 8.452202801331265e-06, "logits/chosen": -0.8425068855285645, "logits/rejected": -0.8022116422653198, "logps/chosen": -59.98422622680664, "logps/rejected": -111.1926040649414, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": 0.8223102688789368, "rewards/margins": 3.627988815307617, "rewards/rejected": -2.805678606033325, "step": 2528 }, { "epoch": 0.56, "learning_rate": 8.450906028911585e-06, "logits/chosen": -0.6471381187438965, "logits/rejected": -0.6328882575035095, "logps/chosen": -142.1436767578125, "logps/rejected": -106.02247619628906, "loss": 0.5202, "rewards/accuracies": 0.0, "rewards/chosen": -2.3944091796875, "rewards/margins": -0.6011565923690796, "rewards/rejected": -1.7932525873184204, "step": 2529 }, { "epoch": 0.56, "learning_rate": 8.449608813058776e-06, "logits/chosen": -0.9605780839920044, "logits/rejected": -0.9307567477226257, "logps/chosen": -96.99627685546875, "logps/rejected": -155.5166778564453, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -1.5403717756271362, "rewards/margins": 5.304775238037109, "rewards/rejected": -6.845147132873535, "step": 2530 }, { "epoch": 0.56, "learning_rate": 8.448311153939527e-06, "logits/chosen": -1.0503113269805908, "logits/rejected": -0.9826313257217407, "logps/chosen": -140.82510375976562, "logps/rejected": -212.07785034179688, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.6917266845703125, "rewards/margins": 8.77830982208252, "rewards/rejected": -7.086583137512207, "step": 2531 }, { "epoch": 0.56, "learning_rate": 8.447013051720585e-06, "logits/chosen": -1.1046150922775269, "logits/rejected": -1.0808171033859253, "logps/chosen": -200.17230224609375, "logps/rejected": -269.50079345703125, "loss": 0.6588, "rewards/accuracies": 0.0, "rewards/chosen": -3.9352967739105225, "rewards/margins": -1.00602126121521, "rewards/rejected": -2.9292755126953125, "step": 2532 }, { "epoch": 0.56, "learning_rate": 8.445714506568751e-06, "logits/chosen": -1.092090368270874, "logits/rejected": -1.127500295639038, "logps/chosen": -176.69247436523438, "logps/rejected": -158.70904541015625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": 4.402462959289551, "rewards/margins": 9.193754196166992, "rewards/rejected": -4.7912917137146, "step": 2533 }, { "epoch": 0.56, "learning_rate": 8.444415518650887e-06, "logits/chosen": -1.0223033428192139, "logits/rejected": -1.036434292793274, "logps/chosen": -213.51963806152344, "logps/rejected": -176.46644592285156, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.285745233297348, "rewards/margins": 5.759772300720215, "rewards/rejected": -5.474027156829834, "step": 2534 }, { "epoch": 0.56, "learning_rate": 8.443116088133908e-06, "logits/chosen": -1.1020997762680054, "logits/rejected": -1.0993733406066895, "logps/chosen": -85.60069274902344, "logps/rejected": -76.51605224609375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 2.5638749599456787, "rewards/margins": 5.909106731414795, "rewards/rejected": -3.345231771469116, "step": 2535 }, { "epoch": 0.56, "learning_rate": 8.44181621518479e-06, "logits/chosen": -1.306199550628662, "logits/rejected": -1.2971911430358887, "logps/chosen": -243.95370483398438, "logps/rejected": -373.379638671875, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": 1.0739105939865112, "rewards/margins": 12.431363105773926, "rewards/rejected": -11.357452392578125, "step": 2536 }, { "epoch": 0.56, "learning_rate": 8.440515899970561e-06, "logits/chosen": -0.6257851719856262, "logits/rejected": -0.6331552863121033, "logps/chosen": -94.76708984375, "logps/rejected": -72.5886001586914, "loss": 0.2767, "rewards/accuracies": 1.0, "rewards/chosen": -3.284691333770752, "rewards/margins": 0.5633246898651123, "rewards/rejected": -3.8480160236358643, "step": 2537 }, { "epoch": 0.56, "learning_rate": 8.43921514265831e-06, "logits/chosen": -1.1486138105392456, "logits/rejected": -1.1056581735610962, "logps/chosen": -105.1040267944336, "logps/rejected": -212.33465576171875, "loss": 0.0769, "rewards/accuracies": 1.0, "rewards/chosen": 1.0464423894882202, "rewards/margins": 5.715973854064941, "rewards/rejected": -4.669531345367432, "step": 2538 }, { "epoch": 0.56, "learning_rate": 8.437913943415181e-06, "logits/chosen": -1.1764965057373047, "logits/rejected": -1.2304891347885132, "logps/chosen": -185.55836486816406, "logps/rejected": -142.2139434814453, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7065353393554688, "rewards/margins": 5.750369548797607, "rewards/rejected": -6.456904888153076, "step": 2539 }, { "epoch": 0.56, "learning_rate": 8.436612302408376e-06, "logits/chosen": -1.0628536939620972, "logits/rejected": -1.0194157361984253, "logps/chosen": -152.69676208496094, "logps/rejected": -260.156982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7652328610420227, "rewards/margins": 9.516876220703125, "rewards/rejected": -8.751643180847168, "step": 2540 }, { "epoch": 0.56, "learning_rate": 8.43531021980515e-06, "logits/chosen": -1.2451765537261963, "logits/rejected": -1.230446219444275, "logps/chosen": -104.04145812988281, "logps/rejected": -70.20156860351562, "loss": 0.5649, "rewards/accuracies": 0.0, "rewards/chosen": -0.49730148911476135, "rewards/margins": -0.7387703061103821, "rewards/rejected": 0.24146881699562073, "step": 2541 }, { "epoch": 0.56, "learning_rate": 8.434007695772819e-06, "logits/chosen": -1.1232496500015259, "logits/rejected": -1.1148159503936768, "logps/chosen": -184.91702270507812, "logps/rejected": -247.36399841308594, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 1.6879761219024658, "rewards/margins": 6.0234174728393555, "rewards/rejected": -4.335441589355469, "step": 2542 }, { "epoch": 0.56, "learning_rate": 8.432704730478756e-06, "logits/chosen": -0.593051016330719, "logits/rejected": -0.5703250765800476, "logps/chosen": -181.0389862060547, "logps/rejected": -142.4913330078125, "loss": 0.1018, "rewards/accuracies": 1.0, "rewards/chosen": 0.2240035980939865, "rewards/margins": 5.294126510620117, "rewards/rejected": -5.070122718811035, "step": 2543 }, { "epoch": 0.56, "learning_rate": 8.431401324090384e-06, "logits/chosen": -0.7620365619659424, "logits/rejected": -0.7298579812049866, "logps/chosen": -77.1591796875, "logps/rejected": -73.16024017333984, "loss": 0.0511, "rewards/accuracies": 1.0, "rewards/chosen": 0.8977562189102173, "rewards/margins": 2.2317757606506348, "rewards/rejected": -1.3340195417404175, "step": 2544 }, { "epoch": 0.56, "learning_rate": 8.430097476775194e-06, "logits/chosen": -0.823627233505249, "logits/rejected": -1.0316026210784912, "logps/chosen": -248.02377319335938, "logps/rejected": -241.41925048828125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.221383571624756, "rewards/margins": 7.759438991546631, "rewards/rejected": -11.980822563171387, "step": 2545 }, { "epoch": 0.56, "learning_rate": 8.428793188700722e-06, "logits/chosen": -1.0708231925964355, "logits/rejected": -1.0097877979278564, "logps/chosen": -78.68988800048828, "logps/rejected": -110.98318481445312, "loss": 0.2328, "rewards/accuracies": 1.0, "rewards/chosen": -0.13989029824733734, "rewards/margins": 0.5253356695175171, "rewards/rejected": -0.6652259826660156, "step": 2546 }, { "epoch": 0.56, "learning_rate": 8.427488460034567e-06, "logits/chosen": -1.1137007474899292, "logits/rejected": -1.0677114725112915, "logps/chosen": -186.37591552734375, "logps/rejected": -158.77044677734375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.752471923828125, "rewards/margins": 4.551543712615967, "rewards/rejected": -6.304015636444092, "step": 2547 }, { "epoch": 0.56, "learning_rate": 8.426183290944387e-06, "logits/chosen": -1.234501838684082, "logits/rejected": -1.2011446952819824, "logps/chosen": -115.19143676757812, "logps/rejected": -183.00634765625, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": -0.25878602266311646, "rewards/margins": 3.3508424758911133, "rewards/rejected": -3.609628438949585, "step": 2548 }, { "epoch": 0.56, "learning_rate": 8.424877681597889e-06, "logits/chosen": -1.0423147678375244, "logits/rejected": -1.0423147678375244, "logps/chosen": -105.73007202148438, "logps/rejected": -105.73007202148438, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.9864959716796875, "rewards/margins": 0.0, "rewards/rejected": -2.9864959716796875, "step": 2549 }, { "epoch": 0.56, "learning_rate": 8.423571632162843e-06, "logits/chosen": -0.9989978671073914, "logits/rejected": -0.9479172825813293, "logps/chosen": -85.3078842163086, "logps/rejected": -156.87911987304688, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": -0.5668525695800781, "rewards/margins": 4.555849552154541, "rewards/rejected": -5.122702121734619, "step": 2550 }, { "epoch": 0.56, "learning_rate": 8.422265142807071e-06, "logits/chosen": -0.936215877532959, "logits/rejected": -0.9091652631759644, "logps/chosen": -268.6652526855469, "logps/rejected": -184.92776489257812, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.6274185180664062, "rewards/margins": 6.417658805847168, "rewards/rejected": -8.045077323913574, "step": 2551 }, { "epoch": 0.56, "learning_rate": 8.420958213698455e-06, "logits/chosen": -1.211530327796936, "logits/rejected": -1.1797560453414917, "logps/chosen": -115.24406433105469, "logps/rejected": -67.71391296386719, "loss": 0.3812, "rewards/accuracies": 1.0, "rewards/chosen": 1.5628799200057983, "rewards/margins": 4.979402542114258, "rewards/rejected": -3.416522741317749, "step": 2552 }, { "epoch": 0.57, "learning_rate": 8.419650845004932e-06, "logits/chosen": -1.2811354398727417, "logits/rejected": -1.2049548625946045, "logps/chosen": -97.56719207763672, "logps/rejected": -203.82049560546875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.0205955505371094, "rewards/margins": 8.731819152832031, "rewards/rejected": -9.75241470336914, "step": 2553 }, { "epoch": 0.57, "learning_rate": 8.418343036894497e-06, "logits/chosen": -1.1031936407089233, "logits/rejected": -1.0545337200164795, "logps/chosen": -118.62248229980469, "logps/rejected": -228.71702575683594, "loss": 0.0536, "rewards/accuracies": 1.0, "rewards/chosen": 0.6536971926689148, "rewards/margins": 5.911782741546631, "rewards/rejected": -5.25808572769165, "step": 2554 }, { "epoch": 0.57, "learning_rate": 8.4170347895352e-06, "logits/chosen": -0.802756130695343, "logits/rejected": -0.8190150260925293, "logps/chosen": -45.651649475097656, "logps/rejected": -37.30525588989258, "loss": 1.5933, "rewards/accuracies": 0.0, "rewards/chosen": -2.6810476779937744, "rewards/margins": -0.7634319067001343, "rewards/rejected": -1.9176157712936401, "step": 2555 }, { "epoch": 0.57, "learning_rate": 8.415726103095146e-06, "logits/chosen": -0.9184556603431702, "logits/rejected": -0.8737136125564575, "logps/chosen": -84.04285430908203, "logps/rejected": -138.83584594726562, "loss": 0.1713, "rewards/accuracies": 1.0, "rewards/chosen": -2.4520058631896973, "rewards/margins": 1.1662757396697998, "rewards/rejected": -3.618281602859497, "step": 2556 }, { "epoch": 0.57, "learning_rate": 8.414416977742498e-06, "logits/chosen": -0.9433974623680115, "logits/rejected": -0.9638665318489075, "logps/chosen": -114.46035766601562, "logps/rejected": -137.4533233642578, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 0.24269257485866547, "rewards/margins": 4.879218101501465, "rewards/rejected": -4.636525630950928, "step": 2557 }, { "epoch": 0.57, "learning_rate": 8.413107413645477e-06, "logits/chosen": -0.7596645951271057, "logits/rejected": -0.7484292984008789, "logps/chosen": -54.03031921386719, "logps/rejected": -92.68932342529297, "loss": 0.0735, "rewards/accuracies": 1.0, "rewards/chosen": -2.0978381633758545, "rewards/margins": 1.8466770648956299, "rewards/rejected": -3.9445152282714844, "step": 2558 }, { "epoch": 0.57, "learning_rate": 8.411797410972358e-06, "logits/chosen": -1.0013760328292847, "logits/rejected": -0.986176073551178, "logps/chosen": -110.64361572265625, "logps/rejected": -209.73342895507812, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.6054710149765015, "rewards/margins": 4.406111717224121, "rewards/rejected": -6.011582851409912, "step": 2559 }, { "epoch": 0.57, "learning_rate": 8.410486969891475e-06, "logits/chosen": -0.9866787791252136, "logits/rejected": -0.9266942739486694, "logps/chosen": -293.78460693359375, "logps/rejected": -230.04147338867188, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.3560791015625, "rewards/margins": 11.233328819274902, "rewards/rejected": -12.589407920837402, "step": 2560 }, { "epoch": 0.57, "learning_rate": 8.409176090571214e-06, "logits/chosen": -0.8021902441978455, "logits/rejected": -0.7774296998977661, "logps/chosen": -79.1238784790039, "logps/rejected": -175.28012084960938, "loss": 0.3668, "rewards/accuracies": 1.0, "rewards/chosen": -0.9204940795898438, "rewards/margins": 3.186234951019287, "rewards/rejected": -4.106729030609131, "step": 2561 }, { "epoch": 0.57, "learning_rate": 8.40786477318002e-06, "logits/chosen": -0.9764621257781982, "logits/rejected": -0.9764621257781982, "logps/chosen": -55.3625602722168, "logps/rejected": -55.3625602722168, "loss": 0.3672, "rewards/accuracies": 0.0, "rewards/chosen": -3.157088041305542, "rewards/margins": 0.0, "rewards/rejected": -3.157088041305542, "step": 2562 }, { "epoch": 0.57, "learning_rate": 8.406553017886397e-06, "logits/chosen": -1.1763436794281006, "logits/rejected": -1.1520081758499146, "logps/chosen": -89.59535217285156, "logps/rejected": -159.19970703125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.9507904052734375, "rewards/margins": 6.577115058898926, "rewards/rejected": -7.527905464172363, "step": 2563 }, { "epoch": 0.57, "learning_rate": 8.405240824858898e-06, "logits/chosen": -0.7761600613594055, "logits/rejected": -0.7055301070213318, "logps/chosen": -83.66554260253906, "logps/rejected": -254.1302490234375, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": 0.22620239853858948, "rewards/margins": 3.47464919090271, "rewards/rejected": -3.2484467029571533, "step": 2564 }, { "epoch": 0.57, "learning_rate": 8.40392819426614e-06, "logits/chosen": -1.3298758268356323, "logits/rejected": -1.2776519060134888, "logps/chosen": -167.39041137695312, "logps/rejected": -214.91690063476562, "loss": 0.1916, "rewards/accuracies": 1.0, "rewards/chosen": -0.17709656059741974, "rewards/margins": 3.962454080581665, "rewards/rejected": -4.139550685882568, "step": 2565 }, { "epoch": 0.57, "learning_rate": 8.402615126276792e-06, "logits/chosen": -0.8062227368354797, "logits/rejected": -0.7908011078834534, "logps/chosen": -91.54684448242188, "logps/rejected": -116.70626831054688, "loss": 0.357, "rewards/accuracies": 1.0, "rewards/chosen": 0.7901886105537415, "rewards/margins": 3.85565185546875, "rewards/rejected": -3.0654633045196533, "step": 2566 }, { "epoch": 0.57, "learning_rate": 8.40130162105958e-06, "logits/chosen": -1.3833510875701904, "logits/rejected": -1.4121698141098022, "logps/chosen": -106.87060546875, "logps/rejected": -169.11856079101562, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -2.5840156078338623, "rewards/margins": 3.9553492069244385, "rewards/rejected": -6.539364814758301, "step": 2567 }, { "epoch": 0.57, "learning_rate": 8.399987678783285e-06, "logits/chosen": -1.3561458587646484, "logits/rejected": -1.4141396284103394, "logps/chosen": -119.71708679199219, "logps/rejected": -83.7939453125, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": 0.634149968624115, "rewards/margins": 2.733673572540283, "rewards/rejected": -2.0995235443115234, "step": 2568 }, { "epoch": 0.57, "learning_rate": 8.398673299616747e-06, "logits/chosen": -1.1056197881698608, "logits/rejected": -1.1605430841445923, "logps/chosen": -237.0891876220703, "logps/rejected": -125.49076080322266, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 0.17366790771484375, "rewards/margins": 3.8503081798553467, "rewards/rejected": -3.676640272140503, "step": 2569 }, { "epoch": 0.57, "learning_rate": 8.397358483728861e-06, "logits/chosen": -1.4018361568450928, "logits/rejected": -1.3880348205566406, "logps/chosen": -110.64125061035156, "logps/rejected": -189.06423950195312, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.5072205066680908, "rewards/margins": 5.598496437072754, "rewards/rejected": -7.105716705322266, "step": 2570 }, { "epoch": 0.57, "learning_rate": 8.396043231288577e-06, "logits/chosen": -1.1114768981933594, "logits/rejected": -1.2027403116226196, "logps/chosen": -259.6518859863281, "logps/rejected": -157.19302368164062, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/chosen": -3.1658875942230225, "rewards/margins": 2.879408121109009, "rewards/rejected": -6.045295715332031, "step": 2571 }, { "epoch": 0.57, "learning_rate": 8.3947275424649e-06, "logits/chosen": -0.8936555981636047, "logits/rejected": -0.874697208404541, "logps/chosen": -170.215087890625, "logps/rejected": -314.86981201171875, "loss": 0.1251, "rewards/accuracies": 1.0, "rewards/chosen": 0.4352279603481293, "rewards/margins": 7.9181013107299805, "rewards/rejected": -7.482873439788818, "step": 2572 }, { "epoch": 0.57, "learning_rate": 8.393411417426895e-06, "logits/chosen": -0.650111198425293, "logits/rejected": -0.6136890053749084, "logps/chosen": -87.86697387695312, "logps/rejected": -190.88893127441406, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.15154419839382172, "rewards/margins": 7.211291313171387, "rewards/rejected": -7.059747219085693, "step": 2573 }, { "epoch": 0.57, "learning_rate": 8.392094856343682e-06, "logits/chosen": -1.1755541563034058, "logits/rejected": -1.7949070930480957, "logps/chosen": -57.873435974121094, "logps/rejected": -209.046875, "loss": 1.4315, "rewards/accuracies": 1.0, "rewards/chosen": -3.4634552001953125, "rewards/margins": 9.141122817993164, "rewards/rejected": -12.604578018188477, "step": 2574 }, { "epoch": 0.57, "learning_rate": 8.390777859384434e-06, "logits/chosen": -0.8851701617240906, "logits/rejected": -0.834522008895874, "logps/chosen": -226.63824462890625, "logps/rejected": -184.41366577148438, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": 1.9815704822540283, "rewards/margins": 6.6410980224609375, "rewards/rejected": -4.659527778625488, "step": 2575 }, { "epoch": 0.57, "learning_rate": 8.38946042671838e-06, "logits/chosen": -0.8640762567520142, "logits/rejected": -0.8187764286994934, "logps/chosen": -109.4375991821289, "logps/rejected": -209.127685546875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -2.6950478553771973, "rewards/margins": 5.152414798736572, "rewards/rejected": -7.8474626541137695, "step": 2576 }, { "epoch": 0.57, "learning_rate": 8.388142558514811e-06, "logits/chosen": -1.1955981254577637, "logits/rejected": -1.1664377450942993, "logps/chosen": -223.17355346679688, "logps/rejected": -292.09197998046875, "loss": 0.1798, "rewards/accuracies": 1.0, "rewards/chosen": 1.9816712141036987, "rewards/margins": 0.8701416254043579, "rewards/rejected": 1.1115295886993408, "step": 2577 }, { "epoch": 0.57, "learning_rate": 8.38682425494307e-06, "logits/chosen": -0.8246067762374878, "logits/rejected": -0.8246067762374878, "logps/chosen": -115.03887176513672, "logps/rejected": -115.03887176513672, "loss": 0.3495, "rewards/accuracies": 0.0, "rewards/chosen": -3.0368752479553223, "rewards/margins": 0.0, "rewards/rejected": -3.0368752479553223, "step": 2578 }, { "epoch": 0.57, "learning_rate": 8.38550551617255e-06, "logits/chosen": -1.0470526218414307, "logits/rejected": -1.1096787452697754, "logps/chosen": -152.25982666015625, "logps/rejected": -124.3802490234375, "loss": 1.3418, "rewards/accuracies": 0.0, "rewards/chosen": -4.580559730529785, "rewards/margins": -2.612874746322632, "rewards/rejected": -1.9676849842071533, "step": 2579 }, { "epoch": 0.57, "learning_rate": 8.384186342372711e-06, "logits/chosen": -0.7526061534881592, "logits/rejected": -0.6820511221885681, "logps/chosen": -246.52072143554688, "logps/rejected": -282.11102294921875, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": 0.30147552490234375, "rewards/margins": 2.4897537231445312, "rewards/rejected": -2.1882781982421875, "step": 2580 }, { "epoch": 0.57, "learning_rate": 8.382866733713064e-06, "logits/chosen": -0.5887575149536133, "logits/rejected": -0.6380473971366882, "logps/chosen": -152.30355834960938, "logps/rejected": -106.4281005859375, "loss": 0.8269, "rewards/accuracies": 0.0, "rewards/chosen": -3.3157143592834473, "rewards/margins": -1.440792202949524, "rewards/rejected": -1.8749221563339233, "step": 2581 }, { "epoch": 0.57, "learning_rate": 8.381546690363174e-06, "logits/chosen": -1.1026188135147095, "logits/rejected": -0.63776034116745, "logps/chosen": -122.10053253173828, "logps/rejected": -529.7506713867188, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -0.7882453799247742, "rewards/margins": 27.86549186706543, "rewards/rejected": -28.653738021850586, "step": 2582 }, { "epoch": 0.57, "learning_rate": 8.380226212492661e-06, "logits/chosen": -0.8686937093734741, "logits/rejected": -0.7668134570121765, "logps/chosen": -246.8651123046875, "logps/rejected": -474.0679626464844, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": 2.363476514816284, "rewards/margins": 9.540863037109375, "rewards/rejected": -7.17738676071167, "step": 2583 }, { "epoch": 0.57, "learning_rate": 8.378905300271207e-06, "logits/chosen": -1.025416612625122, "logits/rejected": -1.7148269414901733, "logps/chosen": -115.21728515625, "logps/rejected": -122.17217254638672, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.2394638061523438, "rewards/margins": 4.952266693115234, "rewards/rejected": -6.191730499267578, "step": 2584 }, { "epoch": 0.57, "learning_rate": 8.377583953868545e-06, "logits/chosen": -0.9830458164215088, "logits/rejected": -0.9424386620521545, "logps/chosen": -119.96253967285156, "logps/rejected": -201.88633728027344, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -2.9548966884613037, "rewards/margins": 3.3194854259490967, "rewards/rejected": -6.2743821144104, "step": 2585 }, { "epoch": 0.57, "learning_rate": 8.376262173454464e-06, "logits/chosen": -0.9921661019325256, "logits/rejected": -0.9547011852264404, "logps/chosen": -215.1217803955078, "logps/rejected": -246.2819366455078, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": 0.7049087882041931, "rewards/margins": 4.383186340332031, "rewards/rejected": -3.6782777309417725, "step": 2586 }, { "epoch": 0.57, "learning_rate": 8.374939959198809e-06, "logits/chosen": -1.2648515701293945, "logits/rejected": -1.3579444885253906, "logps/chosen": -223.96826171875, "logps/rejected": -121.91197204589844, "loss": 1.2102, "rewards/accuracies": 1.0, "rewards/chosen": 0.5376068353652954, "rewards/margins": 2.5677924156188965, "rewards/rejected": -2.0301856994628906, "step": 2587 }, { "epoch": 0.57, "learning_rate": 8.373617311271483e-06, "logits/chosen": -0.9955005049705505, "logits/rejected": -0.42632994055747986, "logps/chosen": -189.4569549560547, "logps/rejected": -262.9699401855469, "loss": 0.1427, "rewards/accuracies": 1.0, "rewards/chosen": 3.1549148559570312, "rewards/margins": 10.8366060256958, "rewards/rejected": -7.6816911697387695, "step": 2588 }, { "epoch": 0.57, "learning_rate": 8.372294229842442e-06, "logits/chosen": -0.7657585740089417, "logits/rejected": -0.7598947286605835, "logps/chosen": -31.428638458251953, "logps/rejected": -74.68547058105469, "loss": 0.3965, "rewards/accuracies": 0.0, "rewards/chosen": -0.639792263507843, "rewards/margins": -0.19068947434425354, "rewards/rejected": -0.4491027891635895, "step": 2589 }, { "epoch": 0.57, "learning_rate": 8.3709707150817e-06, "logits/chosen": -0.7390019297599792, "logits/rejected": -0.6960142850875854, "logps/chosen": -191.955078125, "logps/rejected": -215.1209259033203, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.699188232421875, "rewards/margins": 6.956958293914795, "rewards/rejected": -7.65614652633667, "step": 2590 }, { "epoch": 0.57, "learning_rate": 8.369646767159325e-06, "logits/chosen": -0.7981321811676025, "logits/rejected": -0.8063170909881592, "logps/chosen": -164.9586639404297, "logps/rejected": -136.11767578125, "loss": 3.1711, "rewards/accuracies": 0.0, "rewards/chosen": -8.42405891418457, "rewards/margins": -6.339748382568359, "rewards/rejected": -2.084310293197632, "step": 2591 }, { "epoch": 0.57, "learning_rate": 8.36832238624544e-06, "logits/chosen": -0.5698899030685425, "logits/rejected": -0.6114515066146851, "logps/chosen": -174.77818298339844, "logps/rejected": -94.10176849365234, "loss": 0.0501, "rewards/accuracies": 1.0, "rewards/chosen": -0.2565780580043793, "rewards/margins": 2.298387289047241, "rewards/rejected": -2.5549652576446533, "step": 2592 }, { "epoch": 0.57, "learning_rate": 8.366997572510228e-06, "logits/chosen": -0.7841475009918213, "logits/rejected": -0.7841475009918213, "logps/chosen": -123.94218444824219, "logps/rejected": -123.94218444824219, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.018919944763184, "rewards/margins": 0.0, "rewards/rejected": -6.018919944763184, "step": 2593 }, { "epoch": 0.57, "learning_rate": 8.365672326123918e-06, "logits/chosen": -1.120444655418396, "logits/rejected": -1.0733025074005127, "logps/chosen": -104.53987884521484, "logps/rejected": -198.20079040527344, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.8058929443359375, "rewards/margins": 5.255682468414307, "rewards/rejected": -6.061575412750244, "step": 2594 }, { "epoch": 0.57, "learning_rate": 8.364346647256808e-06, "logits/chosen": -0.8256741762161255, "logits/rejected": -0.7984200119972229, "logps/chosen": -163.06741333007812, "logps/rejected": -155.4136962890625, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": 1.9300110340118408, "rewards/margins": 7.028413772583008, "rewards/rejected": -5.098402500152588, "step": 2595 }, { "epoch": 0.57, "learning_rate": 8.36302053607924e-06, "logits/chosen": -1.0548373460769653, "logits/rejected": -1.043192982673645, "logps/chosen": -65.10308837890625, "logps/rejected": -61.81549072265625, "loss": 0.0759, "rewards/accuracies": 1.0, "rewards/chosen": -0.1878807097673416, "rewards/margins": 1.8542373180389404, "rewards/rejected": -2.0421180725097656, "step": 2596 }, { "epoch": 0.57, "learning_rate": 8.361693992761617e-06, "logits/chosen": -0.8686192631721497, "logits/rejected": -0.7932263612747192, "logps/chosen": -91.64051818847656, "logps/rejected": -151.62982177734375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 0.9932388663291931, "rewards/margins": 7.26264762878418, "rewards/rejected": -6.269408702850342, "step": 2597 }, { "epoch": 0.58, "learning_rate": 8.360367017474398e-06, "logits/chosen": -0.99009108543396, "logits/rejected": -0.99009108543396, "logps/chosen": -179.2823486328125, "logps/rejected": -179.2823486328125, "loss": 0.3726, "rewards/accuracies": 0.0, "rewards/chosen": -8.885385513305664, "rewards/margins": 0.0, "rewards/rejected": -8.885385513305664, "step": 2598 }, { "epoch": 0.58, "learning_rate": 8.359039610388096e-06, "logits/chosen": -0.7332204580307007, "logits/rejected": -0.7332204580307007, "logps/chosen": -207.92864990234375, "logps/rejected": -207.92864990234375, "loss": 0.3637, "rewards/accuracies": 0.0, "rewards/chosen": -4.095288276672363, "rewards/margins": 0.0, "rewards/rejected": -4.095288276672363, "step": 2599 }, { "epoch": 0.58, "learning_rate": 8.357711771673278e-06, "logits/chosen": -0.6359648704528809, "logits/rejected": -0.6076613068580627, "logps/chosen": -124.22938537597656, "logps/rejected": -118.9794692993164, "loss": 0.2489, "rewards/accuracies": 1.0, "rewards/chosen": -1.86339271068573, "rewards/margins": 0.5458251237869263, "rewards/rejected": -2.4092178344726562, "step": 2600 }, { "epoch": 0.58, "learning_rate": 8.35638350150057e-06, "logits/chosen": -0.9058758616447449, "logits/rejected": -0.8698660135269165, "logps/chosen": -89.44091796875, "logps/rejected": -63.100555419921875, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -0.01741638220846653, "rewards/margins": 3.268397092819214, "rewards/rejected": -3.285813570022583, "step": 2601 }, { "epoch": 0.58, "learning_rate": 8.35505480004065e-06, "logits/chosen": -0.8179123401641846, "logits/rejected": -0.8370795845985413, "logps/chosen": -256.10028076171875, "logps/rejected": -209.97279357910156, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 3.37628173828125, "rewards/margins": 4.0688157081604, "rewards/rejected": -0.6925339102745056, "step": 2602 }, { "epoch": 0.58, "learning_rate": 8.353725667464254e-06, "logits/chosen": -0.7323707938194275, "logits/rejected": -0.7253000140190125, "logps/chosen": -71.38197326660156, "logps/rejected": -90.4455795288086, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -1.7440681457519531, "rewards/margins": 2.798084259033203, "rewards/rejected": -4.542152404785156, "step": 2603 }, { "epoch": 0.58, "learning_rate": 8.352396103942171e-06, "logits/chosen": -0.740638256072998, "logits/rejected": -0.7553285360336304, "logps/chosen": -176.95846557617188, "logps/rejected": -199.4325714111328, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -0.3621887266635895, "rewards/margins": 4.948002815246582, "rewards/rejected": -5.310191631317139, "step": 2604 }, { "epoch": 0.58, "learning_rate": 8.351066109645248e-06, "logits/chosen": -0.8509075045585632, "logits/rejected": -0.8650102615356445, "logps/chosen": -177.07574462890625, "logps/rejected": -185.3011016845703, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": 1.0691741704940796, "rewards/margins": 9.02392292022705, "rewards/rejected": -7.954748630523682, "step": 2605 }, { "epoch": 0.58, "learning_rate": 8.349735684744385e-06, "logits/chosen": -0.9664254188537598, "logits/rejected": -1.0031148195266724, "logps/chosen": -72.14736938476562, "logps/rejected": -128.1200714111328, "loss": 0.1204, "rewards/accuracies": 1.0, "rewards/chosen": 1.1223633289337158, "rewards/margins": 4.155059814453125, "rewards/rejected": -3.032696485519409, "step": 2606 }, { "epoch": 0.58, "learning_rate": 8.34840482941054e-06, "logits/chosen": -0.8328692317008972, "logits/rejected": -0.7427219152450562, "logps/chosen": -106.85488891601562, "logps/rejected": -198.0551300048828, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 0.5046135187149048, "rewards/margins": 6.242940425872803, "rewards/rejected": -5.7383270263671875, "step": 2607 }, { "epoch": 0.58, "learning_rate": 8.347073543814723e-06, "logits/chosen": -0.6724762916564941, "logits/rejected": -0.6615335941314697, "logps/chosen": -74.25717163085938, "logps/rejected": -98.06001281738281, "loss": 0.1813, "rewards/accuracies": 1.0, "rewards/chosen": 0.11098556965589523, "rewards/margins": 1.584967017173767, "rewards/rejected": -1.473981499671936, "step": 2608 }, { "epoch": 0.58, "learning_rate": 8.345741828128003e-06, "logits/chosen": -0.9795160889625549, "logits/rejected": -1.0021756887435913, "logps/chosen": -85.31477355957031, "logps/rejected": -105.08929443359375, "loss": 0.0755, "rewards/accuracies": 1.0, "rewards/chosen": -0.027487946674227715, "rewards/margins": 1.8153587579727173, "rewards/rejected": -1.8428467512130737, "step": 2609 }, { "epoch": 0.58, "learning_rate": 8.344409682521499e-06, "logits/chosen": -1.359965443611145, "logits/rejected": -1.3766279220581055, "logps/chosen": -109.0085678100586, "logps/rejected": -147.46115112304688, "loss": 0.0869, "rewards/accuracies": 1.0, "rewards/chosen": -0.5823158621788025, "rewards/margins": 1.9485504627227783, "rewards/rejected": -2.5308663845062256, "step": 2610 }, { "epoch": 0.58, "learning_rate": 8.343077107166394e-06, "logits/chosen": -0.693291425704956, "logits/rejected": -0.6342312693595886, "logps/chosen": -75.2099838256836, "logps/rejected": -62.324554443359375, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": 0.22684097290039062, "rewards/margins": 3.517503499984741, "rewards/rejected": -3.2906625270843506, "step": 2611 }, { "epoch": 0.58, "learning_rate": 8.341744102233916e-06, "logits/chosen": -0.7780846357345581, "logits/rejected": -0.7474269866943359, "logps/chosen": -89.03025817871094, "logps/rejected": -168.2032470703125, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.32512208819389343, "rewards/margins": 4.9952073097229, "rewards/rejected": -5.320329189300537, "step": 2612 }, { "epoch": 0.58, "learning_rate": 8.340410667895352e-06, "logits/chosen": -0.7300792336463928, "logits/rejected": -0.6679178476333618, "logps/chosen": -44.49544906616211, "logps/rejected": -137.09962463378906, "loss": 0.142, "rewards/accuracies": 1.0, "rewards/chosen": 0.3335948884487152, "rewards/margins": 6.294833183288574, "rewards/rejected": -5.961238384246826, "step": 2613 }, { "epoch": 0.58, "learning_rate": 8.339076804322048e-06, "logits/chosen": -1.3229804039001465, "logits/rejected": -1.2876890897750854, "logps/chosen": -64.55091857910156, "logps/rejected": -153.99026489257812, "loss": 0.0646, "rewards/accuracies": 1.0, "rewards/chosen": -1.0763405561447144, "rewards/margins": 4.392378330230713, "rewards/rejected": -5.468719005584717, "step": 2614 }, { "epoch": 0.58, "learning_rate": 8.337742511685403e-06, "logits/chosen": -1.082817554473877, "logits/rejected": -1.053131341934204, "logps/chosen": -193.50880432128906, "logps/rejected": -281.5057373046875, "loss": 0.1546, "rewards/accuracies": 1.0, "rewards/chosen": 1.5582932233810425, "rewards/margins": 4.293609619140625, "rewards/rejected": -2.735316514968872, "step": 2615 }, { "epoch": 0.58, "learning_rate": 8.336407790156868e-06, "logits/chosen": -0.6501949429512024, "logits/rejected": -0.5999951958656311, "logps/chosen": -142.1304931640625, "logps/rejected": -212.26956176757812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 1.4698609113693237, "rewards/margins": 8.947578430175781, "rewards/rejected": -7.477717876434326, "step": 2616 }, { "epoch": 0.58, "learning_rate": 8.335072639907953e-06, "logits/chosen": -0.5117473006248474, "logits/rejected": -0.45008009672164917, "logps/chosen": -164.33087158203125, "logps/rejected": -238.80763244628906, "loss": 0.0885, "rewards/accuracies": 1.0, "rewards/chosen": -1.6283172369003296, "rewards/margins": 1.9111648797988892, "rewards/rejected": -3.5394821166992188, "step": 2617 }, { "epoch": 0.58, "learning_rate": 8.33373706111022e-06, "logits/chosen": -1.056566834449768, "logits/rejected": -1.1054620742797852, "logps/chosen": -189.66014099121094, "logps/rejected": -332.06005859375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 2.774893283843994, "rewards/margins": 10.956165313720703, "rewards/rejected": -8.18127155303955, "step": 2618 }, { "epoch": 0.58, "learning_rate": 8.332401053935288e-06, "logits/chosen": -0.9498113989830017, "logits/rejected": -0.9324077367782593, "logps/chosen": -128.82899475097656, "logps/rejected": -150.31161499023438, "loss": 1.5388, "rewards/accuracies": 0.0, "rewards/chosen": -4.802040100097656, "rewards/margins": -3.0304975509643555, "rewards/rejected": -1.7715424299240112, "step": 2619 }, { "epoch": 0.58, "learning_rate": 8.331064618554834e-06, "logits/chosen": -0.8415842056274414, "logits/rejected": -0.8598897457122803, "logps/chosen": -80.1010513305664, "logps/rejected": -65.16056823730469, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": -0.7215934991836548, "rewards/margins": 2.562574863433838, "rewards/rejected": -3.284168243408203, "step": 2620 }, { "epoch": 0.58, "learning_rate": 8.329727755140584e-06, "logits/chosen": -1.1700409650802612, "logits/rejected": -1.2050657272338867, "logps/chosen": -102.2368392944336, "logps/rejected": -143.48922729492188, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.9244338870048523, "rewards/margins": 2.4841911792755127, "rewards/rejected": -1.5597572326660156, "step": 2621 }, { "epoch": 0.58, "learning_rate": 8.32839046386432e-06, "logits/chosen": -0.8587172031402588, "logits/rejected": -0.839002788066864, "logps/chosen": -109.67936706542969, "logps/rejected": -173.77976989746094, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9886871576309204, "rewards/margins": 5.805327892303467, "rewards/rejected": -6.794014930725098, "step": 2622 }, { "epoch": 0.58, "learning_rate": 8.327052744897883e-06, "logits/chosen": -1.0607571601867676, "logits/rejected": -1.0585951805114746, "logps/chosen": -141.70980834960938, "logps/rejected": -178.85806274414062, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -1.9818443059921265, "rewards/margins": 2.9995245933532715, "rewards/rejected": -4.9813690185546875, "step": 2623 }, { "epoch": 0.58, "learning_rate": 8.325714598413169e-06, "logits/chosen": -1.18288254737854, "logits/rejected": -1.1617915630340576, "logps/chosen": -101.86726379394531, "logps/rejected": -135.33253479003906, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.35506439208984375, "rewards/margins": 5.891362190246582, "rewards/rejected": -5.536297798156738, "step": 2624 }, { "epoch": 0.58, "learning_rate": 8.32437602458212e-06, "logits/chosen": -1.0993633270263672, "logits/rejected": -1.1082842350006104, "logps/chosen": -74.01507568359375, "logps/rejected": -87.34480285644531, "loss": 0.0635, "rewards/accuracies": 1.0, "rewards/chosen": 0.1690933257341385, "rewards/margins": 2.0019021034240723, "rewards/rejected": -1.8328087329864502, "step": 2625 }, { "epoch": 0.58, "learning_rate": 8.323037023576745e-06, "logits/chosen": -1.697037696838379, "logits/rejected": -1.813599705696106, "logps/chosen": -131.92181396484375, "logps/rejected": -64.0209732055664, "loss": 1.1325, "rewards/accuracies": 0.0, "rewards/chosen": -5.68927526473999, "rewards/margins": -2.1551027297973633, "rewards/rejected": -3.534172534942627, "step": 2626 }, { "epoch": 0.58, "learning_rate": 8.3216975955691e-06, "logits/chosen": -0.7755037546157837, "logits/rejected": -0.7741696238517761, "logps/chosen": -65.91717529296875, "logps/rejected": -159.07861328125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.12664948403835297, "rewards/margins": 5.257587432861328, "rewards/rejected": -5.384236812591553, "step": 2627 }, { "epoch": 0.58, "learning_rate": 8.320357740731302e-06, "logits/chosen": -0.7609901428222656, "logits/rejected": -0.7076136469841003, "logps/chosen": -215.7936248779297, "logps/rejected": -190.92819213867188, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": 0.01539459265768528, "rewards/margins": 2.8302018642425537, "rewards/rejected": -2.814807176589966, "step": 2628 }, { "epoch": 0.58, "learning_rate": 8.319017459235515e-06, "logits/chosen": -0.8344241380691528, "logits/rejected": -0.7617362141609192, "logps/chosen": -101.23307800292969, "logps/rejected": -226.6536102294922, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.12818603217601776, "rewards/margins": 5.670994281768799, "rewards/rejected": -5.799180507659912, "step": 2629 }, { "epoch": 0.58, "learning_rate": 8.317676751253961e-06, "logits/chosen": -1.046106219291687, "logits/rejected": -1.0237305164337158, "logps/chosen": -109.3730697631836, "logps/rejected": -134.7858428955078, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.2663078308105469, "rewards/margins": 5.7250871658325195, "rewards/rejected": -5.458779335021973, "step": 2630 }, { "epoch": 0.58, "learning_rate": 8.316335616958922e-06, "logits/chosen": -1.0400385856628418, "logits/rejected": -0.5676335096359253, "logps/chosen": -72.12201690673828, "logps/rejected": -405.00604248046875, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": 0.26394423842430115, "rewards/margins": 35.3955192565918, "rewards/rejected": -35.13157653808594, "step": 2631 }, { "epoch": 0.58, "learning_rate": 8.314994056522727e-06, "logits/chosen": -0.8873751759529114, "logits/rejected": -0.8035866022109985, "logps/chosen": -184.10345458984375, "logps/rejected": -340.1991271972656, "loss": 0.0886, "rewards/accuracies": 1.0, "rewards/chosen": 1.2371612787246704, "rewards/margins": 3.3400330543518066, "rewards/rejected": -2.1028716564178467, "step": 2632 }, { "epoch": 0.58, "learning_rate": 8.313652070117765e-06, "logits/chosen": -0.7959468960762024, "logits/rejected": -0.8312019109725952, "logps/chosen": -212.709228515625, "logps/rejected": -122.99974060058594, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": 0.6382339596748352, "rewards/margins": 3.02764892578125, "rewards/rejected": -2.3894150257110596, "step": 2633 }, { "epoch": 0.58, "learning_rate": 8.31230965791648e-06, "logits/chosen": -1.082327127456665, "logits/rejected": -1.190830111503601, "logps/chosen": -296.80682373046875, "logps/rejected": -164.84300231933594, "loss": 0.052, "rewards/accuracies": 1.0, "rewards/chosen": -1.024469017982483, "rewards/margins": 3.265315532684326, "rewards/rejected": -4.2897844314575195, "step": 2634 }, { "epoch": 0.58, "learning_rate": 8.310966820091364e-06, "logits/chosen": -0.8849443197250366, "logits/rejected": -0.862571120262146, "logps/chosen": -155.18096923828125, "logps/rejected": -106.73797607421875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.9427642822265625, "rewards/margins": 5.494760036468506, "rewards/rejected": -4.551995754241943, "step": 2635 }, { "epoch": 0.58, "learning_rate": 8.309623556814972e-06, "logits/chosen": -1.1736164093017578, "logits/rejected": -1.1293830871582031, "logps/chosen": -122.00572204589844, "logps/rejected": -177.69534301757812, "loss": 0.3208, "rewards/accuracies": 1.0, "rewards/chosen": 1.1061325073242188, "rewards/margins": 3.2921738624572754, "rewards/rejected": -2.1860413551330566, "step": 2636 }, { "epoch": 0.58, "learning_rate": 8.30827986825991e-06, "logits/chosen": -0.8028842806816101, "logits/rejected": -0.7049663662910461, "logps/chosen": -85.4677734375, "logps/rejected": -168.57510375976562, "loss": 0.3475, "rewards/accuracies": 1.0, "rewards/chosen": -1.5997298955917358, "rewards/margins": 6.34372615814209, "rewards/rejected": -7.943456172943115, "step": 2637 }, { "epoch": 0.58, "learning_rate": 8.306935754598838e-06, "logits/chosen": -1.068651556968689, "logits/rejected": -1.0905344486236572, "logps/chosen": -94.97528076171875, "logps/rejected": -98.10932922363281, "loss": 0.0616, "rewards/accuracies": 1.0, "rewards/chosen": -0.267507940530777, "rewards/margins": 2.0566062927246094, "rewards/rejected": -2.3241143226623535, "step": 2638 }, { "epoch": 0.58, "learning_rate": 8.305591216004468e-06, "logits/chosen": -0.7712838053703308, "logits/rejected": -0.7586860656738281, "logps/chosen": -138.395263671875, "logps/rejected": -134.49481201171875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": 1.2404892444610596, "rewards/margins": 6.398404121398926, "rewards/rejected": -5.157914638519287, "step": 2639 }, { "epoch": 0.58, "learning_rate": 8.304246252649574e-06, "logits/chosen": -1.0690356492996216, "logits/rejected": -1.0690356492996216, "logps/chosen": -99.50634765625, "logps/rejected": -99.50634765625, "loss": 0.349, "rewards/accuracies": 0.0, "rewards/chosen": -0.8479202389717102, "rewards/margins": 0.0, "rewards/rejected": -0.8479202389717102, "step": 2640 }, { "epoch": 0.58, "learning_rate": 8.302900864706982e-06, "logits/chosen": -0.85789954662323, "logits/rejected": -0.7307518124580383, "logps/chosen": -172.44192504882812, "logps/rejected": -514.3438720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9147995710372925, "rewards/margins": 39.64313507080078, "rewards/rejected": -41.55793380737305, "step": 2641 }, { "epoch": 0.58, "learning_rate": 8.301555052349567e-06, "logits/chosen": -1.080891728401184, "logits/rejected": -1.080891728401184, "logps/chosen": -197.494873046875, "logps/rejected": -197.494873046875, "loss": 0.3477, "rewards/accuracies": 0.0, "rewards/chosen": -5.856269836425781, "rewards/margins": 0.0, "rewards/rejected": -5.856269836425781, "step": 2642 }, { "epoch": 0.58, "learning_rate": 8.300208815750266e-06, "logits/chosen": -0.7158429026603699, "logits/rejected": -0.6194645762443542, "logps/chosen": -216.1409454345703, "logps/rejected": -145.66732788085938, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 2.3200790882110596, "rewards/margins": 8.465555191040039, "rewards/rejected": -6.1454758644104, "step": 2643 }, { "epoch": 0.59, "learning_rate": 8.298862155082065e-06, "logits/chosen": -1.0193127393722534, "logits/rejected": -1.0312563180923462, "logps/chosen": -137.60704040527344, "logps/rejected": -131.74072265625, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": -2.959369659423828, "rewards/margins": 2.947152614593506, "rewards/rejected": -5.906522274017334, "step": 2644 }, { "epoch": 0.59, "learning_rate": 8.297515070518008e-06, "logits/chosen": -0.6485093235969543, "logits/rejected": -0.6485093235969543, "logps/chosen": -109.29013061523438, "logps/rejected": -109.29013061523438, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.0864243507385254, "rewards/margins": 0.0, "rewards/rejected": -2.0864243507385254, "step": 2645 }, { "epoch": 0.59, "learning_rate": 8.296167562231192e-06, "logits/chosen": -0.9589677453041077, "logits/rejected": -0.9268327355384827, "logps/chosen": -89.45167541503906, "logps/rejected": -50.51978302001953, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": 0.9566559195518494, "rewards/margins": 4.034880638122559, "rewards/rejected": -3.0782246589660645, "step": 2646 }, { "epoch": 0.59, "learning_rate": 8.294819630394767e-06, "logits/chosen": -0.7147660255432129, "logits/rejected": -0.6301295161247253, "logps/chosen": -106.45729064941406, "logps/rejected": -200.6938934326172, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.4348091185092926, "rewards/margins": 8.631763458251953, "rewards/rejected": -9.066572189331055, "step": 2647 }, { "epoch": 0.59, "learning_rate": 8.293471275181938e-06, "logits/chosen": -0.9010039567947388, "logits/rejected": -0.9247479438781738, "logps/chosen": -100.69762420654297, "logps/rejected": -96.90440368652344, "loss": 0.2904, "rewards/accuracies": 1.0, "rewards/chosen": 0.279000848531723, "rewards/margins": 0.23915404081344604, "rewards/rejected": 0.03984680399298668, "step": 2648 }, { "epoch": 0.59, "learning_rate": 8.292122496765969e-06, "logits/chosen": -0.9979677200317383, "logits/rejected": -0.9265299439430237, "logps/chosen": -167.15011596679688, "logps/rejected": -246.11050415039062, "loss": 0.9763, "rewards/accuracies": 0.0, "rewards/chosen": -2.7117981910705566, "rewards/margins": -1.7992050647735596, "rewards/rejected": -0.9125930666923523, "step": 2649 }, { "epoch": 0.59, "learning_rate": 8.290773295320173e-06, "logits/chosen": -0.8493028283119202, "logits/rejected": -0.8034422993659973, "logps/chosen": -193.36875915527344, "logps/rejected": -160.9654541015625, "loss": 0.4703, "rewards/accuracies": 1.0, "rewards/chosen": -3.9873015880584717, "rewards/margins": 2.050518274307251, "rewards/rejected": -6.037819862365723, "step": 2650 }, { "epoch": 0.59, "learning_rate": 8.28942367101792e-06, "logits/chosen": -0.9703479409217834, "logits/rejected": -0.6385498642921448, "logps/chosen": -168.90892028808594, "logps/rejected": -561.130126953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.9640121459960938, "rewards/margins": 46.30654525756836, "rewards/rejected": -45.342533111572266, "step": 2651 }, { "epoch": 0.59, "learning_rate": 8.288073624032634e-06, "logits/chosen": -1.1021435260772705, "logits/rejected": -1.0890681743621826, "logps/chosen": -139.0505828857422, "logps/rejected": -174.87416076660156, "loss": 0.4615, "rewards/accuracies": 1.0, "rewards/chosen": -1.8888847827911377, "rewards/margins": 0.859194278717041, "rewards/rejected": -2.7480790615081787, "step": 2652 }, { "epoch": 0.59, "learning_rate": 8.28672315453779e-06, "logits/chosen": -0.8657508492469788, "logits/rejected": -0.4407566785812378, "logps/chosen": -169.87338256835938, "logps/rejected": -351.4723205566406, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 2.268627882003784, "rewards/margins": 25.154634475708008, "rewards/rejected": -22.88600730895996, "step": 2653 }, { "epoch": 0.59, "learning_rate": 8.285372262706922e-06, "logits/chosen": -1.3699584007263184, "logits/rejected": -1.363379955291748, "logps/chosen": -115.39018249511719, "logps/rejected": -119.41381072998047, "loss": 0.6007, "rewards/accuracies": 0.0, "rewards/chosen": -0.5262252688407898, "rewards/margins": -0.8427482843399048, "rewards/rejected": 0.3165229856967926, "step": 2654 }, { "epoch": 0.59, "learning_rate": 8.284020948713615e-06, "logits/chosen": -0.8876053690910339, "logits/rejected": -1.0113449096679688, "logps/chosen": -196.4576416015625, "logps/rejected": -167.49618530273438, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 1.2927764654159546, "rewards/margins": 3.932187080383301, "rewards/rejected": -2.6394104957580566, "step": 2655 }, { "epoch": 0.59, "learning_rate": 8.282669212731511e-06, "logits/chosen": -0.9443441033363342, "logits/rejected": -0.9698641896247864, "logps/chosen": -172.89599609375, "logps/rejected": -351.39093017578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 3.6679444313049316, "rewards/margins": 17.168771743774414, "rewards/rejected": -13.500826835632324, "step": 2656 }, { "epoch": 0.59, "learning_rate": 8.281317054934306e-06, "logits/chosen": -0.9950684905052185, "logits/rejected": -0.9950684905052185, "logps/chosen": -107.705078125, "logps/rejected": -107.705078125, "loss": 0.3472, "rewards/accuracies": 0.0, "rewards/chosen": -2.989314317703247, "rewards/margins": 0.0, "rewards/rejected": -2.989314317703247, "step": 2657 }, { "epoch": 0.59, "learning_rate": 8.279964475495745e-06, "logits/chosen": -0.7846241593360901, "logits/rejected": -0.7868412733078003, "logps/chosen": -145.183837890625, "logps/rejected": -158.7286834716797, "loss": 0.8804, "rewards/accuracies": 0.0, "rewards/chosen": -5.686673164367676, "rewards/margins": -1.5704011917114258, "rewards/rejected": -4.11627197265625, "step": 2658 }, { "epoch": 0.59, "learning_rate": 8.278611474589635e-06, "logits/chosen": -1.0155127048492432, "logits/rejected": -0.9840583801269531, "logps/chosen": -96.39935302734375, "logps/rejected": -155.9309844970703, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": 0.46158066391944885, "rewards/margins": 3.6340432167053223, "rewards/rejected": -3.1724624633789062, "step": 2659 }, { "epoch": 0.59, "learning_rate": 8.277258052389834e-06, "logits/chosen": -0.7732348442077637, "logits/rejected": -0.7366955876350403, "logps/chosen": -125.40591430664062, "logps/rejected": -132.55612182617188, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -2.039963483810425, "rewards/margins": 3.9132797718048096, "rewards/rejected": -5.953243255615234, "step": 2660 }, { "epoch": 0.59, "learning_rate": 8.27590420907025e-06, "logits/chosen": -0.9449440836906433, "logits/rejected": -0.8471452593803406, "logps/chosen": -67.8604736328125, "logps/rejected": -166.90463256835938, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": 1.1927025318145752, "rewards/margins": 8.10507583618164, "rewards/rejected": -6.9123735427856445, "step": 2661 }, { "epoch": 0.59, "learning_rate": 8.27454994480485e-06, "logits/chosen": -0.6389173865318298, "logits/rejected": -0.5824055671691895, "logps/chosen": -74.19526672363281, "logps/rejected": -114.0478515625, "loss": 0.0783, "rewards/accuracies": 1.0, "rewards/chosen": -1.2124245166778564, "rewards/margins": 1.941124677658081, "rewards/rejected": -3.1535491943359375, "step": 2662 }, { "epoch": 0.59, "learning_rate": 8.273195259767653e-06, "logits/chosen": -0.6898821592330933, "logits/rejected": -0.5778324604034424, "logps/chosen": -93.79641723632812, "logps/rejected": -216.77828979492188, "loss": 0.2066, "rewards/accuracies": 1.0, "rewards/chosen": -1.3141907453536987, "rewards/margins": 9.343564987182617, "rewards/rejected": -10.657755851745605, "step": 2663 }, { "epoch": 0.59, "learning_rate": 8.271840154132736e-06, "logits/chosen": -0.9778059124946594, "logits/rejected": -0.9946913719177246, "logps/chosen": -210.51480102539062, "logps/rejected": -139.8453369140625, "loss": 0.0873, "rewards/accuracies": 1.0, "rewards/chosen": 2.70703125, "rewards/margins": 9.810644149780273, "rewards/rejected": -7.103612422943115, "step": 2664 }, { "epoch": 0.59, "learning_rate": 8.270484628074222e-06, "logits/chosen": -1.1768561601638794, "logits/rejected": -1.2641595602035522, "logps/chosen": -192.96441650390625, "logps/rejected": -72.06980895996094, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 2.0843658447265625, "rewards/margins": 5.949273109436035, "rewards/rejected": -3.8649075031280518, "step": 2665 }, { "epoch": 0.59, "learning_rate": 8.269128681766296e-06, "logits/chosen": -0.9586151838302612, "logits/rejected": -0.865885853767395, "logps/chosen": -177.4691619873047, "logps/rejected": -307.432373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.570456027984619, "rewards/margins": 23.236026763916016, "rewards/rejected": -19.665571212768555, "step": 2666 }, { "epoch": 0.59, "learning_rate": 8.267772315383195e-06, "logits/chosen": -0.8598219156265259, "logits/rejected": -0.8058639168739319, "logps/chosen": -98.14533233642578, "logps/rejected": -191.5867156982422, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.07938766479492188, "rewards/margins": 7.908313751220703, "rewards/rejected": -7.987701416015625, "step": 2667 }, { "epoch": 0.59, "learning_rate": 8.266415529099205e-06, "logits/chosen": -0.7828114628791809, "logits/rejected": -0.605972409248352, "logps/chosen": -166.184814453125, "logps/rejected": -331.88494873046875, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": 2.831860303878784, "rewards/margins": 18.077640533447266, "rewards/rejected": -15.245779991149902, "step": 2668 }, { "epoch": 0.59, "learning_rate": 8.265058323088673e-06, "logits/chosen": -1.2020517587661743, "logits/rejected": -1.1489920616149902, "logps/chosen": -169.40069580078125, "logps/rejected": -175.77755737304688, "loss": 0.0618, "rewards/accuracies": 1.0, "rewards/chosen": 0.11687927693128586, "rewards/margins": 2.034750461578369, "rewards/rejected": -1.917871117591858, "step": 2669 }, { "epoch": 0.59, "learning_rate": 8.263700697525994e-06, "logits/chosen": -0.6709709763526917, "logits/rejected": -0.5678352117538452, "logps/chosen": -182.61695861816406, "logps/rejected": -237.2834014892578, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.0937988758087158, "rewards/margins": 11.575019836425781, "rewards/rejected": -10.481221199035645, "step": 2670 }, { "epoch": 0.59, "learning_rate": 8.262342652585621e-06, "logits/chosen": -0.6577218174934387, "logits/rejected": -0.6577218174934387, "logps/chosen": -191.96694946289062, "logps/rejected": -191.96694946289062, "loss": 0.3493, "rewards/accuracies": 0.0, "rewards/chosen": -3.1762712001800537, "rewards/margins": 0.0, "rewards/rejected": -3.1762712001800537, "step": 2671 }, { "epoch": 0.59, "learning_rate": 8.260984188442063e-06, "logits/chosen": -1.1107197999954224, "logits/rejected": -1.12214994430542, "logps/chosen": -210.39837646484375, "logps/rejected": -216.72003173828125, "loss": 0.2405, "rewards/accuracies": 1.0, "rewards/chosen": 0.940142810344696, "rewards/margins": 4.592726230621338, "rewards/rejected": -3.652583360671997, "step": 2672 }, { "epoch": 0.59, "learning_rate": 8.259625305269873e-06, "logits/chosen": -1.0523381233215332, "logits/rejected": -0.981843113899231, "logps/chosen": -94.88321685791016, "logps/rejected": -139.5860595703125, "loss": 0.0883, "rewards/accuracies": 1.0, "rewards/chosen": -0.8078086972236633, "rewards/margins": 1.6447839736938477, "rewards/rejected": -2.452592611312866, "step": 2673 }, { "epoch": 0.59, "learning_rate": 8.258266003243667e-06, "logits/chosen": -0.8337715268135071, "logits/rejected": -0.8187583684921265, "logps/chosen": -82.35188293457031, "logps/rejected": -132.2770233154297, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": 0.4807479977607727, "rewards/margins": 3.116929531097412, "rewards/rejected": -2.636181592941284, "step": 2674 }, { "epoch": 0.59, "learning_rate": 8.256906282538113e-06, "logits/chosen": -0.9807930588722229, "logits/rejected": -1.0290461778640747, "logps/chosen": -127.17997741699219, "logps/rejected": -88.2672119140625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.050939179956912994, "rewards/margins": 4.940796852111816, "rewards/rejected": -4.991735935211182, "step": 2675 }, { "epoch": 0.59, "learning_rate": 8.25554614332793e-06, "logits/chosen": -0.6100059747695923, "logits/rejected": -0.6100059747695923, "logps/chosen": -115.56269836425781, "logps/rejected": -115.56269836425781, "loss": 0.6628, "rewards/accuracies": 0.0, "rewards/chosen": -2.7895874977111816, "rewards/margins": 0.0, "rewards/rejected": -2.7895874977111816, "step": 2676 }, { "epoch": 0.59, "learning_rate": 8.254185585787895e-06, "logits/chosen": -1.0212154388427734, "logits/rejected": -1.010473608970642, "logps/chosen": -129.67681884765625, "logps/rejected": -103.74827575683594, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": 0.7003921866416931, "rewards/margins": 3.3553032875061035, "rewards/rejected": -2.6549110412597656, "step": 2677 }, { "epoch": 0.59, "learning_rate": 8.252824610092835e-06, "logits/chosen": -0.7387437224388123, "logits/rejected": -0.6383742690086365, "logps/chosen": -109.67729187011719, "logps/rejected": -239.93966674804688, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.20904541015625, "rewards/margins": 4.59205961227417, "rewards/rejected": -4.80110502243042, "step": 2678 }, { "epoch": 0.59, "learning_rate": 8.251463216417632e-06, "logits/chosen": -0.8675079345703125, "logits/rejected": -0.8367501497268677, "logps/chosen": -81.74520111083984, "logps/rejected": -151.15213012695312, "loss": 0.0672, "rewards/accuracies": 1.0, "rewards/chosen": 0.6390830874443054, "rewards/margins": 1.9492270946502686, "rewards/rejected": -1.310144066810608, "step": 2679 }, { "epoch": 0.59, "learning_rate": 8.250101404937223e-06, "logits/chosen": -0.7620197534561157, "logits/rejected": -0.7030344605445862, "logps/chosen": -76.79930877685547, "logps/rejected": -194.4436798095703, "loss": 0.1258, "rewards/accuracies": 1.0, "rewards/chosen": 0.4168495237827301, "rewards/margins": 1.2813469171524048, "rewards/rejected": -0.8644973635673523, "step": 2680 }, { "epoch": 0.59, "learning_rate": 8.248739175826594e-06, "logits/chosen": -1.2124364376068115, "logits/rejected": -0.9964702725410461, "logps/chosen": -86.64105224609375, "logps/rejected": -303.6169738769531, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -1.223876953125, "rewards/margins": 4.138576030731201, "rewards/rejected": -5.362452983856201, "step": 2681 }, { "epoch": 0.59, "learning_rate": 8.247376529260793e-06, "logits/chosen": -1.2314409017562866, "logits/rejected": -1.2447096109390259, "logps/chosen": -110.0308837890625, "logps/rejected": -60.435028076171875, "loss": 0.511, "rewards/accuracies": 1.0, "rewards/chosen": -1.9585739374160767, "rewards/margins": 0.9434465169906616, "rewards/rejected": -2.9020204544067383, "step": 2682 }, { "epoch": 0.59, "learning_rate": 8.246013465414914e-06, "logits/chosen": -0.5714853405952454, "logits/rejected": -0.5711078643798828, "logps/chosen": -66.84806823730469, "logps/rejected": -79.47400665283203, "loss": 0.4008, "rewards/accuracies": 0.0, "rewards/chosen": -1.4636482000350952, "rewards/margins": -0.20644450187683105, "rewards/rejected": -1.2572036981582642, "step": 2683 }, { "epoch": 0.59, "learning_rate": 8.244649984464109e-06, "logits/chosen": -0.9091160893440247, "logits/rejected": -0.9091160893440247, "logps/chosen": -286.5040588378906, "logps/rejected": -286.5040588378906, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.59588623046875, "rewards/margins": 0.0, "rewards/rejected": -4.59588623046875, "step": 2684 }, { "epoch": 0.59, "learning_rate": 8.243286086583577e-06, "logits/chosen": -1.1449848413467407, "logits/rejected": -1.3010798692703247, "logps/chosen": -211.6643524169922, "logps/rejected": -88.16659545898438, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": 2.1139144897460938, "rewards/margins": 3.381979465484619, "rewards/rejected": -1.2680648565292358, "step": 2685 }, { "epoch": 0.59, "learning_rate": 8.241921771948583e-06, "logits/chosen": -1.4653328657150269, "logits/rejected": -1.3957840204238892, "logps/chosen": -48.6802864074707, "logps/rejected": -119.20510864257812, "loss": 0.282, "rewards/accuracies": 1.0, "rewards/chosen": -1.9707924127578735, "rewards/margins": 0.7574485540390015, "rewards/rejected": -2.728240966796875, "step": 2686 }, { "epoch": 0.59, "learning_rate": 8.240557040734434e-06, "logits/chosen": -1.1344804763793945, "logits/rejected": -1.1344804763793945, "logps/chosen": -110.63520812988281, "logps/rejected": -110.63520812988281, "loss": 0.347, "rewards/accuracies": 0.0, "rewards/chosen": -4.5643463134765625, "rewards/margins": 0.0, "rewards/rejected": -4.5643463134765625, "step": 2687 }, { "epoch": 0.59, "learning_rate": 8.239191893116494e-06, "logits/chosen": -0.9792228937149048, "logits/rejected": -0.931344211101532, "logps/chosen": -162.07546997070312, "logps/rejected": -336.36309814453125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 1.2329788208007812, "rewards/margins": 8.787885665893555, "rewards/rejected": -7.554907321929932, "step": 2688 }, { "epoch": 0.6, "learning_rate": 8.237826329270183e-06, "logits/chosen": -0.8725420236587524, "logits/rejected": -0.8725420236587524, "logps/chosen": -88.18522644042969, "logps/rejected": -88.18522644042969, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -1.5586098432540894, "rewards/margins": 0.0, "rewards/rejected": -1.5586098432540894, "step": 2689 }, { "epoch": 0.6, "learning_rate": 8.236460349370972e-06, "logits/chosen": -0.7910497784614563, "logits/rejected": -0.7091581225395203, "logps/chosen": -192.1732177734375, "logps/rejected": -324.05078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.966447591781616, "rewards/margins": 7.6884355545043945, "rewards/rejected": -4.721988201141357, "step": 2690 }, { "epoch": 0.6, "learning_rate": 8.235093953594387e-06, "logits/chosen": -0.8253140449523926, "logits/rejected": -0.7264835238456726, "logps/chosen": -204.57369995117188, "logps/rejected": -277.4629211425781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.6786162853240967, "rewards/margins": 14.133575439453125, "rewards/rejected": -11.45495891571045, "step": 2691 }, { "epoch": 0.6, "learning_rate": 8.233727142116007e-06, "logits/chosen": -0.75096595287323, "logits/rejected": -0.7667495012283325, "logps/chosen": -123.70095825195312, "logps/rejected": -107.5032730102539, "loss": 0.2752, "rewards/accuracies": 1.0, "rewards/chosen": -1.4180153608322144, "rewards/margins": 0.3090858459472656, "rewards/rejected": -1.72710120677948, "step": 2692 }, { "epoch": 0.6, "learning_rate": 8.232359915111462e-06, "logits/chosen": -0.8376532793045044, "logits/rejected": -0.8376532793045044, "logps/chosen": -244.90115356445312, "logps/rejected": -244.90115356445312, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.43402099609375, "rewards/margins": 0.0, "rewards/rejected": -2.43402099609375, "step": 2693 }, { "epoch": 0.6, "learning_rate": 8.230992272756438e-06, "logits/chosen": -1.0370712280273438, "logits/rejected": -1.04619300365448, "logps/chosen": -130.77149963378906, "logps/rejected": -66.23271179199219, "loss": 0.0749, "rewards/accuracies": 1.0, "rewards/chosen": -2.0600478649139404, "rewards/margins": 1.8241093158721924, "rewards/rejected": -3.884157180786133, "step": 2694 }, { "epoch": 0.6, "learning_rate": 8.229624215226675e-06, "logits/chosen": -1.0680047273635864, "logits/rejected": -1.0387154817581177, "logps/chosen": -97.2711181640625, "logps/rejected": -213.68972778320312, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.05847625806927681, "rewards/margins": 6.478337287902832, "rewards/rejected": -6.41986083984375, "step": 2695 }, { "epoch": 0.6, "learning_rate": 8.228255742697962e-06, "logits/chosen": -1.0564441680908203, "logits/rejected": -1.147413730621338, "logps/chosen": -197.36346435546875, "logps/rejected": -39.320037841796875, "loss": 1.1639, "rewards/accuracies": 0.0, "rewards/chosen": -4.284100532531738, "rewards/margins": -2.220210313796997, "rewards/rejected": -2.063890218734741, "step": 2696 }, { "epoch": 0.6, "learning_rate": 8.226886855346148e-06, "logits/chosen": -0.7075106501579285, "logits/rejected": -0.6927676796913147, "logps/chosen": -104.56537628173828, "logps/rejected": -102.51130676269531, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": -0.6341957449913025, "rewards/margins": 2.367462158203125, "rewards/rejected": -3.0016579627990723, "step": 2697 }, { "epoch": 0.6, "learning_rate": 8.225517553347132e-06, "logits/chosen": -0.8473604917526245, "logits/rejected": -0.8490897417068481, "logps/chosen": -100.50459289550781, "logps/rejected": -84.18254089355469, "loss": 0.2318, "rewards/accuracies": 1.0, "rewards/chosen": 0.6255089044570923, "rewards/margins": 0.7007026672363281, "rewards/rejected": -0.07519378513097763, "step": 2698 }, { "epoch": 0.6, "learning_rate": 8.224147836876861e-06, "logits/chosen": -1.257969617843628, "logits/rejected": -1.2766926288604736, "logps/chosen": -180.43038940429688, "logps/rejected": -97.17354583740234, "loss": 0.261, "rewards/accuracies": 1.0, "rewards/chosen": -1.9041870832443237, "rewards/margins": 2.7784438133239746, "rewards/rejected": -4.682631015777588, "step": 2699 }, { "epoch": 0.6, "learning_rate": 8.222777706111345e-06, "logits/chosen": -0.7925161123275757, "logits/rejected": -0.7964104413986206, "logps/chosen": -145.43060302734375, "logps/rejected": -188.0882568359375, "loss": 0.1712, "rewards/accuracies": 1.0, "rewards/chosen": -1.6524841785430908, "rewards/margins": 2.9125730991363525, "rewards/rejected": -4.565057277679443, "step": 2700 }, { "epoch": 0.6, "learning_rate": 8.221407161226641e-06, "logits/chosen": -0.8716755509376526, "logits/rejected": -0.8409132957458496, "logps/chosen": -109.24473571777344, "logps/rejected": -50.84271240234375, "loss": 0.0637, "rewards/accuracies": 1.0, "rewards/chosen": -0.4342453181743622, "rewards/margins": 1.9991497993469238, "rewards/rejected": -2.4333951473236084, "step": 2701 }, { "epoch": 0.6, "learning_rate": 8.220036202398861e-06, "logits/chosen": -0.8337860107421875, "logits/rejected": -0.7954282760620117, "logps/chosen": -85.0904541015625, "logps/rejected": -124.68970489501953, "loss": 0.1139, "rewards/accuracies": 1.0, "rewards/chosen": 0.15775452554225922, "rewards/margins": 1.362836480140686, "rewards/rejected": -1.2050819396972656, "step": 2702 }, { "epoch": 0.6, "learning_rate": 8.21866482980417e-06, "logits/chosen": -0.9798240065574646, "logits/rejected": -0.9798240065574646, "logps/chosen": -86.4100341796875, "logps/rejected": -86.4100341796875, "loss": 0.3471, "rewards/accuracies": 0.0, "rewards/chosen": 0.2508041560649872, "rewards/margins": 0.0, "rewards/rejected": 0.2508041560649872, "step": 2703 }, { "epoch": 0.6, "learning_rate": 8.217293043618786e-06, "logits/chosen": -0.683746337890625, "logits/rejected": -0.6077675819396973, "logps/chosen": -92.45491027832031, "logps/rejected": -207.21963500976562, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.360774278640747, "rewards/margins": 7.990138053894043, "rewards/rejected": -9.350912094116211, "step": 2704 }, { "epoch": 0.6, "learning_rate": 8.21592084401898e-06, "logits/chosen": -0.8862214684486389, "logits/rejected": -0.8653791546821594, "logps/chosen": -99.74552917480469, "logps/rejected": -265.59136962890625, "loss": 0.4368, "rewards/accuracies": 1.0, "rewards/chosen": -0.5233268737792969, "rewards/margins": 0.1761879324913025, "rewards/rejected": -0.6995148062705994, "step": 2705 }, { "epoch": 0.6, "learning_rate": 8.214548231181077e-06, "logits/chosen": -1.003658413887024, "logits/rejected": -1.0183217525482178, "logps/chosen": -127.96293640136719, "logps/rejected": -130.75515747070312, "loss": 0.0769, "rewards/accuracies": 1.0, "rewards/chosen": -0.4501640498638153, "rewards/margins": 1.7941031455993652, "rewards/rejected": -2.244267225265503, "step": 2706 }, { "epoch": 0.6, "learning_rate": 8.213175205281451e-06, "logits/chosen": -0.5992766618728638, "logits/rejected": -0.5721439123153687, "logps/chosen": -211.84678649902344, "logps/rejected": -230.13319396972656, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 2.7500336170196533, "rewards/margins": 7.656485557556152, "rewards/rejected": -4.90645170211792, "step": 2707 }, { "epoch": 0.6, "learning_rate": 8.211801766496537e-06, "logits/chosen": -0.8983628749847412, "logits/rejected": -0.9220743179321289, "logps/chosen": -174.91091918945312, "logps/rejected": -207.048583984375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 3.8719544410705566, "rewards/margins": 11.907354354858398, "rewards/rejected": -8.035399436950684, "step": 2708 }, { "epoch": 0.6, "learning_rate": 8.210427915002819e-06, "logits/chosen": -1.0367469787597656, "logits/rejected": -1.0475249290466309, "logps/chosen": -54.085845947265625, "logps/rejected": -49.42784881591797, "loss": 0.5086, "rewards/accuracies": 0.0, "rewards/chosen": -0.9836990237236023, "rewards/margins": -0.5682170391082764, "rewards/rejected": -0.41548195481300354, "step": 2709 }, { "epoch": 0.6, "learning_rate": 8.20905365097683e-06, "logits/chosen": -0.8335481286048889, "logits/rejected": -0.6772324442863464, "logps/chosen": -230.40826416015625, "logps/rejected": -498.0226745605469, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 3.426800489425659, "rewards/margins": 8.273348808288574, "rewards/rejected": -4.846548557281494, "step": 2710 }, { "epoch": 0.6, "learning_rate": 8.20767897459516e-06, "logits/chosen": -0.9277111291885376, "logits/rejected": -0.9386644959449768, "logps/chosen": -201.3644256591797, "logps/rejected": -153.00030517578125, "loss": 0.2668, "rewards/accuracies": 1.0, "rewards/chosen": 0.7128311395645142, "rewards/margins": 3.053964138031006, "rewards/rejected": -2.3411331176757812, "step": 2711 }, { "epoch": 0.6, "learning_rate": 8.206303886034455e-06, "logits/chosen": -0.9477423429489136, "logits/rejected": -0.9231361150741577, "logps/chosen": -110.56062316894531, "logps/rejected": -43.89641571044922, "loss": 0.4324, "rewards/accuracies": 0.0, "rewards/chosen": -3.090657949447632, "rewards/margins": -0.250521183013916, "rewards/rejected": -2.840136766433716, "step": 2712 }, { "epoch": 0.6, "learning_rate": 8.204928385471406e-06, "logits/chosen": -0.9999891519546509, "logits/rejected": -0.9646337628364563, "logps/chosen": -181.8784637451172, "logps/rejected": -71.88645935058594, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": 1.238490343093872, "rewards/margins": 5.872522354125977, "rewards/rejected": -4.634031772613525, "step": 2713 }, { "epoch": 0.6, "learning_rate": 8.203552473082766e-06, "logits/chosen": -1.071555495262146, "logits/rejected": -1.1015042066574097, "logps/chosen": -69.58363342285156, "logps/rejected": -98.7142562866211, "loss": 0.0921, "rewards/accuracies": 1.0, "rewards/chosen": 0.587554931640625, "rewards/margins": 1.600518822669983, "rewards/rejected": -1.012963891029358, "step": 2714 }, { "epoch": 0.6, "learning_rate": 8.202176149045334e-06, "logits/chosen": -0.9945744872093201, "logits/rejected": -1.0277634859085083, "logps/chosen": -200.2657928466797, "logps/rejected": -232.91004943847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7337860465049744, "rewards/margins": 10.873059272766113, "rewards/rejected": -11.606844902038574, "step": 2715 }, { "epoch": 0.6, "learning_rate": 8.200799413535962e-06, "logits/chosen": -1.1349221467971802, "logits/rejected": -1.1004372835159302, "logps/chosen": -192.04605102539062, "logps/rejected": -173.95497131347656, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 1.508569359779358, "rewards/margins": 6.829453945159912, "rewards/rejected": -5.320884704589844, "step": 2716 }, { "epoch": 0.6, "learning_rate": 8.199422266731563e-06, "logits/chosen": -1.122685432434082, "logits/rejected": -1.0775779485702515, "logps/chosen": -79.19645690917969, "logps/rejected": -116.8919906616211, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -0.6668136715888977, "rewards/margins": 2.9472999572753906, "rewards/rejected": -3.6141135692596436, "step": 2717 }, { "epoch": 0.6, "learning_rate": 8.198044708809094e-06, "logits/chosen": -0.9951591491699219, "logits/rejected": -0.9951591491699219, "logps/chosen": -143.9293670654297, "logps/rejected": -143.9293670654297, "loss": 0.3567, "rewards/accuracies": 0.0, "rewards/chosen": -2.2899835109710693, "rewards/margins": 0.0, "rewards/rejected": -2.2899835109710693, "step": 2718 }, { "epoch": 0.6, "learning_rate": 8.196666739945566e-06, "logits/chosen": -1.0101057291030884, "logits/rejected": -0.9831020832061768, "logps/chosen": -99.25175476074219, "logps/rejected": -153.6475830078125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 1.1489372253417969, "rewards/margins": 6.048059940338135, "rewards/rejected": -4.899122714996338, "step": 2719 }, { "epoch": 0.6, "learning_rate": 8.195288360318048e-06, "logits/chosen": -0.988152801990509, "logits/rejected": -1.0682181119918823, "logps/chosen": -182.58468627929688, "logps/rejected": -176.18492126464844, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 2.0347259044647217, "rewards/margins": 4.876983642578125, "rewards/rejected": -2.8422577381134033, "step": 2720 }, { "epoch": 0.6, "learning_rate": 8.193909570103656e-06, "logits/chosen": -0.8218833208084106, "logits/rejected": -0.3512849509716034, "logps/chosen": -166.20913696289062, "logps/rejected": -506.9400634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.07118377834558487, "rewards/margins": 17.688072204589844, "rewards/rejected": -17.61688804626465, "step": 2721 }, { "epoch": 0.6, "learning_rate": 8.192530369479562e-06, "logits/chosen": -1.024686336517334, "logits/rejected": -1.0390962362289429, "logps/chosen": -126.5374755859375, "logps/rejected": -102.04854583740234, "loss": 0.0379, "rewards/accuracies": 1.0, "rewards/chosen": 1.22520911693573, "rewards/margins": 2.5447044372558594, "rewards/rejected": -1.319495439529419, "step": 2722 }, { "epoch": 0.6, "learning_rate": 8.191150758622991e-06, "logits/chosen": -1.3765026330947876, "logits/rejected": -1.4496392011642456, "logps/chosen": -219.3143768310547, "logps/rejected": -176.57122802734375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 0.9850662350654602, "rewards/margins": 4.8834991455078125, "rewards/rejected": -3.898432970046997, "step": 2723 }, { "epoch": 0.6, "learning_rate": 8.189770737711218e-06, "logits/chosen": -0.8488835692405701, "logits/rejected": -0.8488835692405701, "logps/chosen": -214.77426147460938, "logps/rejected": -214.77426147460938, "loss": 0.4489, "rewards/accuracies": 0.0, "rewards/chosen": -10.24272632598877, "rewards/margins": 0.0, "rewards/rejected": -10.24272632598877, "step": 2724 }, { "epoch": 0.6, "learning_rate": 8.188390306921574e-06, "logits/chosen": -0.8420072197914124, "logits/rejected": -0.8319976329803467, "logps/chosen": -194.4098358154297, "logps/rejected": -124.66326904296875, "loss": 0.0687, "rewards/accuracies": 1.0, "rewards/chosen": 1.1416031122207642, "rewards/margins": 2.040187120437622, "rewards/rejected": -0.8985840082168579, "step": 2725 }, { "epoch": 0.6, "learning_rate": 8.18700946643144e-06, "logits/chosen": -0.9683261513710022, "logits/rejected": -1.0618678331375122, "logps/chosen": -148.35562133789062, "logps/rejected": -71.65371704101562, "loss": 0.1691, "rewards/accuracies": 1.0, "rewards/chosen": -3.145733594894409, "rewards/margins": 0.9140622615814209, "rewards/rejected": -4.05979585647583, "step": 2726 }, { "epoch": 0.6, "learning_rate": 8.18562821641825e-06, "logits/chosen": -0.8249046206474304, "logits/rejected": -0.6915073394775391, "logps/chosen": -179.07550048828125, "logps/rejected": -396.253662109375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.5593109130859375, "rewards/margins": 10.264810562133789, "rewards/rejected": -9.705499649047852, "step": 2727 }, { "epoch": 0.6, "learning_rate": 8.184246557059493e-06, "logits/chosen": -0.9501515626907349, "logits/rejected": -1.0357547998428345, "logps/chosen": -191.21688842773438, "logps/rejected": -143.5872344970703, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 1.6899200677871704, "rewards/margins": 5.461621284484863, "rewards/rejected": -3.7717010974884033, "step": 2728 }, { "epoch": 0.6, "learning_rate": 8.182864488532707e-06, "logits/chosen": -1.1396290063858032, "logits/rejected": -1.1567020416259766, "logps/chosen": -122.60009765625, "logps/rejected": -173.27447509765625, "loss": 0.1327, "rewards/accuracies": 1.0, "rewards/chosen": 0.09545212239027023, "rewards/margins": 7.47463846206665, "rewards/rejected": -7.379186153411865, "step": 2729 }, { "epoch": 0.6, "learning_rate": 8.181482011015488e-06, "logits/chosen": -1.1251921653747559, "logits/rejected": -0.5635157823562622, "logps/chosen": -279.0677490234375, "logps/rejected": -619.92041015625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 0.7025299072265625, "rewards/margins": 42.99869918823242, "rewards/rejected": -42.29616928100586, "step": 2730 }, { "epoch": 0.6, "learning_rate": 8.180099124685476e-06, "logits/chosen": -0.7929583787918091, "logits/rejected": -0.7579867839813232, "logps/chosen": -118.83665466308594, "logps/rejected": -261.7261962890625, "loss": 0.2986, "rewards/accuracies": 1.0, "rewards/chosen": -1.862995982170105, "rewards/margins": 0.20230567455291748, "rewards/rejected": -2.0653016567230225, "step": 2731 }, { "epoch": 0.6, "learning_rate": 8.178715829720374e-06, "logits/chosen": -0.8578457236289978, "logits/rejected": -0.7961243391036987, "logps/chosen": -165.65386962890625, "logps/rejected": -184.8507080078125, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": 0.3431945741176605, "rewards/margins": 6.413330078125, "rewards/rejected": -6.070135593414307, "step": 2732 }, { "epoch": 0.6, "learning_rate": 8.177332126297928e-06, "logits/chosen": -1.2736101150512695, "logits/rejected": -1.303911805152893, "logps/chosen": -91.40658569335938, "logps/rejected": -102.1441879272461, "loss": 0.3559, "rewards/accuracies": 0.0, "rewards/chosen": -1.241206407546997, "rewards/margins": -0.037053704261779785, "rewards/rejected": -1.2041527032852173, "step": 2733 }, { "epoch": 0.61, "learning_rate": 8.175948014595942e-06, "logits/chosen": -0.8927236199378967, "logits/rejected": -0.866355836391449, "logps/chosen": -67.16694641113281, "logps/rejected": -167.5692138671875, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/chosen": -0.1135353073477745, "rewards/margins": 4.731076717376709, "rewards/rejected": -4.844612121582031, "step": 2734 }, { "epoch": 0.61, "learning_rate": 8.17456349479227e-06, "logits/chosen": -0.8091316223144531, "logits/rejected": -0.7849403619766235, "logps/chosen": -80.90021514892578, "logps/rejected": -150.27890014648438, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": -0.19593583047389984, "rewards/margins": 5.757907390594482, "rewards/rejected": -5.953843116760254, "step": 2735 }, { "epoch": 0.61, "learning_rate": 8.17317856706482e-06, "logits/chosen": -1.0603759288787842, "logits/rejected": -1.063519835472107, "logps/chosen": -65.58299255371094, "logps/rejected": -179.989990234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.7145218253135681, "rewards/margins": 8.963166236877441, "rewards/rejected": -8.248644828796387, "step": 2736 }, { "epoch": 0.61, "learning_rate": 8.171793231591553e-06, "logits/chosen": -0.840457022190094, "logits/rejected": -0.822713315486908, "logps/chosen": -126.71534729003906, "logps/rejected": -103.25877380371094, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": 1.4136962890625, "rewards/margins": 2.939718723297119, "rewards/rejected": -1.5260223150253296, "step": 2737 }, { "epoch": 0.61, "learning_rate": 8.170407488550482e-06, "logits/chosen": -0.8229608535766602, "logits/rejected": -0.8106235265731812, "logps/chosen": -68.04132843017578, "logps/rejected": -84.74653625488281, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -0.07301407307386398, "rewards/margins": 3.2842538356781006, "rewards/rejected": -3.3572678565979004, "step": 2738 }, { "epoch": 0.61, "learning_rate": 8.169021338119669e-06, "logits/chosen": -0.9999361038208008, "logits/rejected": -0.9999361038208008, "logps/chosen": -137.7661895751953, "logps/rejected": -137.7661895751953, "loss": 0.3661, "rewards/accuracies": 0.0, "rewards/chosen": -6.056773662567139, "rewards/margins": 0.0, "rewards/rejected": -6.056773662567139, "step": 2739 }, { "epoch": 0.61, "learning_rate": 8.167634780477231e-06, "logits/chosen": -0.5635397434234619, "logits/rejected": -0.5176819562911987, "logps/chosen": -111.28184509277344, "logps/rejected": -222.41725158691406, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -4.276786804199219, "rewards/margins": 3.0214385986328125, "rewards/rejected": -7.298225402832031, "step": 2740 }, { "epoch": 0.61, "learning_rate": 8.16624781580134e-06, "logits/chosen": -1.0963873863220215, "logits/rejected": -1.1373646259307861, "logps/chosen": -122.68084716796875, "logps/rejected": -92.7606430053711, "loss": 0.1659, "rewards/accuracies": 1.0, "rewards/chosen": 0.7565643191337585, "rewards/margins": 2.6593024730682373, "rewards/rejected": -1.9027382135391235, "step": 2741 }, { "epoch": 0.61, "learning_rate": 8.164860444270217e-06, "logits/chosen": -1.0105340480804443, "logits/rejected": -1.014601469039917, "logps/chosen": -209.25341796875, "logps/rejected": -143.3661651611328, "loss": 0.0832, "rewards/accuracies": 1.0, "rewards/chosen": 1.3786834478378296, "rewards/margins": 3.744631767272949, "rewards/rejected": -2.365948438644409, "step": 2742 }, { "epoch": 0.61, "learning_rate": 8.163472666062133e-06, "logits/chosen": -0.9558122158050537, "logits/rejected": -0.9383342862129211, "logps/chosen": -90.2960433959961, "logps/rejected": -129.42169189453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 1.721570611000061, "rewards/margins": 6.267200469970703, "rewards/rejected": -4.545629978179932, "step": 2743 }, { "epoch": 0.61, "learning_rate": 8.162084481355418e-06, "logits/chosen": -1.0732316970825195, "logits/rejected": -1.0042413473129272, "logps/chosen": -152.2086181640625, "logps/rejected": -220.75582885742188, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": -0.39198610186576843, "rewards/margins": 2.7304229736328125, "rewards/rejected": -3.1224091053009033, "step": 2744 }, { "epoch": 0.61, "learning_rate": 8.160695890328448e-06, "logits/chosen": -1.3768829107284546, "logits/rejected": -1.3658571243286133, "logps/chosen": -94.42076110839844, "logps/rejected": -99.06034851074219, "loss": 0.2806, "rewards/accuracies": 1.0, "rewards/chosen": -0.6891693472862244, "rewards/margins": 0.28402936458587646, "rewards/rejected": -0.9731987118721008, "step": 2745 }, { "epoch": 0.61, "learning_rate": 8.159306893159652e-06, "logits/chosen": -1.140859603881836, "logits/rejected": -1.050282597541809, "logps/chosen": -119.48960876464844, "logps/rejected": -301.90887451171875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 0.36477890610694885, "rewards/margins": 5.632479190826416, "rewards/rejected": -5.2677001953125, "step": 2746 }, { "epoch": 0.61, "learning_rate": 8.157917490027518e-06, "logits/chosen": -1.2406630516052246, "logits/rejected": -1.4304819107055664, "logps/chosen": -138.46981811523438, "logps/rejected": -110.86041259765625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.3382507264614105, "rewards/margins": 5.91619873046875, "rewards/rejected": -5.577948093414307, "step": 2747 }, { "epoch": 0.61, "learning_rate": 8.156527681110576e-06, "logits/chosen": -1.0273351669311523, "logits/rejected": -0.8019734621047974, "logps/chosen": -128.42953491210938, "logps/rejected": -565.4027099609375, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -0.19482193887233734, "rewards/margins": 49.10439682006836, "rewards/rejected": -49.299217224121094, "step": 2748 }, { "epoch": 0.61, "learning_rate": 8.155137466587415e-06, "logits/chosen": -0.8934357762336731, "logits/rejected": -0.7899762392044067, "logps/chosen": -176.97242736816406, "logps/rejected": -138.16677856445312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.7931840419769287, "rewards/margins": 10.473291397094727, "rewards/rejected": -7.680107116699219, "step": 2749 }, { "epoch": 0.61, "learning_rate": 8.153746846636675e-06, "logits/chosen": -0.7254616022109985, "logits/rejected": -0.7054563760757446, "logps/chosen": -157.11087036132812, "logps/rejected": -237.9144287109375, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -1.0837478637695312, "rewards/margins": 8.715202331542969, "rewards/rejected": -9.7989501953125, "step": 2750 }, { "epoch": 0.61, "learning_rate": 8.152355821437048e-06, "logits/chosen": -1.1763429641723633, "logits/rejected": -1.2596426010131836, "logps/chosen": -183.42672729492188, "logps/rejected": -99.20722961425781, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.42001038789749146, "rewards/margins": 5.4062066078186035, "rewards/rejected": -5.826217174530029, "step": 2751 }, { "epoch": 0.61, "learning_rate": 8.150964391167273e-06, "logits/chosen": -1.0234335660934448, "logits/rejected": -0.628391444683075, "logps/chosen": -127.45138549804688, "logps/rejected": -239.37942504882812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.76360023021698, "rewards/margins": 13.285524368286133, "rewards/rejected": -15.049124717712402, "step": 2752 }, { "epoch": 0.61, "learning_rate": 8.149572556006151e-06, "logits/chosen": -0.7151641249656677, "logits/rejected": -0.6762299537658691, "logps/chosen": -270.54791259765625, "logps/rejected": -253.0059051513672, "loss": 0.0967, "rewards/accuracies": 1.0, "rewards/chosen": 0.18660278618335724, "rewards/margins": 1.5533188581466675, "rewards/rejected": -1.3667160272598267, "step": 2753 }, { "epoch": 0.61, "learning_rate": 8.148180316132526e-06, "logits/chosen": -1.0323379039764404, "logits/rejected": -0.9584479331970215, "logps/chosen": -197.6676788330078, "logps/rejected": -395.8258972167969, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": 2.2849838733673096, "rewards/margins": 15.630800247192383, "rewards/rejected": -13.345816612243652, "step": 2754 }, { "epoch": 0.61, "learning_rate": 8.146787671725299e-06, "logits/chosen": -1.161482810974121, "logits/rejected": -1.1052809953689575, "logps/chosen": -87.9825439453125, "logps/rejected": -167.13748168945312, "loss": 1.1469, "rewards/accuracies": 1.0, "rewards/chosen": -1.2922935485839844, "rewards/margins": 6.031708717346191, "rewards/rejected": -7.324002265930176, "step": 2755 }, { "epoch": 0.61, "learning_rate": 8.14539462296342e-06, "logits/chosen": -0.621905505657196, "logits/rejected": -0.621905505657196, "logps/chosen": -134.9837646484375, "logps/rejected": -134.9837646484375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.900975227355957, "rewards/margins": 0.0, "rewards/rejected": -5.900975227355957, "step": 2756 }, { "epoch": 0.61, "learning_rate": 8.144001170025894e-06, "logits/chosen": -0.9370583295822144, "logits/rejected": -0.9369677305221558, "logps/chosen": -247.5357208251953, "logps/rejected": -212.14569091796875, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -3.3548080921173096, "rewards/margins": 2.6095736026763916, "rewards/rejected": -5.964381694793701, "step": 2757 }, { "epoch": 0.61, "learning_rate": 8.142607313091775e-06, "logits/chosen": -0.8554998636245728, "logits/rejected": -0.8083005547523499, "logps/chosen": -110.84541320800781, "logps/rejected": -184.98008728027344, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.4399116039276123, "rewards/margins": 6.000112533569336, "rewards/rejected": -8.440024375915527, "step": 2758 }, { "epoch": 0.61, "learning_rate": 8.141213052340171e-06, "logits/chosen": -1.1490648984909058, "logits/rejected": -1.1490648984909058, "logps/chosen": -67.09268188476562, "logps/rejected": -67.09268188476562, "loss": 0.3669, "rewards/accuracies": 0.0, "rewards/chosen": 0.3204902708530426, "rewards/margins": 0.0, "rewards/rejected": 0.3204902708530426, "step": 2759 }, { "epoch": 0.61, "learning_rate": 8.13981838795024e-06, "logits/chosen": -0.9932052493095398, "logits/rejected": -1.015174388885498, "logps/chosen": -151.293701171875, "logps/rejected": -128.52992248535156, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": 1.8363693952560425, "rewards/margins": 2.7355408668518066, "rewards/rejected": -0.8991714715957642, "step": 2760 }, { "epoch": 0.61, "learning_rate": 8.138423320101196e-06, "logits/chosen": -0.9575458765029907, "logits/rejected": -0.9287378787994385, "logps/chosen": -109.10986328125, "logps/rejected": -138.95932006835938, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.34676894545555115, "rewards/margins": 5.0777153968811035, "rewards/rejected": -5.4244842529296875, "step": 2761 }, { "epoch": 0.61, "learning_rate": 8.1370278489723e-06, "logits/chosen": -0.9437323808670044, "logits/rejected": -0.4970431327819824, "logps/chosen": -174.9217987060547, "logps/rejected": -512.2053833007812, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 0.2521865963935852, "rewards/margins": 41.660518646240234, "rewards/rejected": -41.40833282470703, "step": 2762 }, { "epoch": 0.61, "learning_rate": 8.135631974742863e-06, "logits/chosen": -1.019801378250122, "logits/rejected": -1.0101186037063599, "logps/chosen": -106.28064727783203, "logps/rejected": -185.72882080078125, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": -1.2727898359298706, "rewards/margins": 3.3816018104553223, "rewards/rejected": -4.654391765594482, "step": 2763 }, { "epoch": 0.61, "learning_rate": 8.13423569759226e-06, "logits/chosen": -0.9147160649299622, "logits/rejected": -0.929587185382843, "logps/chosen": -125.24673461914062, "logps/rejected": -262.95416259765625, "loss": 1.9015, "rewards/accuracies": 0.0, "rewards/chosen": -3.494354248046875, "rewards/margins": -3.1856536865234375, "rewards/rejected": -0.3087005615234375, "step": 2764 }, { "epoch": 0.61, "learning_rate": 8.132839017699901e-06, "logits/chosen": -1.0458505153656006, "logits/rejected": -0.9940066337585449, "logps/chosen": -166.61285400390625, "logps/rejected": -247.78756713867188, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -3.796142578125, "rewards/margins": 4.741366386413574, "rewards/rejected": -8.537508964538574, "step": 2765 }, { "epoch": 0.61, "learning_rate": 8.131441935245261e-06, "logits/chosen": -0.8537899851799011, "logits/rejected": -0.8842666149139404, "logps/chosen": -178.36105346679688, "logps/rejected": -177.39952087402344, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": 0.8973647952079773, "rewards/margins": 3.6591262817382812, "rewards/rejected": -2.761761426925659, "step": 2766 }, { "epoch": 0.61, "learning_rate": 8.13004445040786e-06, "logits/chosen": -1.1458370685577393, "logits/rejected": -1.1319141387939453, "logps/chosen": -63.69056701660156, "logps/rejected": -126.03514862060547, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": 1.120093584060669, "rewards/margins": 2.4777207374572754, "rewards/rejected": -1.3576271533966064, "step": 2767 }, { "epoch": 0.61, "learning_rate": 8.128646563367271e-06, "logits/chosen": -0.7696234583854675, "logits/rejected": -0.7013939023017883, "logps/chosen": -124.5831298828125, "logps/rejected": -175.29562377929688, "loss": 0.362, "rewards/accuracies": 1.0, "rewards/chosen": -1.322914958000183, "rewards/margins": 5.073697566986084, "rewards/rejected": -6.396612644195557, "step": 2768 }, { "epoch": 0.61, "learning_rate": 8.12724827430312e-06, "logits/chosen": -1.0152933597564697, "logits/rejected": -1.0004760026931763, "logps/chosen": -115.42317199707031, "logps/rejected": -215.95558166503906, "loss": 0.0807, "rewards/accuracies": 1.0, "rewards/chosen": -2.3081023693084717, "rewards/margins": 1.745431661605835, "rewards/rejected": -4.053534030914307, "step": 2769 }, { "epoch": 0.61, "learning_rate": 8.125849583395083e-06, "logits/chosen": -1.07954740524292, "logits/rejected": -1.0429636240005493, "logps/chosen": -110.65162658691406, "logps/rejected": -140.6409149169922, "loss": 0.0835, "rewards/accuracies": 1.0, "rewards/chosen": -3.8748703002929688, "rewards/margins": 1.7206454277038574, "rewards/rejected": -5.595515727996826, "step": 2770 }, { "epoch": 0.61, "learning_rate": 8.124450490822889e-06, "logits/chosen": -0.7781820893287659, "logits/rejected": -0.6958101391792297, "logps/chosen": -148.76907348632812, "logps/rejected": -264.9474182128906, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.8524932861328125, "rewards/margins": 5.807467460632324, "rewards/rejected": -8.659960746765137, "step": 2771 }, { "epoch": 0.61, "learning_rate": 8.123050996766317e-06, "logits/chosen": -0.759458601474762, "logits/rejected": -0.7490693926811218, "logps/chosen": -256.7025146484375, "logps/rejected": -249.1439666748047, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.05324707180261612, "rewards/margins": 6.3265533447265625, "rewards/rejected": -6.273306369781494, "step": 2772 }, { "epoch": 0.61, "learning_rate": 8.121651101405202e-06, "logits/chosen": -1.0366042852401733, "logits/rejected": -1.0777868032455444, "logps/chosen": -133.6241455078125, "logps/rejected": -100.43727111816406, "loss": 0.6436, "rewards/accuracies": 0.0, "rewards/chosen": -0.524810791015625, "rewards/margins": -0.9642670154571533, "rewards/rejected": 0.43945619463920593, "step": 2773 }, { "epoch": 0.61, "learning_rate": 8.120250804919424e-06, "logits/chosen": -0.9539883136749268, "logits/rejected": -0.9714330434799194, "logps/chosen": -83.25196838378906, "logps/rejected": -84.88872528076172, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": 0.6049087643623352, "rewards/margins": 4.436481475830078, "rewards/rejected": -3.8315727710723877, "step": 2774 }, { "epoch": 0.61, "learning_rate": 8.118850107488916e-06, "logits/chosen": -1.4470088481903076, "logits/rejected": -1.4990499019622803, "logps/chosen": -141.71278381347656, "logps/rejected": -111.90914916992188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.151509091258049, "rewards/margins": 6.163149356842041, "rewards/rejected": -6.3146586418151855, "step": 2775 }, { "epoch": 0.61, "learning_rate": 8.117449009293668e-06, "logits/chosen": -1.0110151767730713, "logits/rejected": -0.9613876938819885, "logps/chosen": -128.23760986328125, "logps/rejected": -171.11070251464844, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/chosen": 0.6268554925918579, "rewards/margins": 4.0604448318481445, "rewards/rejected": -3.433589220046997, "step": 2776 }, { "epoch": 0.61, "learning_rate": 8.116047510513718e-06, "logits/chosen": -1.1352721452713013, "logits/rejected": -1.1338675022125244, "logps/chosen": -177.70413208007812, "logps/rejected": -229.53094482421875, "loss": 0.52, "rewards/accuracies": 1.0, "rewards/chosen": 1.9796295166015625, "rewards/margins": 7.075173854827881, "rewards/rejected": -5.095544338226318, "step": 2777 }, { "epoch": 0.61, "learning_rate": 8.114645611329152e-06, "logits/chosen": -0.8937547206878662, "logits/rejected": -0.8874993324279785, "logps/chosen": -132.2628173828125, "logps/rejected": -193.0535888671875, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": 0.10642700642347336, "rewards/margins": 4.7762956619262695, "rewards/rejected": -4.669868469238281, "step": 2778 }, { "epoch": 0.62, "learning_rate": 8.113243311920113e-06, "logits/chosen": -0.9306789040565491, "logits/rejected": -0.9291335344314575, "logps/chosen": -93.14867401123047, "logps/rejected": -108.41996002197266, "loss": 0.5202, "rewards/accuracies": 0.0, "rewards/chosen": -0.3856353759765625, "rewards/margins": -0.5157852172851562, "rewards/rejected": 0.13014984130859375, "step": 2779 }, { "epoch": 0.62, "learning_rate": 8.111840612466792e-06, "logits/chosen": -1.1431374549865723, "logits/rejected": -1.106716513633728, "logps/chosen": -110.76785278320312, "logps/rejected": -133.1455841064453, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 0.6650169491767883, "rewards/margins": 5.012476444244385, "rewards/rejected": -4.347459316253662, "step": 2780 }, { "epoch": 0.62, "learning_rate": 8.110437513149433e-06, "logits/chosen": -0.9324445128440857, "logits/rejected": -0.8630049824714661, "logps/chosen": -93.16719055175781, "logps/rejected": -68.465576171875, "loss": 0.0923, "rewards/accuracies": 1.0, "rewards/chosen": -1.358758568763733, "rewards/margins": 2.59635066986084, "rewards/rejected": -3.955109119415283, "step": 2781 }, { "epoch": 0.62, "learning_rate": 8.109034014148331e-06, "logits/chosen": -1.1382445096969604, "logits/rejected": -1.1423596143722534, "logps/chosen": -77.23129272460938, "logps/rejected": -134.44155883789062, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 1.1588042974472046, "rewards/margins": 5.597508430480957, "rewards/rejected": -4.438704013824463, "step": 2782 }, { "epoch": 0.62, "learning_rate": 8.107630115643832e-06, "logits/chosen": -0.5827124714851379, "logits/rejected": -0.5846034288406372, "logps/chosen": -192.30352783203125, "logps/rejected": -144.89703369140625, "loss": 0.101, "rewards/accuracies": 1.0, "rewards/chosen": -1.9902251958847046, "rewards/margins": 1.650473952293396, "rewards/rejected": -3.6406991481781006, "step": 2783 }, { "epoch": 0.62, "learning_rate": 8.106225817816333e-06, "logits/chosen": -1.2135189771652222, "logits/rejected": -1.1616746187210083, "logps/chosen": -109.99095153808594, "logps/rejected": -217.36801147460938, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 0.8115302920341492, "rewards/margins": 5.8582329750061035, "rewards/rejected": -5.046702861785889, "step": 2784 }, { "epoch": 0.62, "learning_rate": 8.104821120846287e-06, "logits/chosen": -1.1546837091445923, "logits/rejected": -1.178303599357605, "logps/chosen": -193.73007202148438, "logps/rejected": -178.2607421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.154118299484253, "rewards/margins": 9.368057250976562, "rewards/rejected": -7.213939189910889, "step": 2785 }, { "epoch": 0.62, "learning_rate": 8.103416024914186e-06, "logits/chosen": -0.7079489827156067, "logits/rejected": -0.7079489827156067, "logps/chosen": -254.50584411621094, "logps/rejected": -254.50584411621094, "loss": 0.3529, "rewards/accuracies": 0.0, "rewards/chosen": -1.320623755455017, "rewards/margins": 0.0, "rewards/rejected": -1.320623755455017, "step": 2786 }, { "epoch": 0.62, "learning_rate": 8.102010530200589e-06, "logits/chosen": -0.7996761798858643, "logits/rejected": -0.7996761798858643, "logps/chosen": -80.97976684570312, "logps/rejected": -80.97976684570312, "loss": 0.5823, "rewards/accuracies": 0.0, "rewards/chosen": -2.1229264736175537, "rewards/margins": 0.0, "rewards/rejected": -2.1229264736175537, "step": 2787 }, { "epoch": 0.62, "learning_rate": 8.100604636886095e-06, "logits/chosen": -0.624870777130127, "logits/rejected": -0.5663647651672363, "logps/chosen": -92.78648376464844, "logps/rejected": -59.572174072265625, "loss": 0.1044, "rewards/accuracies": 1.0, "rewards/chosen": 0.5686798095703125, "rewards/margins": 2.8017518520355225, "rewards/rejected": -2.23307204246521, "step": 2788 }, { "epoch": 0.62, "learning_rate": 8.09919834515136e-06, "logits/chosen": -1.261401891708374, "logits/rejected": -1.2294858694076538, "logps/chosen": -115.2511978149414, "logps/rejected": -175.20156860351562, "loss": 0.2821, "rewards/accuracies": 1.0, "rewards/chosen": -0.5755821466445923, "rewards/margins": 0.27693402767181396, "rewards/rejected": -0.8525161743164062, "step": 2789 }, { "epoch": 0.62, "learning_rate": 8.097791655177085e-06, "logits/chosen": -0.8503904938697815, "logits/rejected": -0.9126407504081726, "logps/chosen": -189.127197265625, "logps/rejected": -139.28924560546875, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 1.1240036487579346, "rewards/margins": 3.941105842590332, "rewards/rejected": -2.8171021938323975, "step": 2790 }, { "epoch": 0.62, "learning_rate": 8.096384567144033e-06, "logits/chosen": -0.9644814729690552, "logits/rejected": -0.9608952403068542, "logps/chosen": -85.27534484863281, "logps/rejected": -77.19084167480469, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.2185112088918686, "rewards/margins": 5.494712829589844, "rewards/rejected": -5.2762017250061035, "step": 2791 }, { "epoch": 0.62, "learning_rate": 8.094977081233006e-06, "logits/chosen": -0.8788627982139587, "logits/rejected": -0.8728294968605042, "logps/chosen": -96.19847106933594, "logps/rejected": -64.79036712646484, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.2340850830078125, "rewards/margins": 4.403134822845459, "rewards/rejected": -4.6372199058532715, "step": 2792 }, { "epoch": 0.62, "learning_rate": 8.093569197624864e-06, "logits/chosen": -1.2516400814056396, "logits/rejected": -1.3608390092849731, "logps/chosen": -208.233154296875, "logps/rejected": -84.21693420410156, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.3321700990200043, "rewards/margins": 3.390031576156616, "rewards/rejected": -3.7222015857696533, "step": 2793 }, { "epoch": 0.62, "learning_rate": 8.092160916500515e-06, "logits/chosen": -1.3046479225158691, "logits/rejected": -1.2556426525115967, "logps/chosen": -96.10399627685547, "logps/rejected": -176.22430419921875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": 0.5101707577705383, "rewards/margins": 6.738969802856445, "rewards/rejected": -6.228798866271973, "step": 2794 }, { "epoch": 0.62, "learning_rate": 8.090752238040925e-06, "logits/chosen": -0.9832186102867126, "logits/rejected": -1.0054312944412231, "logps/chosen": -163.0079803466797, "logps/rejected": -234.13543701171875, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -3.487687826156616, "rewards/margins": 3.6164491176605225, "rewards/rejected": -7.104136943817139, "step": 2795 }, { "epoch": 0.62, "learning_rate": 8.0893431624271e-06, "logits/chosen": -0.938446044921875, "logits/rejected": -0.8396304845809937, "logps/chosen": -173.812255859375, "logps/rejected": -397.79052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.06457214802503586, "rewards/margins": 12.330514907836914, "rewards/rejected": -12.265942573547363, "step": 2796 }, { "epoch": 0.62, "learning_rate": 8.087933689840107e-06, "logits/chosen": -0.9417562484741211, "logits/rejected": -0.9392510056495667, "logps/chosen": -111.63044738769531, "logps/rejected": -159.78652954101562, "loss": 0.106, "rewards/accuracies": 1.0, "rewards/chosen": -0.6895660758018494, "rewards/margins": 1.5141737461090088, "rewards/rejected": -2.203739881515503, "step": 2797 }, { "epoch": 0.62, "learning_rate": 8.086523820461057e-06, "logits/chosen": -0.8350753784179688, "logits/rejected": -0.8539211750030518, "logps/chosen": -196.9188232421875, "logps/rejected": -152.115478515625, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.3625549376010895, "rewards/margins": 4.231370449066162, "rewards/rejected": -4.593925476074219, "step": 2798 }, { "epoch": 0.62, "learning_rate": 8.085113554471115e-06, "logits/chosen": -1.37104332447052, "logits/rejected": -1.3978317975997925, "logps/chosen": -195.7401580810547, "logps/rejected": -124.91029357910156, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": 0.8410568237304688, "rewards/margins": 2.1222991943359375, "rewards/rejected": -1.2812423706054688, "step": 2799 }, { "epoch": 0.62, "learning_rate": 8.083702892051499e-06, "logits/chosen": -0.9222468733787537, "logits/rejected": -0.8585959672927856, "logps/chosen": -71.4325180053711, "logps/rejected": -118.87334442138672, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": -0.7246818542480469, "rewards/margins": 2.8046462535858154, "rewards/rejected": -3.5293281078338623, "step": 2800 }, { "epoch": 0.62, "learning_rate": 8.082291833383475e-06, "logits/chosen": -0.8324397206306458, "logits/rejected": -0.81662517786026, "logps/chosen": -140.80343627929688, "logps/rejected": -159.0235137939453, "loss": 0.1629, "rewards/accuracies": 1.0, "rewards/chosen": -3.87786865234375, "rewards/margins": 0.971196174621582, "rewards/rejected": -4.849064826965332, "step": 2801 }, { "epoch": 0.62, "learning_rate": 8.080880378648359e-06, "logits/chosen": -1.0575257539749146, "logits/rejected": -0.974884569644928, "logps/chosen": -106.5368881225586, "logps/rejected": -246.31622314453125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.21801376342773438, "rewards/margins": 11.416800498962402, "rewards/rejected": -11.198786735534668, "step": 2802 }, { "epoch": 0.62, "learning_rate": 8.079468528027519e-06, "logits/chosen": -1.1223862171173096, "logits/rejected": -1.1223862171173096, "logps/chosen": -180.1956787109375, "logps/rejected": -180.1956787109375, "loss": 0.3555, "rewards/accuracies": 0.0, "rewards/chosen": -1.0092941522598267, "rewards/margins": 0.0, "rewards/rejected": -1.0092941522598267, "step": 2803 }, { "epoch": 0.62, "learning_rate": 8.078056281702378e-06, "logits/chosen": -1.147150993347168, "logits/rejected": -1.148219347000122, "logps/chosen": -235.474609375, "logps/rejected": -181.8879852294922, "loss": 0.5303, "rewards/accuracies": 1.0, "rewards/chosen": 0.22992248833179474, "rewards/margins": 6.960106372833252, "rewards/rejected": -6.730184078216553, "step": 2804 }, { "epoch": 0.62, "learning_rate": 8.076643639854405e-06, "logits/chosen": -1.0987447500228882, "logits/rejected": -1.03190016746521, "logps/chosen": -102.67283630371094, "logps/rejected": -109.33373260498047, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6176964044570923, "rewards/margins": 3.249281406402588, "rewards/rejected": -3.8669776916503906, "step": 2805 }, { "epoch": 0.62, "learning_rate": 8.075230602665118e-06, "logits/chosen": -0.9672781229019165, "logits/rejected": -1.037273645401001, "logps/chosen": -121.9936752319336, "logps/rejected": -104.01498413085938, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -2.2736594676971436, "rewards/margins": 4.001996040344238, "rewards/rejected": -6.275655269622803, "step": 2806 }, { "epoch": 0.62, "learning_rate": 8.073817170316093e-06, "logits/chosen": -0.7792693376541138, "logits/rejected": -0.8019323348999023, "logps/chosen": -180.848388671875, "logps/rejected": -186.17715454101562, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -2.2873475551605225, "rewards/margins": 4.285223007202148, "rewards/rejected": -6.57257080078125, "step": 2807 }, { "epoch": 0.62, "learning_rate": 8.07240334298895e-06, "logits/chosen": -0.8622283339500427, "logits/rejected": -0.888141930103302, "logps/chosen": -214.88851928710938, "logps/rejected": -87.59259033203125, "loss": 0.0842, "rewards/accuracies": 1.0, "rewards/chosen": 1.427754282951355, "rewards/margins": 1.6993820667266846, "rewards/rejected": -0.271627813577652, "step": 2808 }, { "epoch": 0.62, "learning_rate": 8.070989120865362e-06, "logits/chosen": -0.43950164318084717, "logits/rejected": -0.49176153540611267, "logps/chosen": -126.74630737304688, "logps/rejected": -245.418701171875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.08762665092945099, "rewards/margins": 12.761537551879883, "rewards/rejected": -12.849164009094238, "step": 2809 }, { "epoch": 0.62, "learning_rate": 8.069574504127058e-06, "logits/chosen": -1.0193119049072266, "logits/rejected": -1.0190707445144653, "logps/chosen": -88.72962951660156, "logps/rejected": -149.81817626953125, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.8149765133857727, "rewards/margins": 4.400890827178955, "rewards/rejected": -5.215867519378662, "step": 2810 }, { "epoch": 0.62, "learning_rate": 8.068159492955806e-06, "logits/chosen": -0.9268998503684998, "logits/rejected": -0.7471007108688354, "logps/chosen": -132.95388793945312, "logps/rejected": -247.02040100097656, "loss": 2.1251, "rewards/accuracies": 0.0, "rewards/chosen": -6.205766201019287, "rewards/margins": -4.151156425476074, "rewards/rejected": -2.054609775543213, "step": 2811 }, { "epoch": 0.62, "learning_rate": 8.066744087533436e-06, "logits/chosen": -0.9724919199943542, "logits/rejected": -0.9241620302200317, "logps/chosen": -122.89747619628906, "logps/rejected": -236.93519592285156, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": -2.210186004638672, "rewards/margins": 7.956347465515137, "rewards/rejected": -10.166533470153809, "step": 2812 }, { "epoch": 0.62, "learning_rate": 8.065328288041823e-06, "logits/chosen": -1.0001699924468994, "logits/rejected": -1.00102698802948, "logps/chosen": -153.6795654296875, "logps/rejected": -79.15044403076172, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 2.1577956676483154, "rewards/margins": 6.798366546630859, "rewards/rejected": -4.640570640563965, "step": 2813 }, { "epoch": 0.62, "learning_rate": 8.063912094662893e-06, "logits/chosen": -0.8426228165626526, "logits/rejected": -0.8373627066612244, "logps/chosen": -111.75991821289062, "logps/rejected": -105.3819580078125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 0.5352874994277954, "rewards/margins": 4.864394187927246, "rewards/rejected": -4.32910680770874, "step": 2814 }, { "epoch": 0.62, "learning_rate": 8.062495507578628e-06, "logits/chosen": -0.8594239950180054, "logits/rejected": -0.7852660417556763, "logps/chosen": -201.01849365234375, "logps/rejected": -126.47461700439453, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": 2.4679017066955566, "rewards/margins": 6.153652191162109, "rewards/rejected": -3.685750722885132, "step": 2815 }, { "epoch": 0.62, "learning_rate": 8.061078526971048e-06, "logits/chosen": -1.5991370677947998, "logits/rejected": -1.4694737195968628, "logps/chosen": -159.47171020507812, "logps/rejected": -236.8721160888672, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.08030243217945099, "rewards/margins": 5.800958156585693, "rewards/rejected": -5.720655918121338, "step": 2816 }, { "epoch": 0.62, "learning_rate": 8.059661153022236e-06, "logits/chosen": -0.7453880310058594, "logits/rejected": -0.7944506406784058, "logps/chosen": -166.13633728027344, "logps/rejected": -125.97750854492188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 3.3349578380584717, "rewards/margins": 6.977179527282715, "rewards/rejected": -3.6422219276428223, "step": 2817 }, { "epoch": 0.62, "learning_rate": 8.058243385914324e-06, "logits/chosen": -0.7400029897689819, "logits/rejected": -0.7283238172531128, "logps/chosen": -111.67362976074219, "logps/rejected": -149.70108032226562, "loss": 0.0481, "rewards/accuracies": 1.0, "rewards/chosen": -3.5065019130706787, "rewards/margins": 2.586120367050171, "rewards/rejected": -6.09262228012085, "step": 2818 }, { "epoch": 0.62, "learning_rate": 8.056825225829486e-06, "logits/chosen": -1.1192988157272339, "logits/rejected": -1.1683155298233032, "logps/chosen": -218.9146270751953, "logps/rejected": -106.44155883789062, "loss": 0.1562, "rewards/accuracies": 1.0, "rewards/chosen": -0.9523483514785767, "rewards/margins": 1.0051536560058594, "rewards/rejected": -1.957502007484436, "step": 2819 }, { "epoch": 0.62, "learning_rate": 8.055406672949957e-06, "logits/chosen": -1.1781806945800781, "logits/rejected": -1.021793007850647, "logps/chosen": -120.95361328125, "logps/rejected": -307.895263671875, "loss": 0.2152, "rewards/accuracies": 1.0, "rewards/chosen": -0.7849746942520142, "rewards/margins": 0.6202437877655029, "rewards/rejected": -1.405218482017517, "step": 2820 }, { "epoch": 0.62, "learning_rate": 8.053987727458013e-06, "logits/chosen": -1.0023521184921265, "logits/rejected": -0.996345579624176, "logps/chosen": -110.56251525878906, "logps/rejected": -150.88479614257812, "loss": 0.1363, "rewards/accuracies": 1.0, "rewards/chosen": -0.46350860595703125, "rewards/margins": 1.235321044921875, "rewards/rejected": -1.6988296508789062, "step": 2821 }, { "epoch": 0.62, "learning_rate": 8.05256838953599e-06, "logits/chosen": -1.0347652435302734, "logits/rejected": -1.057936668395996, "logps/chosen": -199.82171630859375, "logps/rejected": -173.532958984375, "loss": 0.0673, "rewards/accuracies": 1.0, "rewards/chosen": -0.946636974811554, "rewards/margins": 1.9368836879730225, "rewards/rejected": -2.8835206031799316, "step": 2822 }, { "epoch": 0.62, "learning_rate": 8.051148659366265e-06, "logits/chosen": -0.8341772556304932, "logits/rejected": -0.8069897890090942, "logps/chosen": -149.7301788330078, "logps/rejected": -193.86248779296875, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": 0.3550003170967102, "rewards/margins": 3.578448534011841, "rewards/rejected": -3.2234482765197754, "step": 2823 }, { "epoch": 0.63, "learning_rate": 8.049728537131275e-06, "logits/chosen": -0.8012943267822266, "logits/rejected": -0.57711261510849, "logps/chosen": -125.13243103027344, "logps/rejected": -501.2882385253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.929835557937622, "rewards/margins": 9.650389671325684, "rewards/rejected": -6.720553874969482, "step": 2824 }, { "epoch": 0.63, "learning_rate": 8.048308023013498e-06, "logits/chosen": -0.9328866600990295, "logits/rejected": -0.9025569558143616, "logps/chosen": -97.01948547363281, "logps/rejected": -192.76324462890625, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -0.5613495111465454, "rewards/margins": 6.9263529777526855, "rewards/rejected": -7.487702369689941, "step": 2825 }, { "epoch": 0.63, "learning_rate": 8.046887117195467e-06, "logits/chosen": -1.1786094903945923, "logits/rejected": -1.1773630380630493, "logps/chosen": -160.20962524414062, "logps/rejected": -121.744384765625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.3592376708984375, "rewards/margins": 4.552952766418457, "rewards/rejected": -4.9121904373168945, "step": 2826 }, { "epoch": 0.63, "learning_rate": 8.045465819859766e-06, "logits/chosen": -0.8444434404373169, "logits/rejected": -0.8444434404373169, "logps/chosen": -120.53794860839844, "logps/rejected": -120.53794860839844, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.9063987731933594, "rewards/margins": 0.0, "rewards/rejected": -2.9063987731933594, "step": 2827 }, { "epoch": 0.63, "learning_rate": 8.044044131189029e-06, "logits/chosen": -1.1619597673416138, "logits/rejected": -1.1659523248672485, "logps/chosen": -127.24562072753906, "logps/rejected": -123.01832580566406, "loss": 0.0479, "rewards/accuracies": 1.0, "rewards/chosen": 0.879040539264679, "rewards/margins": 2.3012237548828125, "rewards/rejected": -1.4221832752227783, "step": 2828 }, { "epoch": 0.63, "learning_rate": 8.042622051365938e-06, "logits/chosen": -0.9468860626220703, "logits/rejected": -0.84557044506073, "logps/chosen": -185.73008728027344, "logps/rejected": -201.47640991210938, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 1.5845520496368408, "rewards/margins": 4.321023941040039, "rewards/rejected": -2.736471652984619, "step": 2829 }, { "epoch": 0.63, "learning_rate": 8.041199580573229e-06, "logits/chosen": -0.8096157908439636, "logits/rejected": -0.8196871876716614, "logps/chosen": -133.14393615722656, "logps/rejected": -109.62653350830078, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": 0.5221115350723267, "rewards/margins": 2.8241934776306152, "rewards/rejected": -2.302082061767578, "step": 2830 }, { "epoch": 0.63, "learning_rate": 8.039776718993683e-06, "logits/chosen": -0.9885366559028625, "logits/rejected": -1.0697113275527954, "logps/chosen": -212.64114379882812, "logps/rejected": -76.64944458007812, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 0.6558181643486023, "rewards/margins": 4.918278217315674, "rewards/rejected": -4.262460231781006, "step": 2831 }, { "epoch": 0.63, "learning_rate": 8.038353466810137e-06, "logits/chosen": -0.9070435762405396, "logits/rejected": -0.8750612139701843, "logps/chosen": -157.58065795898438, "logps/rejected": -192.52764892578125, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.5959869623184204, "rewards/margins": 3.776272773742676, "rewards/rejected": -5.372259616851807, "step": 2832 }, { "epoch": 0.63, "learning_rate": 8.036929824205476e-06, "logits/chosen": -1.189699411392212, "logits/rejected": -1.1934964656829834, "logps/chosen": -96.09732055664062, "logps/rejected": -117.62199401855469, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -0.009173584170639515, "rewards/margins": 3.363905429840088, "rewards/rejected": -3.3730790615081787, "step": 2833 }, { "epoch": 0.63, "learning_rate": 8.03550579136263e-06, "logits/chosen": -1.2169733047485352, "logits/rejected": -1.250091314315796, "logps/chosen": -163.38076782226562, "logps/rejected": -129.24444580078125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.04015503078699112, "rewards/margins": 5.677102565765381, "rewards/rejected": -5.6369476318359375, "step": 2834 }, { "epoch": 0.63, "learning_rate": 8.03408136846459e-06, "logits/chosen": -1.3156404495239258, "logits/rejected": -1.2456351518630981, "logps/chosen": -91.89593505859375, "logps/rejected": -145.22918701171875, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -0.676129162311554, "rewards/margins": 3.048065185546875, "rewards/rejected": -3.724194288253784, "step": 2835 }, { "epoch": 0.63, "learning_rate": 8.032656555694388e-06, "logits/chosen": -1.2581698894500732, "logits/rejected": -1.117400884628296, "logps/chosen": -137.97323608398438, "logps/rejected": -299.840087890625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.17355041205883026, "rewards/margins": 6.2311859130859375, "rewards/rejected": -6.404736518859863, "step": 2836 }, { "epoch": 0.63, "learning_rate": 8.031231353235104e-06, "logits/chosen": -0.9535568356513977, "logits/rejected": -0.9918332099914551, "logps/chosen": -249.39395141601562, "logps/rejected": -179.36062622070312, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -3.273935079574585, "rewards/margins": 3.1487228870391846, "rewards/rejected": -6.4226579666137695, "step": 2837 }, { "epoch": 0.63, "learning_rate": 8.029805761269881e-06, "logits/chosen": -0.8003710508346558, "logits/rejected": -0.8109464049339294, "logps/chosen": -106.52740478515625, "logps/rejected": -66.86029052734375, "loss": 0.4013, "rewards/accuracies": 1.0, "rewards/chosen": -3.7936418056488037, "rewards/margins": 0.15127015113830566, "rewards/rejected": -3.9449119567871094, "step": 2838 }, { "epoch": 0.63, "learning_rate": 8.028379779981902e-06, "logits/chosen": -1.4096754789352417, "logits/rejected": -1.3090234994888306, "logps/chosen": -79.990234375, "logps/rejected": -235.56768798828125, "loss": 0.0774, "rewards/accuracies": 1.0, "rewards/chosen": -0.3069198727607727, "rewards/margins": 5.34519624710083, "rewards/rejected": -5.652116298675537, "step": 2839 }, { "epoch": 0.63, "learning_rate": 8.026953409554402e-06, "logits/chosen": -0.8872115612030029, "logits/rejected": -0.950445294380188, "logps/chosen": -256.9634704589844, "logps/rejected": -88.14158630371094, "loss": 0.3013, "rewards/accuracies": 1.0, "rewards/chosen": -0.875537097454071, "rewards/margins": 0.19000250101089478, "rewards/rejected": -1.0655395984649658, "step": 2840 }, { "epoch": 0.63, "learning_rate": 8.025526650170665e-06, "logits/chosen": -0.8640393018722534, "logits/rejected": -0.8635578751564026, "logps/chosen": -63.94364929199219, "logps/rejected": -91.22863006591797, "loss": 0.3751, "rewards/accuracies": 0.0, "rewards/chosen": -1.7906761169433594, "rewards/margins": -0.10854637622833252, "rewards/rejected": -1.6821297407150269, "step": 2841 }, { "epoch": 0.63, "learning_rate": 8.024099502014024e-06, "logits/chosen": -1.2047680616378784, "logits/rejected": -1.2532577514648438, "logps/chosen": -155.03562927246094, "logps/rejected": -86.74828338623047, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -2.2029082775115967, "rewards/margins": 3.0397136211395264, "rewards/rejected": -5.242621898651123, "step": 2842 }, { "epoch": 0.63, "learning_rate": 8.02267196526787e-06, "logits/chosen": -0.8015134334564209, "logits/rejected": -0.7985497117042542, "logps/chosen": -216.47378540039062, "logps/rejected": -261.8970947265625, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": 2.1190078258514404, "rewards/margins": 3.8502440452575684, "rewards/rejected": -1.7312363386154175, "step": 2843 }, { "epoch": 0.63, "learning_rate": 8.021244040115634e-06, "logits/chosen": -0.5498713850975037, "logits/rejected": -0.4910743832588196, "logps/chosen": -126.67200469970703, "logps/rejected": -167.81716918945312, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": 1.5386269092559814, "rewards/margins": 5.074723243713379, "rewards/rejected": -3.5360963344573975, "step": 2844 }, { "epoch": 0.63, "learning_rate": 8.019815726740801e-06, "logits/chosen": -1.092468500137329, "logits/rejected": -1.156757116317749, "logps/chosen": -156.986572265625, "logps/rejected": -169.4302215576172, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 0.5893722772598267, "rewards/margins": 7.08001708984375, "rewards/rejected": -6.490644931793213, "step": 2845 }, { "epoch": 0.63, "learning_rate": 8.018387025326906e-06, "logits/chosen": -0.9126536250114441, "logits/rejected": -0.9150182008743286, "logps/chosen": -77.46072387695312, "logps/rejected": -126.22355651855469, "loss": 0.3542, "rewards/accuracies": 1.0, "rewards/chosen": -1.9988666772842407, "rewards/margins": 4.169460773468018, "rewards/rejected": -6.168327331542969, "step": 2846 }, { "epoch": 0.63, "learning_rate": 8.016957936057535e-06, "logits/chosen": -1.0438693761825562, "logits/rejected": -1.0507711172103882, "logps/chosen": -81.10321807861328, "logps/rejected": -98.7777099609375, "loss": 0.4225, "rewards/accuracies": 0.0, "rewards/chosen": -0.8833793997764587, "rewards/margins": -0.2835090756416321, "rewards/rejected": -0.5998703241348267, "step": 2847 }, { "epoch": 0.63, "learning_rate": 8.015528459116321e-06, "logits/chosen": -1.0039006471633911, "logits/rejected": -1.0297354459762573, "logps/chosen": -138.63352966308594, "logps/rejected": -85.78962707519531, "loss": 0.5885, "rewards/accuracies": 0.0, "rewards/chosen": -5.94280481338501, "rewards/margins": -0.8004617691040039, "rewards/rejected": -5.142343044281006, "step": 2848 }, { "epoch": 0.63, "learning_rate": 8.014098594686951e-06, "logits/chosen": -0.7522710561752319, "logits/rejected": -0.6397482752799988, "logps/chosen": -173.52093505859375, "logps/rejected": -249.5596923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 5.172996520996094, "rewards/margins": 14.975520133972168, "rewards/rejected": -9.802523612976074, "step": 2849 }, { "epoch": 0.63, "learning_rate": 8.012668342953155e-06, "logits/chosen": -1.092609167098999, "logits/rejected": -1.0618878602981567, "logps/chosen": -53.65065002441406, "logps/rejected": -197.5699005126953, "loss": 1.6596, "rewards/accuracies": 1.0, "rewards/chosen": -1.3955726623535156, "rewards/margins": 0.9976785182952881, "rewards/rejected": -2.3932511806488037, "step": 2850 }, { "epoch": 0.63, "learning_rate": 8.011237704098721e-06, "logits/chosen": -0.7240099310874939, "logits/rejected": -0.7240099310874939, "logps/chosen": -218.183837890625, "logps/rejected": -218.183837890625, "loss": 0.3548, "rewards/accuracies": 0.0, "rewards/chosen": -10.211651802062988, "rewards/margins": 0.0, "rewards/rejected": -10.211651802062988, "step": 2851 }, { "epoch": 0.63, "learning_rate": 8.00980667830748e-06, "logits/chosen": -1.2109239101409912, "logits/rejected": -1.2399622201919556, "logps/chosen": -109.33435821533203, "logps/rejected": -81.72067260742188, "loss": 0.0896, "rewards/accuracies": 1.0, "rewards/chosen": -0.8804527521133423, "rewards/margins": 1.627902626991272, "rewards/rejected": -2.5083553791046143, "step": 2852 }, { "epoch": 0.63, "learning_rate": 8.008375265763317e-06, "logits/chosen": -1.1112765073776245, "logits/rejected": -1.0582211017608643, "logps/chosen": -101.25777435302734, "logps/rejected": -279.007080078125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0782005786895752, "rewards/margins": 10.722843170166016, "rewards/rejected": -11.801043510437012, "step": 2853 }, { "epoch": 0.63, "learning_rate": 8.006943466650163e-06, "logits/chosen": -0.45705974102020264, "logits/rejected": -0.45705974102020264, "logps/chosen": -69.57119750976562, "logps/rejected": -69.57119750976562, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.017148017883301, "rewards/margins": 0.0, "rewards/rejected": -4.017148017883301, "step": 2854 }, { "epoch": 0.63, "learning_rate": 8.005511281152004e-06, "logits/chosen": -0.6254395842552185, "logits/rejected": -0.6144183278083801, "logps/chosen": -167.10585021972656, "logps/rejected": -173.88011169433594, "loss": 1.3095, "rewards/accuracies": 0.0, "rewards/chosen": -0.8428894281387329, "rewards/margins": -2.5432159900665283, "rewards/rejected": 1.7003265619277954, "step": 2855 }, { "epoch": 0.63, "learning_rate": 8.004078709452869e-06, "logits/chosen": -1.2400740385055542, "logits/rejected": -1.3069608211517334, "logps/chosen": -107.74275207519531, "logps/rejected": -49.85279846191406, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": 0.024497224017977715, "rewards/margins": 2.171332597732544, "rewards/rejected": -2.1468353271484375, "step": 2856 }, { "epoch": 0.63, "learning_rate": 8.002645751736841e-06, "logits/chosen": -0.7098968625068665, "logits/rejected": -0.7068449258804321, "logps/chosen": -201.7366180419922, "logps/rejected": -247.6811981201172, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": 1.4417022466659546, "rewards/margins": 4.5147385597229, "rewards/rejected": -3.0730361938476562, "step": 2857 }, { "epoch": 0.63, "learning_rate": 8.001212408188052e-06, "logits/chosen": -0.8852559328079224, "logits/rejected": -0.8852559328079224, "logps/chosen": -186.62864685058594, "logps/rejected": -186.62864685058594, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -4.159213542938232, "rewards/margins": 0.0, "rewards/rejected": -4.159213542938232, "step": 2858 }, { "epoch": 0.63, "learning_rate": 7.999778678990685e-06, "logits/chosen": -1.027229905128479, "logits/rejected": -0.9434059858322144, "logps/chosen": -49.3739128112793, "logps/rejected": -124.3885269165039, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.4981178343296051, "rewards/margins": 6.394935607910156, "rewards/rejected": -5.896817684173584, "step": 2859 }, { "epoch": 0.63, "learning_rate": 7.998344564328967e-06, "logits/chosen": -1.2568520307540894, "logits/rejected": -1.2895621061325073, "logps/chosen": -97.25065612792969, "logps/rejected": -103.12569427490234, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": 0.9910416007041931, "rewards/margins": 5.047002792358398, "rewards/rejected": -4.0559611320495605, "step": 2860 }, { "epoch": 0.63, "learning_rate": 7.996910064387181e-06, "logits/chosen": -0.7989429235458374, "logits/rejected": -0.7327091693878174, "logps/chosen": -84.82769012451172, "logps/rejected": -168.26258850097656, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": 0.6015540957450867, "rewards/margins": 4.257235050201416, "rewards/rejected": -3.6556808948516846, "step": 2861 }, { "epoch": 0.63, "learning_rate": 7.995475179349657e-06, "logits/chosen": -0.7873143553733826, "logits/rejected": -0.8124932646751404, "logps/chosen": -261.68817138671875, "logps/rejected": -246.35853576660156, "loss": 0.1052, "rewards/accuracies": 1.0, "rewards/chosen": 2.0979371070861816, "rewards/margins": 1.4519394636154175, "rewards/rejected": 0.6459976434707642, "step": 2862 }, { "epoch": 0.63, "learning_rate": 7.994039909400773e-06, "logits/chosen": -1.0663785934448242, "logits/rejected": -1.0282150506973267, "logps/chosen": -119.19557189941406, "logps/rejected": -196.8944854736328, "loss": 0.744, "rewards/accuracies": 1.0, "rewards/chosen": -0.8058013916015625, "rewards/margins": 9.158368110656738, "rewards/rejected": -9.9641695022583, "step": 2863 }, { "epoch": 0.63, "learning_rate": 7.992604254724957e-06, "logits/chosen": -0.9342334270477295, "logits/rejected": -0.887675404548645, "logps/chosen": -70.94000244140625, "logps/rejected": -123.56875610351562, "loss": 0.6034, "rewards/accuracies": 1.0, "rewards/chosen": -1.1105461120605469, "rewards/margins": 4.184964656829834, "rewards/rejected": -5.295510768890381, "step": 2864 }, { "epoch": 0.63, "learning_rate": 7.991168215506688e-06, "logits/chosen": -0.7767302989959717, "logits/rejected": -0.761718213558197, "logps/chosen": -166.42449951171875, "logps/rejected": -114.4149398803711, "loss": 0.1147, "rewards/accuracies": 1.0, "rewards/chosen": -2.0149292945861816, "rewards/margins": 1.866140604019165, "rewards/rejected": -3.8810698986053467, "step": 2865 }, { "epoch": 0.63, "learning_rate": 7.989731791930497e-06, "logits/chosen": -0.5382991433143616, "logits/rejected": -0.5447971820831299, "logps/chosen": -188.294189453125, "logps/rejected": -130.8105010986328, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.6603683829307556, "rewards/margins": 3.6518030166625977, "rewards/rejected": -4.312171459197998, "step": 2866 }, { "epoch": 0.63, "learning_rate": 7.988294984180956e-06, "logits/chosen": -0.9177500605583191, "logits/rejected": -0.9077668786048889, "logps/chosen": -100.13629150390625, "logps/rejected": -101.53516387939453, "loss": 0.2522, "rewards/accuracies": 1.0, "rewards/chosen": -0.5232376456260681, "rewards/margins": 0.43207472562789917, "rewards/rejected": -0.9553123712539673, "step": 2867 }, { "epoch": 0.63, "learning_rate": 7.986857792442692e-06, "logits/chosen": -1.0670326948165894, "logits/rejected": -0.848908543586731, "logps/chosen": -111.59967041015625, "logps/rejected": -342.9423522949219, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 2.0177292823791504, "rewards/margins": 16.544166564941406, "rewards/rejected": -14.526437759399414, "step": 2868 }, { "epoch": 0.64, "learning_rate": 7.985420216900384e-06, "logits/chosen": -0.9520381689071655, "logits/rejected": -1.0286638736724854, "logps/chosen": -197.33392333984375, "logps/rejected": -124.1670150756836, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.588653564453125, "rewards/margins": 6.778873443603516, "rewards/rejected": -5.190219879150391, "step": 2869 }, { "epoch": 0.64, "learning_rate": 7.983982257738752e-06, "logits/chosen": -1.1364641189575195, "logits/rejected": -1.1709195375442505, "logps/chosen": -143.0694580078125, "logps/rejected": -120.1878662109375, "loss": 0.6104, "rewards/accuracies": 0.0, "rewards/chosen": -0.44177553057670593, "rewards/margins": -0.869976818561554, "rewards/rejected": 0.428201287984848, "step": 2870 }, { "epoch": 0.64, "learning_rate": 7.982543915142575e-06, "logits/chosen": -1.2826416492462158, "logits/rejected": -1.1794053316116333, "logps/chosen": -163.86947631835938, "logps/rejected": -223.39187622070312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 1.9070205688476562, "rewards/margins": 5.876234531402588, "rewards/rejected": -3.9692139625549316, "step": 2871 }, { "epoch": 0.64, "learning_rate": 7.981105189296676e-06, "logits/chosen": -1.053369402885437, "logits/rejected": -1.0774790048599243, "logps/chosen": -147.61703491210938, "logps/rejected": -114.40838623046875, "loss": 0.0867, "rewards/accuracies": 1.0, "rewards/chosen": -5.727622985839844, "rewards/margins": 1.6908202171325684, "rewards/rejected": -7.418443202972412, "step": 2872 }, { "epoch": 0.64, "learning_rate": 7.979666080385923e-06, "logits/chosen": -1.066866159439087, "logits/rejected": -1.0698801279067993, "logps/chosen": -133.2027587890625, "logps/rejected": -162.26882934570312, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.7869591116905212, "rewards/margins": 6.89492130279541, "rewards/rejected": -7.681880474090576, "step": 2873 }, { "epoch": 0.64, "learning_rate": 7.978226588595245e-06, "logits/chosen": -0.5190832614898682, "logits/rejected": -0.5186574459075928, "logps/chosen": -93.71533203125, "logps/rejected": -110.26884460449219, "loss": 0.5928, "rewards/accuracies": 0.0, "rewards/chosen": -0.7348266839981079, "rewards/margins": -0.5514450073242188, "rewards/rejected": -0.18338166177272797, "step": 2874 }, { "epoch": 0.64, "learning_rate": 7.976786714109608e-06, "logits/chosen": -0.8127195239067078, "logits/rejected": -0.7487823963165283, "logps/chosen": -88.34721374511719, "logps/rejected": -156.22955322265625, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -0.04176178202033043, "rewards/margins": 3.399491786956787, "rewards/rejected": -3.441253662109375, "step": 2875 }, { "epoch": 0.64, "learning_rate": 7.975346457114034e-06, "logits/chosen": -1.183523416519165, "logits/rejected": -1.2339354753494263, "logps/chosen": -165.5660858154297, "logps/rejected": -151.05801391601562, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 1.801599144935608, "rewards/margins": 6.8276166915893555, "rewards/rejected": -5.026017665863037, "step": 2876 }, { "epoch": 0.64, "learning_rate": 7.973905817793594e-06, "logits/chosen": -0.6206070780754089, "logits/rejected": -0.6175100207328796, "logps/chosen": -92.72251892089844, "logps/rejected": -118.43910217285156, "loss": 0.093, "rewards/accuracies": 1.0, "rewards/chosen": -0.197998046875, "rewards/margins": 1.7286102771759033, "rewards/rejected": -1.9266083240509033, "step": 2877 }, { "epoch": 0.64, "learning_rate": 7.972464796333408e-06, "logits/chosen": -0.9410565495491028, "logits/rejected": -0.9389843344688416, "logps/chosen": -102.6287841796875, "logps/rejected": -133.75177001953125, "loss": 0.5479, "rewards/accuracies": 0.0, "rewards/chosen": 0.8909698724746704, "rewards/margins": -0.6792739629745483, "rewards/rejected": 1.5702438354492188, "step": 2878 }, { "epoch": 0.64, "learning_rate": 7.971023392918637e-06, "logits/chosen": -1.271919846534729, "logits/rejected": -1.217373013496399, "logps/chosen": -125.59852600097656, "logps/rejected": -178.7427215576172, "loss": 0.0847, "rewards/accuracies": 1.0, "rewards/chosen": 1.7557541131973267, "rewards/margins": 5.821627616882324, "rewards/rejected": -4.065873622894287, "step": 2879 }, { "epoch": 0.64, "learning_rate": 7.969581607734504e-06, "logits/chosen": -0.7589253187179565, "logits/rejected": -0.7477127909660339, "logps/chosen": -151.76564025878906, "logps/rejected": -100.59441375732422, "loss": 0.1219, "rewards/accuracies": 1.0, "rewards/chosen": -2.439505100250244, "rewards/margins": 1.2905833721160889, "rewards/rejected": -3.730088472366333, "step": 2880 }, { "epoch": 0.64, "learning_rate": 7.968139440966271e-06, "logits/chosen": -0.7811911106109619, "logits/rejected": -0.7826316356658936, "logps/chosen": -124.9852523803711, "logps/rejected": -151.8544921875, "loss": 0.6918, "rewards/accuracies": 0.0, "rewards/chosen": -3.9508132934570312, "rewards/margins": -1.092221736907959, "rewards/rejected": -2.8585915565490723, "step": 2881 }, { "epoch": 0.64, "learning_rate": 7.966696892799257e-06, "logits/chosen": -1.1240804195404053, "logits/rejected": -1.1048792600631714, "logps/chosen": -118.781982421875, "logps/rejected": -135.53643798828125, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": -0.6287811398506165, "rewards/margins": 2.809095859527588, "rewards/rejected": -3.4378769397735596, "step": 2882 }, { "epoch": 0.64, "learning_rate": 7.965253963418825e-06, "logits/chosen": -1.3903740644454956, "logits/rejected": -1.2406933307647705, "logps/chosen": -89.9962158203125, "logps/rejected": -271.6031799316406, "loss": 1.079, "rewards/accuracies": 0.0, "rewards/chosen": -1.1781914234161377, "rewards/margins": -1.9714164733886719, "rewards/rejected": 0.793225109577179, "step": 2883 }, { "epoch": 0.64, "learning_rate": 7.963810653010385e-06, "logits/chosen": -1.0620454549789429, "logits/rejected": -1.0270835161209106, "logps/chosen": -113.89263153076172, "logps/rejected": -165.69149780273438, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -0.47451552748680115, "rewards/margins": 3.461402177810669, "rewards/rejected": -3.935917615890503, "step": 2884 }, { "epoch": 0.64, "learning_rate": 7.962366961759402e-06, "logits/chosen": -0.9751052856445312, "logits/rejected": -0.9963207244873047, "logps/chosen": -146.98257446289062, "logps/rejected": -143.16717529296875, "loss": 0.0988, "rewards/accuracies": 1.0, "rewards/chosen": -2.6378371715545654, "rewards/margins": 1.5219805240631104, "rewards/rejected": -4.159817695617676, "step": 2885 }, { "epoch": 0.64, "learning_rate": 7.960922889851386e-06, "logits/chosen": -1.2349244356155396, "logits/rejected": -1.216342806816101, "logps/chosen": -81.39447784423828, "logps/rejected": -126.72515869140625, "loss": 0.127, "rewards/accuracies": 1.0, "rewards/chosen": -0.943067193031311, "rewards/margins": 1.3478232622146606, "rewards/rejected": -2.2908904552459717, "step": 2886 }, { "epoch": 0.64, "learning_rate": 7.959478437471894e-06, "logits/chosen": -0.7952301502227783, "logits/rejected": -0.7952301502227783, "logps/chosen": -115.97090911865234, "logps/rejected": -115.97090911865234, "loss": 0.3478, "rewards/accuracies": 0.0, "rewards/chosen": -1.442785620689392, "rewards/margins": 0.0, "rewards/rejected": -1.442785620689392, "step": 2887 }, { "epoch": 0.64, "learning_rate": 7.95803360480654e-06, "logits/chosen": -0.8288241028785706, "logits/rejected": -0.7370126843452454, "logps/chosen": -86.57832336425781, "logps/rejected": -221.28518676757812, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 0.2746841609477997, "rewards/margins": 4.329156398773193, "rewards/rejected": -4.05447244644165, "step": 2888 }, { "epoch": 0.64, "learning_rate": 7.956588392040978e-06, "logits/chosen": -0.8875437378883362, "logits/rejected": -0.8629834651947021, "logps/chosen": -181.06304931640625, "logps/rejected": -141.693603515625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 4.957557678222656, "rewards/margins": 6.799736022949219, "rewards/rejected": -1.8421783447265625, "step": 2889 }, { "epoch": 0.64, "learning_rate": 7.955142799360914e-06, "logits/chosen": -0.7812393307685852, "logits/rejected": -0.49388831853866577, "logps/chosen": -166.03494262695312, "logps/rejected": -709.9364624023438, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.69342041015625, "rewards/margins": 56.79972457885742, "rewards/rejected": -58.49314498901367, "step": 2890 }, { "epoch": 0.64, "learning_rate": 7.953696826952106e-06, "logits/chosen": -0.58254474401474, "logits/rejected": -0.5083381533622742, "logps/chosen": -197.7783203125, "logps/rejected": -333.331787109375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.7177978754043579, "rewards/margins": 8.16992473602295, "rewards/rejected": -8.887722969055176, "step": 2891 }, { "epoch": 0.64, "learning_rate": 7.952250475000354e-06, "logits/chosen": -0.7225756049156189, "logits/rejected": -0.7098877429962158, "logps/chosen": -98.61788940429688, "logps/rejected": -116.91386413574219, "loss": 0.103, "rewards/accuracies": 1.0, "rewards/chosen": -1.6416946649551392, "rewards/margins": 1.4856690168380737, "rewards/rejected": -3.127363681793213, "step": 2892 }, { "epoch": 0.64, "learning_rate": 7.950803743691516e-06, "logits/chosen": -1.0207695960998535, "logits/rejected": -0.7413634061813354, "logps/chosen": -177.1480712890625, "logps/rejected": -709.0965576171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.8506195545196533, "rewards/margins": 46.885597229003906, "rewards/rejected": -48.7362174987793, "step": 2893 }, { "epoch": 0.64, "learning_rate": 7.949356633211487e-06, "logits/chosen": -1.1453429460525513, "logits/rejected": -1.1398169994354248, "logps/chosen": -91.3144760131836, "logps/rejected": -76.88851928710938, "loss": 0.0342, "rewards/accuracies": 1.0, "rewards/chosen": -2.3566415309906006, "rewards/margins": 2.67488694190979, "rewards/rejected": -5.031528472900391, "step": 2894 }, { "epoch": 0.64, "learning_rate": 7.947909143746221e-06, "logits/chosen": -0.6934060454368591, "logits/rejected": -0.7267675399780273, "logps/chosen": -203.8459930419922, "logps/rejected": -234.88609313964844, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": 1.9505356550216675, "rewards/margins": 3.899838447570801, "rewards/rejected": -1.9493026733398438, "step": 2895 }, { "epoch": 0.64, "learning_rate": 7.946461275481719e-06, "logits/chosen": -0.6363298296928406, "logits/rejected": -0.2748833894729614, "logps/chosen": -103.46422576904297, "logps/rejected": -590.739990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1788886785507202, "rewards/margins": 26.714427947998047, "rewards/rejected": -27.8933162689209, "step": 2896 }, { "epoch": 0.64, "learning_rate": 7.945013028604026e-06, "logits/chosen": -1.6541637182235718, "logits/rejected": -1.8112019300460815, "logps/chosen": -224.39663696289062, "logps/rejected": -244.88885498046875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.0858917236328125, "rewards/margins": 6.590368747711182, "rewards/rejected": -6.676260471343994, "step": 2897 }, { "epoch": 0.64, "learning_rate": 7.943564403299238e-06, "logits/chosen": -1.0455565452575684, "logits/rejected": -1.1291881799697876, "logps/chosen": -260.61273193359375, "logps/rejected": -149.4307861328125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 2.5282745361328125, "rewards/margins": 6.784079074859619, "rewards/rejected": -4.255804538726807, "step": 2898 }, { "epoch": 0.64, "learning_rate": 7.9421153997535e-06, "logits/chosen": -0.5929265022277832, "logits/rejected": -0.5929265022277832, "logps/chosen": -96.02789306640625, "logps/rejected": -96.02789306640625, "loss": 0.3666, "rewards/accuracies": 0.0, "rewards/chosen": -3.7300846576690674, "rewards/margins": 0.0, "rewards/rejected": -3.7300846576690674, "step": 2899 }, { "epoch": 0.64, "learning_rate": 7.940666018153004e-06, "logits/chosen": -0.9726245403289795, "logits/rejected": -0.9231983423233032, "logps/chosen": -122.60453033447266, "logps/rejected": -210.39129638671875, "loss": 0.1479, "rewards/accuracies": 1.0, "rewards/chosen": -0.08906173706054688, "rewards/margins": 1.256060004234314, "rewards/rejected": -1.3451217412948608, "step": 2900 }, { "epoch": 0.64, "learning_rate": 7.939216258683997e-06, "logits/chosen": -1.015573501586914, "logits/rejected": -1.0322986841201782, "logps/chosen": -88.85682678222656, "logps/rejected": -94.83740234375, "loss": 0.9816, "rewards/accuracies": 1.0, "rewards/chosen": -0.09554672241210938, "rewards/margins": 6.476128578186035, "rewards/rejected": -6.5716753005981445, "step": 2901 }, { "epoch": 0.64, "learning_rate": 7.937766121532766e-06, "logits/chosen": -1.2092961072921753, "logits/rejected": -1.222716212272644, "logps/chosen": -96.09087371826172, "logps/rejected": -133.431640625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.695185124874115, "rewards/margins": 6.918938636779785, "rewards/rejected": -6.223753452301025, "step": 2902 }, { "epoch": 0.64, "learning_rate": 7.936315606885649e-06, "logits/chosen": -0.7149838805198669, "logits/rejected": -0.7573965191841125, "logps/chosen": -175.21209716796875, "logps/rejected": -69.23089599609375, "loss": 0.3269, "rewards/accuracies": 1.0, "rewards/chosen": -4.528637886047363, "rewards/margins": 0.14649534225463867, "rewards/rejected": -4.675133228302002, "step": 2903 }, { "epoch": 0.64, "learning_rate": 7.934864714929036e-06, "logits/chosen": -0.8600563406944275, "logits/rejected": -0.8345215916633606, "logps/chosen": -183.3208770751953, "logps/rejected": -171.61985778808594, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.2070831060409546, "rewards/margins": 6.905780792236328, "rewards/rejected": -5.698697566986084, "step": 2904 }, { "epoch": 0.64, "learning_rate": 7.933413445849361e-06, "logits/chosen": -0.9332708716392517, "logits/rejected": -0.8701993227005005, "logps/chosen": -119.1924819946289, "logps/rejected": -83.95339965820312, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 0.5324363708496094, "rewards/margins": 4.624753475189209, "rewards/rejected": -4.0923171043396, "step": 2905 }, { "epoch": 0.64, "learning_rate": 7.931961799833112e-06, "logits/chosen": -0.6972605586051941, "logits/rejected": -0.6467715501785278, "logps/chosen": -145.81982421875, "logps/rejected": -171.57176208496094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.812530517578125, "rewards/margins": 8.825721740722656, "rewards/rejected": -8.013191223144531, "step": 2906 }, { "epoch": 0.64, "learning_rate": 7.930509777066819e-06, "logits/chosen": -0.942603349685669, "logits/rejected": -0.9450850486755371, "logps/chosen": -126.72419738769531, "logps/rejected": -205.14926147460938, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.217121884226799, "rewards/margins": 4.5736494064331055, "rewards/rejected": -4.790771484375, "step": 2907 }, { "epoch": 0.64, "learning_rate": 7.929057377737064e-06, "logits/chosen": -0.9680383205413818, "logits/rejected": -0.9550239443778992, "logps/chosen": -136.056884765625, "logps/rejected": -185.59828186035156, "loss": 0.4816, "rewards/accuracies": 0.0, "rewards/chosen": -0.281149297952652, "rewards/margins": -0.4818832278251648, "rewards/rejected": 0.200733944773674, "step": 2908 }, { "epoch": 0.64, "learning_rate": 7.92760460203048e-06, "logits/chosen": -0.8379306793212891, "logits/rejected": -0.7442957758903503, "logps/chosen": -157.13575744628906, "logps/rejected": -228.171142578125, "loss": 0.0816, "rewards/accuracies": 1.0, "rewards/chosen": 0.5853241086006165, "rewards/margins": 1.7306976318359375, "rewards/rejected": -1.1453735828399658, "step": 2909 }, { "epoch": 0.64, "learning_rate": 7.926151450133738e-06, "logits/chosen": -0.9465588927268982, "logits/rejected": -0.9465588927268982, "logps/chosen": -98.93720245361328, "logps/rejected": -98.93720245361328, "loss": 0.9743, "rewards/accuracies": 0.0, "rewards/chosen": -2.9161629676818848, "rewards/margins": 0.0, "rewards/rejected": -2.9161629676818848, "step": 2910 }, { "epoch": 0.64, "learning_rate": 7.924697922233571e-06, "logits/chosen": -0.9238054752349854, "logits/rejected": -0.899245023727417, "logps/chosen": -104.31900024414062, "logps/rejected": -158.56028747558594, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 1.3753128051757812, "rewards/margins": 7.123105049133301, "rewards/rejected": -5.7477922439575195, "step": 2911 }, { "epoch": 0.64, "learning_rate": 7.923244018516751e-06, "logits/chosen": -1.0893152952194214, "logits/rejected": -1.0369160175323486, "logps/chosen": -96.98373413085938, "logps/rejected": -126.49517059326172, "loss": 0.0351, "rewards/accuracies": 1.0, "rewards/chosen": -0.3949119746685028, "rewards/margins": 3.5388550758361816, "rewards/rejected": -3.933767080307007, "step": 2912 }, { "epoch": 0.64, "learning_rate": 7.921789739170102e-06, "logits/chosen": -0.8781237006187439, "logits/rejected": -0.8614316582679749, "logps/chosen": -111.50789642333984, "logps/rejected": -96.84717559814453, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -1.8700141906738281, "rewards/margins": 3.5858888626098633, "rewards/rejected": -5.455903053283691, "step": 2913 }, { "epoch": 0.64, "learning_rate": 7.920335084380497e-06, "logits/chosen": -0.6677987575531006, "logits/rejected": -0.6634114980697632, "logps/chosen": -139.542236328125, "logps/rejected": -156.52012634277344, "loss": 0.0588, "rewards/accuracies": 1.0, "rewards/chosen": -1.9184516668319702, "rewards/margins": 5.072591304779053, "rewards/rejected": -6.9910430908203125, "step": 2914 }, { "epoch": 0.65, "learning_rate": 7.918880054334853e-06, "logits/chosen": -1.092031478881836, "logits/rejected": -1.1091928482055664, "logps/chosen": -106.60201263427734, "logps/rejected": -113.9541244506836, "loss": 0.0315, "rewards/accuracies": 1.0, "rewards/chosen": -0.10745620727539062, "rewards/margins": 4.860128879547119, "rewards/rejected": -4.96758508682251, "step": 2915 }, { "epoch": 0.65, "learning_rate": 7.91742464922014e-06, "logits/chosen": -1.0773639678955078, "logits/rejected": -1.1182360649108887, "logps/chosen": -114.38821411132812, "logps/rejected": -67.24639129638672, "loss": 0.5993, "rewards/accuracies": 0.0, "rewards/chosen": -0.5838829278945923, "rewards/margins": -0.837056040763855, "rewards/rejected": 0.2531730830669403, "step": 2916 }, { "epoch": 0.65, "learning_rate": 7.915968869223372e-06, "logits/chosen": -1.153185248374939, "logits/rejected": -1.2632503509521484, "logps/chosen": -186.50762939453125, "logps/rejected": -136.1083526611328, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 1.4207473993301392, "rewards/margins": 4.591184139251709, "rewards/rejected": -3.1704368591308594, "step": 2917 }, { "epoch": 0.65, "learning_rate": 7.914512714531612e-06, "logits/chosen": -0.93869549036026, "logits/rejected": -0.8787915706634521, "logps/chosen": -127.21192932128906, "logps/rejected": -156.4745635986328, "loss": 0.0499, "rewards/accuracies": 1.0, "rewards/chosen": -0.44541627168655396, "rewards/margins": 2.9148483276367188, "rewards/rejected": -3.360264539718628, "step": 2918 }, { "epoch": 0.65, "learning_rate": 7.913056185331978e-06, "logits/chosen": -1.047302484512329, "logits/rejected": -1.0531283617019653, "logps/chosen": -77.67045593261719, "logps/rejected": -155.96421813964844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.9453323483467102, "rewards/margins": 9.164023399353027, "rewards/rejected": -8.218690872192383, "step": 2919 }, { "epoch": 0.65, "learning_rate": 7.911599281811624e-06, "logits/chosen": -0.7051474452018738, "logits/rejected": -0.7125147581100464, "logps/chosen": -97.07249450683594, "logps/rejected": -122.83489990234375, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": 0.5611290335655212, "rewards/margins": 3.012871742248535, "rewards/rejected": -2.451742649078369, "step": 2920 }, { "epoch": 0.65, "learning_rate": 7.910142004157762e-06, "logits/chosen": -1.0834243297576904, "logits/rejected": -1.0427831411361694, "logps/chosen": -87.28775787353516, "logps/rejected": -83.54573059082031, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.6937736868858337, "rewards/margins": 4.908602237701416, "rewards/rejected": -5.6023759841918945, "step": 2921 }, { "epoch": 0.65, "learning_rate": 7.90868435255765e-06, "logits/chosen": -0.9844300150871277, "logits/rejected": -0.9584322571754456, "logps/chosen": -101.81965637207031, "logps/rejected": -150.80145263671875, "loss": 0.2224, "rewards/accuracies": 1.0, "rewards/chosen": 0.05778656154870987, "rewards/margins": 2.204524278640747, "rewards/rejected": -2.146737813949585, "step": 2922 }, { "epoch": 0.65, "learning_rate": 7.90722632719859e-06, "logits/chosen": -0.6613540649414062, "logits/rejected": -0.6421185731887817, "logps/chosen": -74.4242935180664, "logps/rejected": -61.67597961425781, "loss": 0.264, "rewards/accuracies": 1.0, "rewards/chosen": -1.6001938581466675, "rewards/margins": 0.43184053897857666, "rewards/rejected": -2.032034397125244, "step": 2923 }, { "epoch": 0.65, "learning_rate": 7.905767928267936e-06, "logits/chosen": -1.2737641334533691, "logits/rejected": -1.260090708732605, "logps/chosen": -124.477783203125, "logps/rejected": -128.32623291015625, "loss": 0.1717, "rewards/accuracies": 1.0, "rewards/chosen": -0.938593327999115, "rewards/margins": 1.929447889328003, "rewards/rejected": -2.8680412769317627, "step": 2924 }, { "epoch": 0.65, "learning_rate": 7.904309155953087e-06, "logits/chosen": -0.7874115109443665, "logits/rejected": -0.738440215587616, "logps/chosen": -63.20330810546875, "logps/rejected": -188.04034423828125, "loss": 0.1894, "rewards/accuracies": 1.0, "rewards/chosen": -0.32288703322410583, "rewards/margins": 1.1497920751571655, "rewards/rejected": -1.4726791381835938, "step": 2925 }, { "epoch": 0.65, "learning_rate": 7.902850010441494e-06, "logits/chosen": -0.9375854730606079, "logits/rejected": -0.9364219903945923, "logps/chosen": -115.34532928466797, "logps/rejected": -162.18112182617188, "loss": 0.1433, "rewards/accuracies": 1.0, "rewards/chosen": -0.7899002432823181, "rewards/margins": 1.5360443592071533, "rewards/rejected": -2.325944662094116, "step": 2926 }, { "epoch": 0.65, "learning_rate": 7.901390491920655e-06, "logits/chosen": -0.7490600347518921, "logits/rejected": -0.7695667743682861, "logps/chosen": -132.59881591796875, "logps/rejected": -125.97505950927734, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": -1.6009254455566406, "rewards/margins": 3.518629550933838, "rewards/rejected": -5.1195549964904785, "step": 2927 }, { "epoch": 0.65, "learning_rate": 7.899930600578112e-06, "logits/chosen": -0.9309468865394592, "logits/rejected": -0.9309468865394592, "logps/chosen": -100.144287109375, "logps/rejected": -100.144287109375, "loss": 0.392, "rewards/accuracies": 0.0, "rewards/chosen": -3.570986270904541, "rewards/margins": 0.0, "rewards/rejected": -3.570986270904541, "step": 2928 }, { "epoch": 0.65, "learning_rate": 7.898470336601456e-06, "logits/chosen": -0.9331273436546326, "logits/rejected": -0.910003125667572, "logps/chosen": -154.35244750976562, "logps/rejected": -286.06951904296875, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": 1.5147064924240112, "rewards/margins": 3.646109104156494, "rewards/rejected": -2.1314027309417725, "step": 2929 }, { "epoch": 0.65, "learning_rate": 7.897009700178331e-06, "logits/chosen": -0.6333540081977844, "logits/rejected": -0.5334282517433167, "logps/chosen": -152.19525146484375, "logps/rejected": -300.91888427734375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 2.3445587158203125, "rewards/margins": 5.455938816070557, "rewards/rejected": -3.111380100250244, "step": 2930 }, { "epoch": 0.65, "learning_rate": 7.895548691496421e-06, "logits/chosen": -0.8298865556716919, "logits/rejected": -0.8457852005958557, "logps/chosen": -129.7866973876953, "logps/rejected": -121.52098083496094, "loss": 0.8017, "rewards/accuracies": 0.0, "rewards/chosen": -3.229172468185425, "rewards/margins": -1.1781210899353027, "rewards/rejected": -2.051051378250122, "step": 2931 }, { "epoch": 0.65, "learning_rate": 7.894087310743468e-06, "logits/chosen": -0.9953309893608093, "logits/rejected": -0.9953309893608093, "logps/chosen": -275.32684326171875, "logps/rejected": -275.32684326171875, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -6.1383819580078125, "rewards/margins": 0.0, "rewards/rejected": -6.1383819580078125, "step": 2932 }, { "epoch": 0.65, "learning_rate": 7.892625558107252e-06, "logits/chosen": -0.972576916217804, "logits/rejected": -0.9732886552810669, "logps/chosen": -120.60552978515625, "logps/rejected": -105.28616333007812, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 1.7200714349746704, "rewards/margins": 6.947691440582275, "rewards/rejected": -5.2276201248168945, "step": 2933 }, { "epoch": 0.65, "learning_rate": 7.891163433775605e-06, "logits/chosen": -0.812194287776947, "logits/rejected": -0.4538540542125702, "logps/chosen": -109.14924621582031, "logps/rejected": -702.8106689453125, "loss": 0.0529, "rewards/accuracies": 1.0, "rewards/chosen": -0.6715561151504517, "rewards/margins": 56.20527648925781, "rewards/rejected": -56.8768310546875, "step": 2934 }, { "epoch": 0.65, "learning_rate": 7.889700937936408e-06, "logits/chosen": -0.8905785083770752, "logits/rejected": -0.9550687670707703, "logps/chosen": -159.28982543945312, "logps/rejected": -272.326416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2561706602573395, "rewards/margins": 9.542872428894043, "rewards/rejected": -9.286702156066895, "step": 2935 }, { "epoch": 0.65, "learning_rate": 7.888238070777586e-06, "logits/chosen": -1.0381320714950562, "logits/rejected": -0.9846899509429932, "logps/chosen": -113.13813018798828, "logps/rejected": -171.42926025390625, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.8579925298690796, "rewards/margins": 4.847170352935791, "rewards/rejected": -6.70516300201416, "step": 2936 }, { "epoch": 0.65, "learning_rate": 7.886774832487116e-06, "logits/chosen": -1.1298679113388062, "logits/rejected": -1.1833128929138184, "logps/chosen": -170.4923858642578, "logps/rejected": -121.83526611328125, "loss": 0.1054, "rewards/accuracies": 1.0, "rewards/chosen": 1.5971771478652954, "rewards/margins": 1.4503357410430908, "rewards/rejected": 0.14684143662452698, "step": 2937 }, { "epoch": 0.65, "learning_rate": 7.885311223253018e-06, "logits/chosen": -0.7810457944869995, "logits/rejected": -0.8058252930641174, "logps/chosen": -193.1373291015625, "logps/rejected": -77.54330444335938, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.404611200094223, "rewards/margins": 4.839418411254883, "rewards/rejected": -5.244029521942139, "step": 2938 }, { "epoch": 0.65, "learning_rate": 7.883847243263366e-06, "logits/chosen": -0.9873757362365723, "logits/rejected": -0.9954802989959717, "logps/chosen": -120.23846435546875, "logps/rejected": -152.81613159179688, "loss": 0.1231, "rewards/accuracies": 1.0, "rewards/chosen": -1.2507492303848267, "rewards/margins": 2.3390259742736816, "rewards/rejected": -3.5897750854492188, "step": 2939 }, { "epoch": 0.65, "learning_rate": 7.882382892706273e-06, "logits/chosen": -0.9248656034469604, "logits/rejected": -0.9339649081230164, "logps/chosen": -64.51438903808594, "logps/rejected": -69.0205078125, "loss": 0.0842, "rewards/accuracies": 1.0, "rewards/chosen": 0.13839569687843323, "rewards/margins": 1.8134151697158813, "rewards/rejected": -1.6750195026397705, "step": 2940 }, { "epoch": 0.65, "learning_rate": 7.88091817176991e-06, "logits/chosen": -0.9645204544067383, "logits/rejected": -0.8858131766319275, "logps/chosen": -97.65274047851562, "logps/rejected": -76.538330078125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": 0.8408569693565369, "rewards/margins": 4.796940803527832, "rewards/rejected": -3.9560840129852295, "step": 2941 }, { "epoch": 0.65, "learning_rate": 7.879453080642486e-06, "logits/chosen": -1.1853387355804443, "logits/rejected": -1.1853387355804443, "logps/chosen": -201.3306884765625, "logps/rejected": -201.3306884765625, "loss": 0.586, "rewards/accuracies": 0.0, "rewards/chosen": -8.858206748962402, "rewards/margins": 0.0, "rewards/rejected": -8.858206748962402, "step": 2942 }, { "epoch": 0.65, "learning_rate": 7.877987619512263e-06, "logits/chosen": -0.9806810021400452, "logits/rejected": -0.9130589962005615, "logps/chosen": -170.4120635986328, "logps/rejected": -366.87371826171875, "loss": 0.088, "rewards/accuracies": 1.0, "rewards/chosen": 0.41182708740234375, "rewards/margins": 9.388890266418457, "rewards/rejected": -8.977063179016113, "step": 2943 }, { "epoch": 0.65, "learning_rate": 7.87652178856755e-06, "logits/chosen": -1.029238224029541, "logits/rejected": -0.9478363990783691, "logps/chosen": -80.39625549316406, "logps/rejected": -192.22119140625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.36023637652397156, "rewards/margins": 5.5168633460998535, "rewards/rejected": -5.877099514007568, "step": 2944 }, { "epoch": 0.65, "learning_rate": 7.875055587996703e-06, "logits/chosen": -1.0363034009933472, "logits/rejected": -1.0363034009933472, "logps/chosen": -223.8944549560547, "logps/rejected": -223.8944549560547, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -3.389238119125366, "rewards/margins": 0.0, "rewards/rejected": -3.389238119125366, "step": 2945 }, { "epoch": 0.65, "learning_rate": 7.873589017988124e-06, "logits/chosen": -0.708816647529602, "logits/rejected": -0.6249126195907593, "logps/chosen": -203.15269470214844, "logps/rejected": -305.20623779296875, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.6229782104492188, "rewards/margins": 5.733777046203613, "rewards/rejected": -6.356755256652832, "step": 2946 }, { "epoch": 0.65, "learning_rate": 7.872122078730263e-06, "logits/chosen": -1.1260693073272705, "logits/rejected": -1.157447338104248, "logps/chosen": -168.44375610351562, "logps/rejected": -158.91879272460938, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": 0.13924255967140198, "rewards/margins": 3.655860185623169, "rewards/rejected": -3.5166175365448, "step": 2947 }, { "epoch": 0.65, "learning_rate": 7.87065477041162e-06, "logits/chosen": -0.8357592821121216, "logits/rejected": -0.8357592821121216, "logps/chosen": -96.19563293457031, "logps/rejected": -96.19563293457031, "loss": 0.3788, "rewards/accuracies": 0.0, "rewards/chosen": 0.4760490357875824, "rewards/margins": 0.0, "rewards/rejected": 0.4760490357875824, "step": 2948 }, { "epoch": 0.65, "learning_rate": 7.86918709322074e-06, "logits/chosen": -0.9468111991882324, "logits/rejected": -0.9524661302566528, "logps/chosen": -84.2300796508789, "logps/rejected": -104.13825988769531, "loss": 0.2794, "rewards/accuracies": 1.0, "rewards/chosen": -0.24271850287914276, "rewards/margins": 0.28956907987594604, "rewards/rejected": -0.53228759765625, "step": 2949 }, { "epoch": 0.65, "learning_rate": 7.867719047346216e-06, "logits/chosen": -0.735975980758667, "logits/rejected": -0.7425535321235657, "logps/chosen": -154.3551025390625, "logps/rejected": -249.69271850585938, "loss": 0.064, "rewards/accuracies": 1.0, "rewards/chosen": -2.599308729171753, "rewards/margins": 1.9959032535552979, "rewards/rejected": -4.595211982727051, "step": 2950 }, { "epoch": 0.65, "learning_rate": 7.86625063297669e-06, "logits/chosen": -1.3200303316116333, "logits/rejected": -0.7290237545967102, "logps/chosen": -138.491455078125, "logps/rejected": -765.3629150390625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -6.2973504066467285, "rewards/margins": 62.17751693725586, "rewards/rejected": -68.47486877441406, "step": 2951 }, { "epoch": 0.65, "learning_rate": 7.864781850300844e-06, "logits/chosen": -0.9603059887886047, "logits/rejected": -0.9480409026145935, "logps/chosen": -79.1717300415039, "logps/rejected": -161.515625, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": 1.2525871992111206, "rewards/margins": 5.0290374755859375, "rewards/rejected": -3.7764503955841064, "step": 2952 }, { "epoch": 0.65, "learning_rate": 7.863312699507419e-06, "logits/chosen": -1.112084150314331, "logits/rejected": -1.186076045036316, "logps/chosen": -195.44290161132812, "logps/rejected": -94.16242980957031, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 1.3074432611465454, "rewards/margins": 6.36181640625, "rewards/rejected": -5.054373264312744, "step": 2953 }, { "epoch": 0.65, "learning_rate": 7.861843180785196e-06, "logits/chosen": -0.6752545237541199, "logits/rejected": -0.6840105652809143, "logps/chosen": -79.94972229003906, "logps/rejected": -93.09866333007812, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -1.2159401178359985, "rewards/margins": 3.04219388961792, "rewards/rejected": -4.258133888244629, "step": 2954 }, { "epoch": 0.65, "learning_rate": 7.860373294323002e-06, "logits/chosen": -1.194972276687622, "logits/rejected": -1.2983659505844116, "logps/chosen": -184.71600341796875, "logps/rejected": -129.55084228515625, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": 0.35322266817092896, "rewards/margins": 3.1930618286132812, "rewards/rejected": -2.839839220046997, "step": 2955 }, { "epoch": 0.65, "learning_rate": 7.858903040309717e-06, "logits/chosen": -1.0976005792617798, "logits/rejected": -1.2040587663650513, "logps/chosen": -106.09049987792969, "logps/rejected": -65.6201171875, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -0.12039031833410263, "rewards/margins": 2.9112589359283447, "rewards/rejected": -3.031649351119995, "step": 2956 }, { "epoch": 0.65, "learning_rate": 7.857432418934264e-06, "logits/chosen": -0.8287686705589294, "logits/rejected": -0.8287686705589294, "logps/chosen": -111.56904602050781, "logps/rejected": -111.56904602050781, "loss": 1.5977, "rewards/accuracies": 0.0, "rewards/chosen": -1.1308716535568237, "rewards/margins": 0.0, "rewards/rejected": -1.1308716535568237, "step": 2957 }, { "epoch": 0.65, "learning_rate": 7.855961430385615e-06, "logits/chosen": -0.9290018677711487, "logits/rejected": -0.8223009705543518, "logps/chosen": -249.49838256835938, "logps/rejected": -284.1470642089844, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": -0.7273483276367188, "rewards/margins": 10.843111991882324, "rewards/rejected": -11.570460319519043, "step": 2958 }, { "epoch": 0.65, "learning_rate": 7.854490074852784e-06, "logits/chosen": -1.266119360923767, "logits/rejected": -1.2116414308547974, "logps/chosen": -67.58716583251953, "logps/rejected": -137.18560791015625, "loss": 0.0673, "rewards/accuracies": 1.0, "rewards/chosen": -1.3295719623565674, "rewards/margins": 2.0384578704833984, "rewards/rejected": -3.368029832839966, "step": 2959 }, { "epoch": 0.66, "learning_rate": 7.853018352524845e-06, "logits/chosen": -0.852533757686615, "logits/rejected": -0.88397216796875, "logps/chosen": -83.48167419433594, "logps/rejected": -94.39222717285156, "loss": 1.2721, "rewards/accuracies": 0.0, "rewards/chosen": 0.02857208251953125, "rewards/margins": -0.3741813600063324, "rewards/rejected": 0.40275344252586365, "step": 2960 }, { "epoch": 0.66, "learning_rate": 7.851546263590905e-06, "logits/chosen": -0.9421808123588562, "logits/rejected": -0.958799421787262, "logps/chosen": -199.19680786132812, "logps/rejected": -140.41041564941406, "loss": 0.9013, "rewards/accuracies": 0.0, "rewards/chosen": -2.854496717453003, "rewards/margins": -1.6222472190856934, "rewards/rejected": -1.2322494983673096, "step": 2961 }, { "epoch": 0.66, "learning_rate": 7.850073808240125e-06, "logits/chosen": -1.2395086288452148, "logits/rejected": -1.2135260105133057, "logps/chosen": -142.3644561767578, "logps/rejected": -133.21881103515625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.38861390948295593, "rewards/margins": 5.868836879730225, "rewards/rejected": -5.480223178863525, "step": 2962 }, { "epoch": 0.66, "learning_rate": 7.84860098666171e-06, "logits/chosen": -1.3105725049972534, "logits/rejected": -1.1907798051834106, "logps/chosen": -111.0152587890625, "logps/rejected": -250.79150390625, "loss": 1.7616, "rewards/accuracies": 0.0, "rewards/chosen": -4.367082118988037, "rewards/margins": -3.491807460784912, "rewards/rejected": -0.875274658203125, "step": 2963 }, { "epoch": 0.66, "learning_rate": 7.847127799044918e-06, "logits/chosen": -0.9456075429916382, "logits/rejected": -0.9537215232849121, "logps/chosen": -103.17231750488281, "logps/rejected": -112.98749542236328, "loss": 0.342, "rewards/accuracies": 1.0, "rewards/chosen": -1.7568718194961548, "rewards/margins": 0.039165496826171875, "rewards/rejected": -1.7960373163223267, "step": 2964 }, { "epoch": 0.66, "learning_rate": 7.845654245579047e-06, "logits/chosen": -0.7466536164283752, "logits/rejected": -0.7507669925689697, "logps/chosen": -88.72004699707031, "logps/rejected": -141.27928161621094, "loss": 0.0803, "rewards/accuracies": 1.0, "rewards/chosen": 0.5931968688964844, "rewards/margins": 1.838636040687561, "rewards/rejected": -1.2454391717910767, "step": 2965 }, { "epoch": 0.66, "learning_rate": 7.844180326453447e-06, "logits/chosen": -1.025433897972107, "logits/rejected": -1.0543133020401, "logps/chosen": -93.88371276855469, "logps/rejected": -84.75624084472656, "loss": 0.177, "rewards/accuracies": 1.0, "rewards/chosen": -0.6105003356933594, "rewards/margins": 1.0931084156036377, "rewards/rejected": -1.703608751296997, "step": 2966 }, { "epoch": 0.66, "learning_rate": 7.842706041857512e-06, "logits/chosen": -1.042176365852356, "logits/rejected": -1.0661081075668335, "logps/chosen": -79.4012451171875, "logps/rejected": -97.70549011230469, "loss": 0.1111, "rewards/accuracies": 1.0, "rewards/chosen": 0.4731636047363281, "rewards/margins": 1.4228432178497314, "rewards/rejected": -0.9496795535087585, "step": 2967 }, { "epoch": 0.66, "learning_rate": 7.841231391980687e-06, "logits/chosen": -1.2729891538619995, "logits/rejected": -1.2667813301086426, "logps/chosen": -176.0360870361328, "logps/rejected": -173.3681640625, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": 0.7802658081054688, "rewards/margins": 3.3444244861602783, "rewards/rejected": -2.5641586780548096, "step": 2968 }, { "epoch": 0.66, "learning_rate": 7.839756377012453e-06, "logits/chosen": -0.9629050493240356, "logits/rejected": -0.9312624335289001, "logps/chosen": -220.6927490234375, "logps/rejected": -272.8448486328125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 2.5611846446990967, "rewards/margins": 6.684652328491211, "rewards/rejected": -4.123467922210693, "step": 2969 }, { "epoch": 0.66, "learning_rate": 7.838280997142355e-06, "logits/chosen": -0.5569874048233032, "logits/rejected": -0.29708176851272583, "logps/chosen": -111.61955261230469, "logps/rejected": -468.0384521484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5984031558036804, "rewards/margins": 34.66988754272461, "rewards/rejected": -35.26829147338867, "step": 2970 }, { "epoch": 0.66, "learning_rate": 7.836805252559971e-06, "logits/chosen": -1.1859322786331177, "logits/rejected": -1.1717392206192017, "logps/chosen": -120.00849914550781, "logps/rejected": -114.70542907714844, "loss": 0.2072, "rewards/accuracies": 1.0, "rewards/chosen": -0.0073532103560864925, "rewards/margins": 0.6667953729629517, "rewards/rejected": -0.6741485595703125, "step": 2971 }, { "epoch": 0.66, "learning_rate": 7.83532914345493e-06, "logits/chosen": -1.097570776939392, "logits/rejected": -1.1004773378372192, "logps/chosen": -242.96209716796875, "logps/rejected": -185.84803771972656, "loss": 2.0763, "rewards/accuracies": 0.0, "rewards/chosen": -2.5999298095703125, "rewards/margins": -4.119981288909912, "rewards/rejected": 1.5200515985488892, "step": 2972 }, { "epoch": 0.66, "learning_rate": 7.833852670016912e-06, "logits/chosen": -0.8805880546569824, "logits/rejected": -0.8763954639434814, "logps/chosen": -130.69927978515625, "logps/rejected": -85.06598663330078, "loss": 0.2074, "rewards/accuracies": 1.0, "rewards/chosen": -2.226698398590088, "rewards/margins": 1.0210952758789062, "rewards/rejected": -3.247793674468994, "step": 2973 }, { "epoch": 0.66, "learning_rate": 7.832375832435637e-06, "logits/chosen": -0.8903453350067139, "logits/rejected": -0.885724663734436, "logps/chosen": -115.55448913574219, "logps/rejected": -90.84220886230469, "loss": 0.2265, "rewards/accuracies": 1.0, "rewards/chosen": -3.803488254547119, "rewards/margins": 0.5599689483642578, "rewards/rejected": -4.363457202911377, "step": 2974 }, { "epoch": 0.66, "learning_rate": 7.830898630900877e-06, "logits/chosen": -0.8741587400436401, "logits/rejected": -0.7271606922149658, "logps/chosen": -167.88658142089844, "logps/rejected": -286.6365966796875, "loss": 0.0689, "rewards/accuracies": 1.0, "rewards/chosen": 3.3185136318206787, "rewards/margins": 4.290196418762207, "rewards/rejected": -0.9716827273368835, "step": 2975 }, { "epoch": 0.66, "learning_rate": 7.829421065602448e-06, "logits/chosen": -1.0650781393051147, "logits/rejected": -1.09425950050354, "logps/chosen": -34.8397216796875, "logps/rejected": -6.462243556976318, "loss": 0.4185, "rewards/accuracies": 0.0, "rewards/chosen": -0.35971909761428833, "rewards/margins": -0.26810967922210693, "rewards/rejected": -0.09160943329334259, "step": 2976 }, { "epoch": 0.66, "learning_rate": 7.827943136730214e-06, "logits/chosen": -0.969437837600708, "logits/rejected": -0.9579087495803833, "logps/chosen": -93.97113037109375, "logps/rejected": -112.14823913574219, "loss": 0.0662, "rewards/accuracies": 1.0, "rewards/chosen": 0.5292328000068665, "rewards/margins": 1.9583511352539062, "rewards/rejected": -1.4291183948516846, "step": 2977 }, { "epoch": 0.66, "learning_rate": 7.826464844474086e-06, "logits/chosen": -1.0350863933563232, "logits/rejected": -0.9660605192184448, "logps/chosen": -108.95098876953125, "logps/rejected": -227.60226440429688, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": 0.8227638602256775, "rewards/margins": 4.826694488525391, "rewards/rejected": -4.003930568695068, "step": 2978 }, { "epoch": 0.66, "learning_rate": 7.82498618902402e-06, "logits/chosen": -0.5710976123809814, "logits/rejected": -0.6262860894203186, "logps/chosen": -274.8664245605469, "logps/rejected": -259.1602783203125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.289419561624527, "rewards/margins": 11.901139259338379, "rewards/rejected": -11.611720085144043, "step": 2979 }, { "epoch": 0.66, "learning_rate": 7.823507170570018e-06, "logits/chosen": -0.8695296049118042, "logits/rejected": -0.958182692527771, "logps/chosen": -199.9241943359375, "logps/rejected": -163.25970458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.516033887863159, "rewards/margins": 10.035923957824707, "rewards/rejected": -6.519889831542969, "step": 2980 }, { "epoch": 0.66, "learning_rate": 7.822027789302134e-06, "logits/chosen": -1.009819507598877, "logits/rejected": -0.9874334335327148, "logps/chosen": -250.7542724609375, "logps/rejected": -199.72203063964844, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -3.18499755859375, "rewards/margins": 4.440663814544678, "rewards/rejected": -7.625661373138428, "step": 2981 }, { "epoch": 0.66, "learning_rate": 7.820548045410462e-06, "logits/chosen": -0.8253246545791626, "logits/rejected": -0.7711575031280518, "logps/chosen": -77.230712890625, "logps/rejected": -99.39515686035156, "loss": 0.352, "rewards/accuracies": 1.0, "rewards/chosen": -1.345483422279358, "rewards/margins": 4.521213531494141, "rewards/rejected": -5.866696834564209, "step": 2982 }, { "epoch": 0.66, "learning_rate": 7.819067939085145e-06, "logits/chosen": -0.9962791204452515, "logits/rejected": -1.021376609802246, "logps/chosen": -86.60533905029297, "logps/rejected": -68.05167388916016, "loss": 0.0498, "rewards/accuracies": 1.0, "rewards/chosen": -0.3948463499546051, "rewards/margins": 2.3194801807403564, "rewards/rejected": -2.7143266201019287, "step": 2983 }, { "epoch": 0.66, "learning_rate": 7.817587470516378e-06, "logits/chosen": -0.7255080938339233, "logits/rejected": -0.7167130708694458, "logps/chosen": -82.64087677001953, "logps/rejected": -97.51085662841797, "loss": 0.3694, "rewards/accuracies": 0.0, "rewards/chosen": -2.17716383934021, "rewards/margins": -0.07819151878356934, "rewards/rejected": -2.0989723205566406, "step": 2984 }, { "epoch": 0.66, "learning_rate": 7.816106639894392e-06, "logits/chosen": -1.3966208696365356, "logits/rejected": -1.2440698146820068, "logps/chosen": -118.76754760742188, "logps/rejected": -388.9415283203125, "loss": 0.3245, "rewards/accuracies": 1.0, "rewards/chosen": -2.0648200511932373, "rewards/margins": 9.00600528717041, "rewards/rejected": -11.070825576782227, "step": 2985 }, { "epoch": 0.66, "learning_rate": 7.814625447409474e-06, "logits/chosen": -0.6941741108894348, "logits/rejected": -0.7156314849853516, "logps/chosen": -175.62142944335938, "logps/rejected": -87.4211196899414, "loss": 0.0984, "rewards/accuracies": 1.0, "rewards/chosen": 0.03894348070025444, "rewards/margins": 1.8252984285354614, "rewards/rejected": -1.786354899406433, "step": 2986 }, { "epoch": 0.66, "learning_rate": 7.813143893251951e-06, "logits/chosen": -0.8957527875900269, "logits/rejected": -0.8976951241493225, "logps/chosen": -117.80947875976562, "logps/rejected": -129.01138305664062, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.6328933835029602, "rewards/margins": 5.154146671295166, "rewards/rejected": -5.7870402336120605, "step": 2987 }, { "epoch": 0.66, "learning_rate": 7.811661977612202e-06, "logits/chosen": -0.8361455202102661, "logits/rejected": -0.7479121088981628, "logps/chosen": -103.84805297851562, "logps/rejected": -162.85447692871094, "loss": 0.2249, "rewards/accuracies": 1.0, "rewards/chosen": 2.4438889026641846, "rewards/margins": 8.713345527648926, "rewards/rejected": -6.269456386566162, "step": 2988 }, { "epoch": 0.66, "learning_rate": 7.810179700680646e-06, "logits/chosen": -0.9210368394851685, "logits/rejected": -0.7457312345504761, "logps/chosen": -146.37437438964844, "logps/rejected": -320.23004150390625, "loss": 0.2263, "rewards/accuracies": 1.0, "rewards/chosen": -3.0347466468811035, "rewards/margins": 0.558783769607544, "rewards/rejected": -3.5935304164886475, "step": 2989 }, { "epoch": 0.66, "learning_rate": 7.808697062647755e-06, "logits/chosen": -1.213317632675171, "logits/rejected": -1.1478914022445679, "logps/chosen": -101.30616760253906, "logps/rejected": -168.33482360839844, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -0.5700012445449829, "rewards/margins": 3.058122158050537, "rewards/rejected": -3.6281235218048096, "step": 2990 }, { "epoch": 0.66, "learning_rate": 7.807214063704042e-06, "logits/chosen": -1.1848642826080322, "logits/rejected": -1.0785735845565796, "logps/chosen": -157.74478149414062, "logps/rejected": -258.24981689453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.046838402748108, "rewards/margins": 8.043448448181152, "rewards/rejected": -6.996609687805176, "step": 2991 }, { "epoch": 0.66, "learning_rate": 7.805730704040072e-06, "logits/chosen": -1.1006629467010498, "logits/rejected": -1.068422794342041, "logps/chosen": -97.79908752441406, "logps/rejected": -133.66868591308594, "loss": 0.1599, "rewards/accuracies": 1.0, "rewards/chosen": 0.34747314453125, "rewards/margins": 1.8781951665878296, "rewards/rejected": -1.5307220220565796, "step": 2992 }, { "epoch": 0.66, "learning_rate": 7.804246983846449e-06, "logits/chosen": -1.2536466121673584, "logits/rejected": -1.248368740081787, "logps/chosen": -63.71661376953125, "logps/rejected": -72.20556640625, "loss": 0.2709, "rewards/accuracies": 1.0, "rewards/chosen": 0.013675308786332607, "rewards/margins": 0.3298099637031555, "rewards/rejected": -0.31613466143608093, "step": 2993 }, { "epoch": 0.66, "learning_rate": 7.802762903313831e-06, "logits/chosen": -0.9255411624908447, "logits/rejected": -0.9255411624908447, "logps/chosen": -84.79449462890625, "logps/rejected": -84.79449462890625, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -0.09335632622241974, "rewards/margins": 0.0, "rewards/rejected": -0.09335632622241974, "step": 2994 }, { "epoch": 0.66, "learning_rate": 7.80127846263292e-06, "logits/chosen": -1.2503377199172974, "logits/rejected": -1.1693058013916016, "logps/chosen": -111.46192169189453, "logps/rejected": -184.27676391601562, "loss": 0.5489, "rewards/accuracies": 0.0, "rewards/chosen": -2.5413429737091064, "rewards/margins": -0.6785576343536377, "rewards/rejected": -1.8627853393554688, "step": 2995 }, { "epoch": 0.66, "learning_rate": 7.799793661994457e-06, "logits/chosen": -1.2455250024795532, "logits/rejected": -1.9894025325775146, "logps/chosen": -100.03944396972656, "logps/rejected": -129.4605712890625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.9560836553573608, "rewards/margins": 4.902604103088379, "rewards/rejected": -6.858687877655029, "step": 2996 }, { "epoch": 0.66, "learning_rate": 7.79830850158924e-06, "logits/chosen": -1.1045830249786377, "logits/rejected": -0.9831957221031189, "logps/chosen": -167.01390075683594, "logps/rejected": -361.1158752441406, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": 3.979374647140503, "rewards/margins": 6.900382995605469, "rewards/rejected": -2.921008348464966, "step": 2997 }, { "epoch": 0.66, "learning_rate": 7.796822981608109e-06, "logits/chosen": -1.1311525106430054, "logits/rejected": -1.0808693170547485, "logps/chosen": -138.29672241210938, "logps/rejected": -267.7449951171875, "loss": 0.9512, "rewards/accuracies": 0.0, "rewards/chosen": -0.511340320110321, "rewards/margins": -1.7146849632263184, "rewards/rejected": 1.203344702720642, "step": 2998 }, { "epoch": 0.66, "learning_rate": 7.795337102241948e-06, "logits/chosen": -0.7586482167243958, "logits/rejected": -0.7532597184181213, "logps/chosen": -233.85989379882812, "logps/rejected": -242.15379333496094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 1.202412486076355, "rewards/margins": 9.035067558288574, "rewards/rejected": -7.832655429840088, "step": 2999 }, { "epoch": 0.66, "learning_rate": 7.793850863681688e-06, "logits/chosen": -1.069178819656372, "logits/rejected": -1.0701059103012085, "logps/chosen": -124.46321105957031, "logps/rejected": -131.26315307617188, "loss": 0.0509, "rewards/accuracies": 1.0, "rewards/chosen": 1.2622909545898438, "rewards/margins": 2.832871913909912, "rewards/rejected": -1.570581078529358, "step": 3000 }, { "epoch": 0.66, "learning_rate": 3.6764705882352945e-08, "logits/chosen": -1.0707221031188965, "logits/rejected": -1.1441798210144043, "logps/chosen": -162.72601318359375, "logps/rejected": -103.6856460571289, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": 1.53399658203125, "rewards/margins": 3.515610694885254, "rewards/rejected": -1.9816139936447144, "step": 3001 }, { "epoch": 0.66, "learning_rate": 7.352941176470589e-08, "logits/chosen": -1.1431434154510498, "logits/rejected": -1.1005040407180786, "logps/chosen": -134.50418090820312, "logps/rejected": -169.67568969726562, "loss": 0.1924, "rewards/accuracies": 1.0, "rewards/chosen": -1.087104082107544, "rewards/margins": 0.7591636180877686, "rewards/rejected": -1.8462677001953125, "step": 3002 }, { "epoch": 0.66, "learning_rate": 1.1029411764705884e-07, "logits/chosen": -0.9968104958534241, "logits/rejected": -0.956206738948822, "logps/chosen": -98.6890869140625, "logps/rejected": -177.18682861328125, "loss": 0.1023, "rewards/accuracies": 1.0, "rewards/chosen": -0.3674049377441406, "rewards/margins": 1.727781057357788, "rewards/rejected": -2.0951859951019287, "step": 3003 }, { "epoch": 0.66, "learning_rate": 1.4705882352941178e-07, "logits/chosen": -0.9695714116096497, "logits/rejected": -0.9246718287467957, "logps/chosen": -68.11172485351562, "logps/rejected": -112.13072967529297, "loss": 0.0614, "rewards/accuracies": 1.0, "rewards/chosen": 0.8478248715400696, "rewards/margins": 4.597740173339844, "rewards/rejected": -3.749915361404419, "step": 3004 }, { "epoch": 0.67, "learning_rate": 1.8382352941176472e-07, "logits/chosen": -0.8828922510147095, "logits/rejected": -0.8731976747512817, "logps/chosen": -90.29997253417969, "logps/rejected": -140.96762084960938, "loss": 0.0678, "rewards/accuracies": 1.0, "rewards/chosen": 0.08637543022632599, "rewards/margins": 3.275742530822754, "rewards/rejected": -3.1893670558929443, "step": 3005 }, { "epoch": 0.67, "learning_rate": 2.2058823529411768e-07, "logits/chosen": -1.2752569913864136, "logits/rejected": -1.354980230331421, "logps/chosen": -191.74871826171875, "logps/rejected": -166.30711364746094, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": -0.453591912984848, "rewards/margins": 2.8087494373321533, "rewards/rejected": -3.262341260910034, "step": 3006 }, { "epoch": 0.67, "learning_rate": 2.573529411764706e-07, "logits/chosen": -1.0091960430145264, "logits/rejected": -0.899273157119751, "logps/chosen": -198.47500610351562, "logps/rejected": -309.51025390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 2.417924642562866, "rewards/margins": 13.390242576599121, "rewards/rejected": -10.972317695617676, "step": 3007 }, { "epoch": 0.67, "learning_rate": 2.9411764705882356e-07, "logits/chosen": -1.0382301807403564, "logits/rejected": -1.0746335983276367, "logps/chosen": -176.8651885986328, "logps/rejected": -198.92965698242188, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": 0.32630157470703125, "rewards/margins": 6.407452583312988, "rewards/rejected": -6.081151008605957, "step": 3008 }, { "epoch": 0.67, "learning_rate": 3.308823529411765e-07, "logits/chosen": -1.076021671295166, "logits/rejected": -1.0140327215194702, "logps/chosen": -139.97598266601562, "logps/rejected": -153.03976440429688, "loss": 0.066, "rewards/accuracies": 1.0, "rewards/chosen": -0.65789794921875, "rewards/margins": 1.9580848217010498, "rewards/rejected": -2.6159827709198, "step": 3009 }, { "epoch": 0.67, "learning_rate": 3.6764705882352943e-07, "logits/chosen": -1.0570651292800903, "logits/rejected": -1.0979044437408447, "logps/chosen": -93.6292724609375, "logps/rejected": -199.85415649414062, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.522137463092804, "rewards/margins": 4.802046298980713, "rewards/rejected": -5.324183940887451, "step": 3010 }, { "epoch": 0.67, "learning_rate": 4.044117647058824e-07, "logits/chosen": -1.0064926147460938, "logits/rejected": -0.9991889595985413, "logps/chosen": -126.39137268066406, "logps/rejected": -142.32431030273438, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 0.9842087030410767, "rewards/margins": 4.049232006072998, "rewards/rejected": -3.065023183822632, "step": 3011 }, { "epoch": 0.67, "learning_rate": 4.4117647058823536e-07, "logits/chosen": -0.9974825978279114, "logits/rejected": -0.9974825978279114, "logps/chosen": -163.0702362060547, "logps/rejected": -163.0702362060547, "loss": 0.3538, "rewards/accuracies": 0.0, "rewards/chosen": -6.97726583480835, "rewards/margins": 0.0, "rewards/rejected": -6.97726583480835, "step": 3012 }, { "epoch": 0.67, "learning_rate": 4.779411764705882e-07, "logits/chosen": -1.0115952491760254, "logits/rejected": -1.0115952491760254, "logps/chosen": -94.39198303222656, "logps/rejected": -94.39198303222656, "loss": 0.3628, "rewards/accuracies": 0.0, "rewards/chosen": -6.494323253631592, "rewards/margins": 0.0, "rewards/rejected": -6.494323253631592, "step": 3013 }, { "epoch": 0.67, "learning_rate": 5.147058823529412e-07, "logits/chosen": -0.8632073998451233, "logits/rejected": -0.8632073998451233, "logps/chosen": -118.87225341796875, "logps/rejected": -118.87225341796875, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": 0.147929385304451, "rewards/margins": 0.0, "rewards/rejected": 0.147929385304451, "step": 3014 }, { "epoch": 0.67, "learning_rate": 5.514705882352942e-07, "logits/chosen": -1.1515710353851318, "logits/rejected": -1.1529529094696045, "logps/chosen": -148.7397918701172, "logps/rejected": -165.86383056640625, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": -0.0072006224654614925, "rewards/margins": 5.5302581787109375, "rewards/rejected": -5.537458896636963, "step": 3015 }, { "epoch": 0.67, "learning_rate": 5.882352941176471e-07, "logits/chosen": -1.1757932901382446, "logits/rejected": -1.2984575033187866, "logps/chosen": -213.46533203125, "logps/rejected": -178.7740478515625, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": 4.195813179016113, "rewards/margins": 3.1719605922698975, "rewards/rejected": 1.0238525867462158, "step": 3016 }, { "epoch": 0.67, "learning_rate": 6.25e-07, "logits/chosen": -0.7366555333137512, "logits/rejected": -0.7147725820541382, "logps/chosen": -91.18263244628906, "logps/rejected": -177.9185333251953, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -1.1686416864395142, "rewards/margins": 4.1071929931640625, "rewards/rejected": -5.275834560394287, "step": 3017 }, { "epoch": 0.67, "learning_rate": 6.61764705882353e-07, "logits/chosen": -1.4023324251174927, "logits/rejected": -1.3660964965820312, "logps/chosen": -111.7921142578125, "logps/rejected": -215.13983154296875, "loss": 0.2146, "rewards/accuracies": 1.0, "rewards/chosen": 0.0021469115745276213, "rewards/margins": 3.8837051391601562, "rewards/rejected": -3.8815581798553467, "step": 3018 }, { "epoch": 0.67, "learning_rate": 6.985294117647059e-07, "logits/chosen": -0.8344406485557556, "logits/rejected": -0.9466139674186707, "logps/chosen": -145.2516632080078, "logps/rejected": -168.35153198242188, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": 0.050872802734375, "rewards/margins": 3.1811187267303467, "rewards/rejected": -3.1302459239959717, "step": 3019 }, { "epoch": 0.67, "learning_rate": 7.352941176470589e-07, "logits/chosen": -0.5738524794578552, "logits/rejected": -0.5291654467582703, "logps/chosen": -200.64682006835938, "logps/rejected": -277.77557373046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.0406402349472046, "rewards/margins": 10.60693645477295, "rewards/rejected": -9.566296577453613, "step": 3020 }, { "epoch": 0.67, "learning_rate": 7.720588235294119e-07, "logits/chosen": -1.128314733505249, "logits/rejected": -1.2314426898956299, "logps/chosen": -152.93650817871094, "logps/rejected": -151.56137084960938, "loss": 0.1451, "rewards/accuracies": 1.0, "rewards/chosen": 1.34532630443573, "rewards/margins": 6.656824588775635, "rewards/rejected": -5.311498165130615, "step": 3021 }, { "epoch": 0.67, "learning_rate": 8.088235294117648e-07, "logits/chosen": -0.7298019528388977, "logits/rejected": -0.7004086971282959, "logps/chosen": -283.71075439453125, "logps/rejected": -283.4055480957031, "loss": 0.2342, "rewards/accuracies": 1.0, "rewards/chosen": -3.7788009643554688, "rewards/margins": 0.5153579711914062, "rewards/rejected": -4.294158935546875, "step": 3022 }, { "epoch": 0.67, "learning_rate": 8.455882352941178e-07, "logits/chosen": -0.7799885272979736, "logits/rejected": -0.7538939118385315, "logps/chosen": -97.30770874023438, "logps/rejected": -149.04598999023438, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -1.3286598920822144, "rewards/margins": 3.928597927093506, "rewards/rejected": -5.25725793838501, "step": 3023 }, { "epoch": 0.67, "learning_rate": 8.823529411764707e-07, "logits/chosen": -0.8162825107574463, "logits/rejected": -0.863507091999054, "logps/chosen": -198.1175994873047, "logps/rejected": -182.62167358398438, "loss": 0.2736, "rewards/accuracies": 1.0, "rewards/chosen": -1.158528208732605, "rewards/margins": 4.5858941078186035, "rewards/rejected": -5.744422435760498, "step": 3024 }, { "epoch": 0.67, "learning_rate": 9.191176470588237e-07, "logits/chosen": -1.0435808897018433, "logits/rejected": -1.0448424816131592, "logps/chosen": -133.67503356933594, "logps/rejected": -135.08694458007812, "loss": 0.0937, "rewards/accuracies": 1.0, "rewards/chosen": -4.227504253387451, "rewards/margins": 1.5897488594055176, "rewards/rejected": -5.817253112792969, "step": 3025 }, { "epoch": 0.67, "learning_rate": 9.558823529411764e-07, "logits/chosen": -1.001344919204712, "logits/rejected": -1.02791166305542, "logps/chosen": -117.52498626708984, "logps/rejected": -94.98128509521484, "loss": 0.1343, "rewards/accuracies": 1.0, "rewards/chosen": -3.102940320968628, "rewards/margins": 1.1893794536590576, "rewards/rejected": -4.2923197746276855, "step": 3026 }, { "epoch": 0.67, "learning_rate": 9.926470588235295e-07, "logits/chosen": -0.7479828596115112, "logits/rejected": -0.6995792984962463, "logps/chosen": -101.112548828125, "logps/rejected": -98.59054565429688, "loss": 0.3046, "rewards/accuracies": 1.0, "rewards/chosen": -0.6122154593467712, "rewards/margins": 2.6853179931640625, "rewards/rejected": -3.2975335121154785, "step": 3027 }, { "epoch": 0.67, "learning_rate": 1.0294117647058825e-06, "logits/chosen": -0.9684284329414368, "logits/rejected": -0.9601582288742065, "logps/chosen": -92.36820983886719, "logps/rejected": -184.9632568359375, "loss": 0.2333, "rewards/accuracies": 1.0, "rewards/chosen": -3.1978611946105957, "rewards/margins": 0.550950288772583, "rewards/rejected": -3.7488114833831787, "step": 3028 }, { "epoch": 0.67, "learning_rate": 1.0661764705882354e-06, "logits/chosen": -0.920584499835968, "logits/rejected": -0.9396792650222778, "logps/chosen": -113.76226806640625, "logps/rejected": -122.51307678222656, "loss": 0.0935, "rewards/accuracies": 1.0, "rewards/chosen": -1.319982886314392, "rewards/margins": 2.3138937950134277, "rewards/rejected": -3.6338768005371094, "step": 3029 }, { "epoch": 0.67, "learning_rate": 1.1029411764705884e-06, "logits/chosen": -0.7123556137084961, "logits/rejected": -0.7978853583335876, "logps/chosen": -136.3206787109375, "logps/rejected": -41.330413818359375, "loss": 1.155, "rewards/accuracies": 0.0, "rewards/chosen": -4.509761333465576, "rewards/margins": -2.1966023445129395, "rewards/rejected": -2.3131589889526367, "step": 3030 }, { "epoch": 0.67, "learning_rate": 1.1397058823529413e-06, "logits/chosen": -0.9806317090988159, "logits/rejected": -0.9434365034103394, "logps/chosen": -105.17269897460938, "logps/rejected": -240.99224853515625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.9936996698379517, "rewards/margins": 6.041539669036865, "rewards/rejected": -8.035239219665527, "step": 3031 }, { "epoch": 0.67, "learning_rate": 1.1764705882352942e-06, "logits/chosen": -1.0180985927581787, "logits/rejected": -1.1289722919464111, "logps/chosen": -218.2801513671875, "logps/rejected": -170.3765869140625, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": 0.6861282587051392, "rewards/margins": 6.357093811035156, "rewards/rejected": -5.670965671539307, "step": 3032 }, { "epoch": 0.67, "learning_rate": 1.2132352941176472e-06, "logits/chosen": -1.0003588199615479, "logits/rejected": -1.0476020574569702, "logps/chosen": -130.28970336914062, "logps/rejected": -77.51033782958984, "loss": 0.9472, "rewards/accuracies": 0.0, "rewards/chosen": -5.660330295562744, "rewards/margins": -1.7311508655548096, "rewards/rejected": -3.9291794300079346, "step": 3033 }, { "epoch": 0.67, "learning_rate": 1.25e-06, "logits/chosen": -0.6845185160636902, "logits/rejected": -0.682912290096283, "logps/chosen": -72.32406616210938, "logps/rejected": -49.94257354736328, "loss": 0.1855, "rewards/accuracies": 1.0, "rewards/chosen": -2.3215548992156982, "rewards/margins": 1.013486623764038, "rewards/rejected": -3.3350415229797363, "step": 3034 }, { "epoch": 0.67, "learning_rate": 1.2867647058823528e-06, "logits/chosen": -0.7030870914459229, "logits/rejected": -0.7137606143951416, "logps/chosen": -160.59951782226562, "logps/rejected": -68.52095031738281, "loss": 0.7142, "rewards/accuracies": 0.0, "rewards/chosen": -4.390838146209717, "rewards/margins": -0.9967629909515381, "rewards/rejected": -3.3940751552581787, "step": 3035 }, { "epoch": 0.67, "learning_rate": 1.323529411764706e-06, "logits/chosen": -1.0690851211547852, "logits/rejected": -1.069177508354187, "logps/chosen": -141.25096130371094, "logps/rejected": -189.92822265625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -2.6721787452697754, "rewards/margins": 4.152853965759277, "rewards/rejected": -6.825032711029053, "step": 3036 }, { "epoch": 0.67, "learning_rate": 1.360294117647059e-06, "logits/chosen": -0.7538594007492065, "logits/rejected": -0.6749128699302673, "logps/chosen": -164.22830200195312, "logps/rejected": -144.93438720703125, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -2.701342821121216, "rewards/margins": 3.5812971591949463, "rewards/rejected": -6.282639980316162, "step": 3037 }, { "epoch": 0.67, "learning_rate": 1.3970588235294119e-06, "logits/chosen": -1.0079553127288818, "logits/rejected": -0.5136085152626038, "logps/chosen": -279.3403625488281, "logps/rejected": -546.7080078125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -13.304132461547852, "rewards/margins": 19.989076614379883, "rewards/rejected": -33.293209075927734, "step": 3038 }, { "epoch": 0.67, "learning_rate": 1.4338235294117648e-06, "logits/chosen": -0.9666189551353455, "logits/rejected": -0.699009120464325, "logps/chosen": -65.84930419921875, "logps/rejected": -425.8812255859375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.5651108026504517, "rewards/margins": 34.61235427856445, "rewards/rejected": -35.17746353149414, "step": 3039 }, { "epoch": 0.67, "learning_rate": 1.4705882352941177e-06, "logits/chosen": -0.9657694697380066, "logits/rejected": -1.0300474166870117, "logps/chosen": -181.37997436523438, "logps/rejected": -142.3797607421875, "loss": 0.1484, "rewards/accuracies": 1.0, "rewards/chosen": 0.727935791015625, "rewards/margins": 1.063165307044983, "rewards/rejected": -0.3352294862270355, "step": 3040 }, { "epoch": 0.67, "learning_rate": 1.5073529411764707e-06, "logits/chosen": -0.8001271486282349, "logits/rejected": -0.8539530038833618, "logps/chosen": -112.00312805175781, "logps/rejected": -56.1760139465332, "loss": 0.815, "rewards/accuracies": 1.0, "rewards/chosen": -2.1365089416503906, "rewards/margins": 1.1768276691436768, "rewards/rejected": -3.3133366107940674, "step": 3041 }, { "epoch": 0.67, "learning_rate": 1.5441176470588238e-06, "logits/chosen": -0.9647305011749268, "logits/rejected": -0.9897202253341675, "logps/chosen": -141.29269409179688, "logps/rejected": -129.2770233154297, "loss": 0.2214, "rewards/accuracies": 1.0, "rewards/chosen": -0.5912933349609375, "rewards/margins": 0.5853233337402344, "rewards/rejected": -1.1766166687011719, "step": 3042 }, { "epoch": 0.67, "learning_rate": 1.5808823529411765e-06, "logits/chosen": -0.8743122220039368, "logits/rejected": -0.9169484376907349, "logps/chosen": -152.45101928710938, "logps/rejected": -147.66433715820312, "loss": 0.354, "rewards/accuracies": 1.0, "rewards/chosen": -2.0539748668670654, "rewards/margins": 2.9431488513946533, "rewards/rejected": -4.997123718261719, "step": 3043 }, { "epoch": 0.67, "learning_rate": 1.6176470588235297e-06, "logits/chosen": -1.3592289686203003, "logits/rejected": -1.378232717514038, "logps/chosen": -79.8260269165039, "logps/rejected": -79.4798355102539, "loss": 0.0982, "rewards/accuracies": 1.0, "rewards/chosen": -1.4455665349960327, "rewards/margins": 2.343137264251709, "rewards/rejected": -3.7887039184570312, "step": 3044 }, { "epoch": 0.67, "learning_rate": 1.6544117647058824e-06, "logits/chosen": -1.0248749256134033, "logits/rejected": -0.378309041261673, "logps/chosen": -216.57156372070312, "logps/rejected": -479.16796875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.47400209307670593, "rewards/margins": 34.284393310546875, "rewards/rejected": -33.81039047241211, "step": 3045 }, { "epoch": 0.67, "learning_rate": 1.6911764705882356e-06, "logits/chosen": -1.091285228729248, "logits/rejected": -0.9964322447776794, "logps/chosen": -87.4781265258789, "logps/rejected": -228.25494384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.4720810055732727, "rewards/margins": 10.872272491455078, "rewards/rejected": -10.400191307067871, "step": 3046 }, { "epoch": 0.67, "learning_rate": 1.7279411764705883e-06, "logits/chosen": -1.0442427396774292, "logits/rejected": -0.978441059589386, "logps/chosen": -107.36863708496094, "logps/rejected": -199.670166015625, "loss": 0.3954, "rewards/accuracies": 0.0, "rewards/chosen": -0.42154693603515625, "rewards/margins": -0.18670348823070526, "rewards/rejected": -0.234843447804451, "step": 3047 }, { "epoch": 0.67, "learning_rate": 1.7647058823529414e-06, "logits/chosen": -0.8464856147766113, "logits/rejected": -0.7968419790267944, "logps/chosen": -82.07328796386719, "logps/rejected": -195.00167846679688, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.5703323483467102, "rewards/margins": 5.369300842285156, "rewards/rejected": -5.939633369445801, "step": 3048 }, { "epoch": 0.67, "learning_rate": 1.8014705882352942e-06, "logits/chosen": -0.8056222200393677, "logits/rejected": -0.8249988555908203, "logps/chosen": -206.15090942382812, "logps/rejected": -229.67828369140625, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -3.3564438819885254, "rewards/margins": 6.1109089851379395, "rewards/rejected": -9.467352867126465, "step": 3049 }, { "epoch": 0.68, "learning_rate": 1.8382352941176473e-06, "logits/chosen": -0.7170007228851318, "logits/rejected": -0.6879087090492249, "logps/chosen": -206.12945556640625, "logps/rejected": -145.90025329589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.402416944503784, "rewards/margins": 10.254384994506836, "rewards/rejected": -7.851968288421631, "step": 3050 }, { "epoch": 0.68, "learning_rate": 1.8750000000000003e-06, "logits/chosen": -1.0278631448745728, "logits/rejected": -1.0278631448745728, "logps/chosen": -223.56887817382812, "logps/rejected": -223.56887817382812, "loss": 0.434, "rewards/accuracies": 0.0, "rewards/chosen": -8.479924201965332, "rewards/margins": 0.0, "rewards/rejected": -8.479924201965332, "step": 3051 }, { "epoch": 0.68, "learning_rate": 1.9117647058823528e-06, "logits/chosen": -0.8593024015426636, "logits/rejected": -0.8559763431549072, "logps/chosen": -136.55105590820312, "logps/rejected": -157.29537963867188, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": 1.1053603887557983, "rewards/margins": 3.609030246734619, "rewards/rejected": -2.5036697387695312, "step": 3052 }, { "epoch": 0.68, "learning_rate": 1.948529411764706e-06, "logits/chosen": -1.1758487224578857, "logits/rejected": -1.1886954307556152, "logps/chosen": -69.50664520263672, "logps/rejected": -84.75679016113281, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.050823211669921875, "rewards/margins": 5.304114818572998, "rewards/rejected": -5.35493803024292, "step": 3053 }, { "epoch": 0.68, "learning_rate": 1.985294117647059e-06, "logits/chosen": -0.8646470308303833, "logits/rejected": -0.8844856023788452, "logps/chosen": -134.6817169189453, "logps/rejected": -117.44033813476562, "loss": 1.7679, "rewards/accuracies": 0.0, "rewards/chosen": -5.27416467666626, "rewards/margins": -3.4902000427246094, "rewards/rejected": -1.7839645147323608, "step": 3054 }, { "epoch": 0.68, "learning_rate": 2.022058823529412e-06, "logits/chosen": -1.1154437065124512, "logits/rejected": -1.115813136100769, "logps/chosen": -165.761474609375, "logps/rejected": -195.90170288085938, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": 2.329693555831909, "rewards/margins": 8.199722290039062, "rewards/rejected": -5.870028972625732, "step": 3055 }, { "epoch": 0.68, "learning_rate": 2.058823529411765e-06, "logits/chosen": -1.0566223859786987, "logits/rejected": -1.0622731447219849, "logps/chosen": -103.99559020996094, "logps/rejected": -138.2195587158203, "loss": 0.1233, "rewards/accuracies": 1.0, "rewards/chosen": -1.8768821954727173, "rewards/margins": 1.3235169649124146, "rewards/rejected": -3.200399160385132, "step": 3056 }, { "epoch": 0.68, "learning_rate": 2.095588235294118e-06, "logits/chosen": -1.134116768836975, "logits/rejected": -1.0927475690841675, "logps/chosen": -66.5718002319336, "logps/rejected": -106.47792053222656, "loss": 0.3952, "rewards/accuracies": 1.0, "rewards/chosen": 0.48757249116897583, "rewards/margins": 2.2819573879241943, "rewards/rejected": -1.7943848371505737, "step": 3057 }, { "epoch": 0.68, "learning_rate": 2.132352941176471e-06, "logits/chosen": -1.140289068222046, "logits/rejected": -1.1141979694366455, "logps/chosen": -118.08650207519531, "logps/rejected": -133.55267333984375, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -2.294454336166382, "rewards/margins": 3.096651315689087, "rewards/rejected": -5.391105651855469, "step": 3058 }, { "epoch": 0.68, "learning_rate": 2.1691176470588238e-06, "logits/chosen": -1.043574571609497, "logits/rejected": -1.0086164474487305, "logps/chosen": -169.8040771484375, "logps/rejected": -179.8489990234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.6253632307052612, "rewards/margins": 9.319704055786133, "rewards/rejected": -7.694340705871582, "step": 3059 }, { "epoch": 0.68, "learning_rate": 2.2058823529411767e-06, "logits/chosen": -1.2800060510635376, "logits/rejected": -1.3360731601715088, "logps/chosen": -136.71078491210938, "logps/rejected": -152.39772033691406, "loss": 0.3842, "rewards/accuracies": 1.0, "rewards/chosen": 0.3203796446323395, "rewards/margins": 2.550224542617798, "rewards/rejected": -2.229844808578491, "step": 3060 }, { "epoch": 0.68, "learning_rate": 2.2426470588235296e-06, "logits/chosen": -1.2794235944747925, "logits/rejected": -1.247741937637329, "logps/chosen": -137.52700805664062, "logps/rejected": -125.11906433105469, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/chosen": 0.806011974811554, "rewards/margins": 2.120143175125122, "rewards/rejected": -1.3141311407089233, "step": 3061 }, { "epoch": 0.68, "learning_rate": 2.2794117647058826e-06, "logits/chosen": -1.0262374877929688, "logits/rejected": -1.0647295713424683, "logps/chosen": -247.0443878173828, "logps/rejected": -176.88827514648438, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -0.7795730829238892, "rewards/margins": 2.820096015930176, "rewards/rejected": -3.5996689796447754, "step": 3062 }, { "epoch": 0.68, "learning_rate": 2.3161764705882355e-06, "logits/chosen": -1.1178442239761353, "logits/rejected": -1.0503261089324951, "logps/chosen": -162.38279724121094, "logps/rejected": -301.1986083984375, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.1687958240509033, "rewards/margins": 7.794506072998047, "rewards/rejected": -8.963301658630371, "step": 3063 }, { "epoch": 0.68, "learning_rate": 2.3529411764705885e-06, "logits/chosen": -0.7074077725410461, "logits/rejected": -0.8554500341415405, "logps/chosen": -249.65553283691406, "logps/rejected": -181.94517517089844, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": 1.8015578985214233, "rewards/margins": 3.5084564685821533, "rewards/rejected": -1.70689857006073, "step": 3064 }, { "epoch": 0.68, "learning_rate": 2.3897058823529414e-06, "logits/chosen": -0.6628347039222717, "logits/rejected": -0.6919779181480408, "logps/chosen": -227.74325561523438, "logps/rejected": -167.6278839111328, "loss": 0.0417, "rewards/accuracies": 1.0, "rewards/chosen": -0.44734498858451843, "rewards/margins": 2.699633836746216, "rewards/rejected": -3.1469788551330566, "step": 3065 }, { "epoch": 0.68, "learning_rate": 2.4264705882352943e-06, "logits/chosen": -0.7735698819160461, "logits/rejected": -0.7351880669593811, "logps/chosen": -163.85337829589844, "logps/rejected": -162.01718139648438, "loss": 0.2296, "rewards/accuracies": 1.0, "rewards/chosen": 0.13943329453468323, "rewards/margins": 6.75297737121582, "rewards/rejected": -6.61354398727417, "step": 3066 }, { "epoch": 0.68, "learning_rate": 2.4632352941176473e-06, "logits/chosen": -0.6148224472999573, "logits/rejected": -0.5982720255851746, "logps/chosen": -175.76312255859375, "logps/rejected": -138.4832763671875, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -0.31278687715530396, "rewards/margins": 3.677915334701538, "rewards/rejected": -3.9907021522521973, "step": 3067 }, { "epoch": 0.68, "learning_rate": 2.5e-06, "logits/chosen": -0.9521642327308655, "logits/rejected": -1.09366774559021, "logps/chosen": -204.40325927734375, "logps/rejected": -228.5986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.182244896888733, "rewards/margins": 13.356255531311035, "rewards/rejected": -12.174010276794434, "step": 3068 }, { "epoch": 0.68, "learning_rate": 2.536764705882353e-06, "logits/chosen": -0.7346285581588745, "logits/rejected": -0.7629613280296326, "logps/chosen": -78.57015991210938, "logps/rejected": -71.73345184326172, "loss": 0.1949, "rewards/accuracies": 1.0, "rewards/chosen": -1.2198059558868408, "rewards/margins": 0.9326980113983154, "rewards/rejected": -2.1525039672851562, "step": 3069 }, { "epoch": 0.68, "learning_rate": 2.5735294117647057e-06, "logits/chosen": -0.7469369173049927, "logits/rejected": -0.7333526611328125, "logps/chosen": -320.8778991699219, "logps/rejected": -302.40667724609375, "loss": 1.2414, "rewards/accuracies": 0.0, "rewards/chosen": -7.280401706695557, "rewards/margins": -2.3955202102661133, "rewards/rejected": -4.884881496429443, "step": 3070 }, { "epoch": 0.68, "learning_rate": 2.610294117647059e-06, "logits/chosen": -0.6298366189002991, "logits/rejected": -0.616922914981842, "logps/chosen": -12.374045372009277, "logps/rejected": -44.44589614868164, "loss": 0.2497, "rewards/accuracies": 1.0, "rewards/chosen": -0.2391311675310135, "rewards/margins": 2.645155429840088, "rewards/rejected": -2.884286642074585, "step": 3071 }, { "epoch": 0.68, "learning_rate": 2.647058823529412e-06, "logits/chosen": -1.1929097175598145, "logits/rejected": -1.1836133003234863, "logps/chosen": -63.292022705078125, "logps/rejected": -103.10526275634766, "loss": 0.0497, "rewards/accuracies": 1.0, "rewards/chosen": -1.3824859857559204, "rewards/margins": 2.3248343467712402, "rewards/rejected": -3.70732045173645, "step": 3072 }, { "epoch": 0.68, "learning_rate": 2.683823529411765e-06, "logits/chosen": -0.8090550303459167, "logits/rejected": -0.8909702897071838, "logps/chosen": -191.87789916992188, "logps/rejected": -157.2349853515625, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": -0.04036560282111168, "rewards/margins": 4.3840203285217285, "rewards/rejected": -4.424386024475098, "step": 3073 }, { "epoch": 0.68, "learning_rate": 2.720588235294118e-06, "logits/chosen": -0.9896512031555176, "logits/rejected": -0.9723324179649353, "logps/chosen": -134.3518524169922, "logps/rejected": -203.24771118164062, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.5270522832870483, "rewards/margins": 5.991087436676025, "rewards/rejected": -7.518139839172363, "step": 3074 }, { "epoch": 0.68, "learning_rate": 2.757352941176471e-06, "logits/chosen": -1.0101910829544067, "logits/rejected": -1.0044738054275513, "logps/chosen": -162.28750610351562, "logps/rejected": -86.4047622680664, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.38152772188186646, "rewards/margins": 4.456211090087891, "rewards/rejected": -4.837738990783691, "step": 3075 }, { "epoch": 0.68, "learning_rate": 2.7941176470588237e-06, "logits/chosen": -0.9643141627311707, "logits/rejected": -0.9273501038551331, "logps/chosen": -195.39242553710938, "logps/rejected": -356.4639892578125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.03888855129480362, "rewards/margins": 5.902075290679932, "rewards/rejected": -5.9409637451171875, "step": 3076 }, { "epoch": 0.68, "learning_rate": 2.8308823529411766e-06, "logits/chosen": -0.9297380447387695, "logits/rejected": -0.9362356662750244, "logps/chosen": -101.4447250366211, "logps/rejected": -98.23295593261719, "loss": 0.3345, "rewards/accuracies": 1.0, "rewards/chosen": -0.2785499691963196, "rewards/margins": 6.308670520782471, "rewards/rejected": -6.587220668792725, "step": 3077 }, { "epoch": 0.68, "learning_rate": 2.8676470588235296e-06, "logits/chosen": -1.1038118600845337, "logits/rejected": -1.2076687812805176, "logps/chosen": -172.38214111328125, "logps/rejected": -131.1721649169922, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 2.277822971343994, "rewards/margins": 6.033838272094727, "rewards/rejected": -3.7560150623321533, "step": 3078 }, { "epoch": 0.68, "learning_rate": 2.904411764705883e-06, "logits/chosen": -1.1423977613449097, "logits/rejected": -1.1216440200805664, "logps/chosen": -100.14461517333984, "logps/rejected": -127.16831970214844, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.34496232867240906, "rewards/margins": 5.038769245147705, "rewards/rejected": -5.383731365203857, "step": 3079 }, { "epoch": 0.68, "learning_rate": 2.9411764705882355e-06, "logits/chosen": -0.9984297156333923, "logits/rejected": -0.9984297156333923, "logps/chosen": -173.20176696777344, "logps/rejected": -173.20176696777344, "loss": 0.3864, "rewards/accuracies": 0.0, "rewards/chosen": -7.5504913330078125, "rewards/margins": 0.0, "rewards/rejected": -7.5504913330078125, "step": 3080 }, { "epoch": 0.68, "learning_rate": 2.9779411764705884e-06, "logits/chosen": -0.9642345905303955, "logits/rejected": -0.8823085427284241, "logps/chosen": -96.98410034179688, "logps/rejected": -287.3043212890625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5938400626182556, "rewards/margins": 8.364150047302246, "rewards/rejected": -8.957989692687988, "step": 3081 }, { "epoch": 0.68, "learning_rate": 3.0147058823529413e-06, "logits/chosen": -1.025960922241211, "logits/rejected": -1.0356892347335815, "logps/chosen": -94.51243591308594, "logps/rejected": -69.8897476196289, "loss": 0.061, "rewards/accuracies": 1.0, "rewards/chosen": -0.858563244342804, "rewards/margins": 2.4723756313323975, "rewards/rejected": -3.3309388160705566, "step": 3082 }, { "epoch": 0.68, "learning_rate": 3.0514705882352947e-06, "logits/chosen": -0.9420439600944519, "logits/rejected": -0.7828255891799927, "logps/chosen": -258.4580383300781, "logps/rejected": -571.1063232421875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.7177886962890625, "rewards/margins": 23.93739891052246, "rewards/rejected": -23.2196102142334, "step": 3083 }, { "epoch": 0.68, "learning_rate": 3.0882352941176476e-06, "logits/chosen": -0.9250953197479248, "logits/rejected": -0.9736415147781372, "logps/chosen": -103.4239501953125, "logps/rejected": -111.87619018554688, "loss": 0.0636, "rewards/accuracies": 1.0, "rewards/chosen": -1.2030487060546875, "rewards/margins": 2.0128486156463623, "rewards/rejected": -3.21589732170105, "step": 3084 }, { "epoch": 0.68, "learning_rate": 3.125e-06, "logits/chosen": -0.9008113741874695, "logits/rejected": -0.9863556027412415, "logps/chosen": -246.22149658203125, "logps/rejected": -199.88519287109375, "loss": 0.0402, "rewards/accuracies": 1.0, "rewards/chosen": 3.153881788253784, "rewards/margins": 12.247730255126953, "rewards/rejected": -9.09384822845459, "step": 3085 }, { "epoch": 0.68, "learning_rate": 3.161764705882353e-06, "logits/chosen": -1.1117507219314575, "logits/rejected": -1.1115964651107788, "logps/chosen": -119.94002532958984, "logps/rejected": -169.29080200195312, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.4230018854141235, "rewards/margins": 4.312985420227051, "rewards/rejected": -5.735987186431885, "step": 3086 }, { "epoch": 0.68, "learning_rate": 3.198529411764706e-06, "logits/chosen": -0.8709015250205994, "logits/rejected": -0.9165629148483276, "logps/chosen": -248.66763305664062, "logps/rejected": -195.13754272460938, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.013417053036391735, "rewards/margins": 6.326551914215088, "rewards/rejected": -6.313134670257568, "step": 3087 }, { "epoch": 0.68, "learning_rate": 3.2352941176470594e-06, "logits/chosen": -1.079999327659607, "logits/rejected": -1.1474775075912476, "logps/chosen": -302.3783874511719, "logps/rejected": -195.18710327148438, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.5196197628974915, "rewards/margins": 7.419887542724609, "rewards/rejected": -7.939507484436035, "step": 3088 }, { "epoch": 0.68, "learning_rate": 3.272058823529412e-06, "logits/chosen": -1.1902133226394653, "logits/rejected": -1.1483885049819946, "logps/chosen": -172.78427124023438, "logps/rejected": -305.71514892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.8695312738418579, "rewards/margins": 12.279702186584473, "rewards/rejected": -11.410170555114746, "step": 3089 }, { "epoch": 0.68, "learning_rate": 3.308823529411765e-06, "logits/chosen": -1.2630252838134766, "logits/rejected": -1.234763741493225, "logps/chosen": -82.28031921386719, "logps/rejected": -147.59420776367188, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": -0.8267974853515625, "rewards/margins": 2.578744649887085, "rewards/rejected": -3.4055421352386475, "step": 3090 }, { "epoch": 0.68, "learning_rate": 3.3455882352941178e-06, "logits/chosen": -1.0866241455078125, "logits/rejected": -1.0837804079055786, "logps/chosen": -92.60940551757812, "logps/rejected": -139.58885192871094, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.3668716549873352, "rewards/margins": 5.152709007263184, "rewards/rejected": -5.519580841064453, "step": 3091 }, { "epoch": 0.68, "learning_rate": 3.382352941176471e-06, "logits/chosen": -1.0740101337432861, "logits/rejected": -1.0150368213653564, "logps/chosen": -70.27711486816406, "logps/rejected": -191.73602294921875, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -1.1287994384765625, "rewards/margins": 7.868803024291992, "rewards/rejected": -8.997602462768555, "step": 3092 }, { "epoch": 0.68, "learning_rate": 3.419117647058824e-06, "logits/chosen": -1.0827556848526, "logits/rejected": -1.156557559967041, "logps/chosen": -178.85992431640625, "logps/rejected": -100.58808898925781, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": 0.9293548464775085, "rewards/margins": 2.790074110031128, "rewards/rejected": -1.8607193231582642, "step": 3093 }, { "epoch": 0.68, "learning_rate": 3.4558823529411766e-06, "logits/chosen": -1.1222763061523438, "logits/rejected": -0.9809726476669312, "logps/chosen": -109.744873046875, "logps/rejected": -258.6146240234375, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -4.116772651672363, "rewards/margins": 2.7916808128356934, "rewards/rejected": -6.908453464508057, "step": 3094 }, { "epoch": 0.69, "learning_rate": 3.4926470588235295e-06, "logits/chosen": -0.6943929195404053, "logits/rejected": -0.6943929195404053, "logps/chosen": -137.62161254882812, "logps/rejected": -137.62161254882812, "loss": 0.3639, "rewards/accuracies": 0.0, "rewards/chosen": -2.648590087890625, "rewards/margins": 0.0, "rewards/rejected": -2.648590087890625, "step": 3095 }, { "epoch": 0.69, "learning_rate": 3.529411764705883e-06, "logits/chosen": -0.7803906798362732, "logits/rejected": -0.8228456974029541, "logps/chosen": -193.84732055664062, "logps/rejected": -241.50631713867188, "loss": 0.6145, "rewards/accuracies": 1.0, "rewards/chosen": 2.2714767456054688, "rewards/margins": 1.523718237876892, "rewards/rejected": 0.7477585077285767, "step": 3096 }, { "epoch": 0.69, "learning_rate": 3.566176470588236e-06, "logits/chosen": -0.8188579082489014, "logits/rejected": -0.9234175682067871, "logps/chosen": -281.7957458496094, "logps/rejected": -124.54792785644531, "loss": 0.1027, "rewards/accuracies": 1.0, "rewards/chosen": -0.7778900265693665, "rewards/margins": 1.4793992042541504, "rewards/rejected": -2.257289171218872, "step": 3097 }, { "epoch": 0.69, "learning_rate": 3.6029411764705883e-06, "logits/chosen": -0.910666286945343, "logits/rejected": -0.8950684070587158, "logps/chosen": -127.41950988769531, "logps/rejected": -201.3687286376953, "loss": 0.1299, "rewards/accuracies": 1.0, "rewards/chosen": -5.129032135009766, "rewards/margins": 1.2297463417053223, "rewards/rejected": -6.358778476715088, "step": 3098 }, { "epoch": 0.69, "learning_rate": 3.6397058823529413e-06, "logits/chosen": -0.9728584885597229, "logits/rejected": -1.1019502878189087, "logps/chosen": -147.99365234375, "logps/rejected": -85.47517395019531, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -0.18135376274585724, "rewards/margins": 3.99147629737854, "rewards/rejected": -4.172830104827881, "step": 3099 }, { "epoch": 0.69, "learning_rate": 3.6764705882352946e-06, "logits/chosen": -1.3054696321487427, "logits/rejected": -1.3369239568710327, "logps/chosen": -105.47718048095703, "logps/rejected": -212.4351348876953, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.9670753479003906, "rewards/margins": 7.168885231018066, "rewards/rejected": -8.135960578918457, "step": 3100 }, { "epoch": 0.69, "learning_rate": 3.7132352941176476e-06, "logits/chosen": -1.090903639793396, "logits/rejected": -0.4944306015968323, "logps/chosen": -88.16361999511719, "logps/rejected": -606.5738525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.384521484375, "rewards/margins": 31.773147583007812, "rewards/rejected": -32.15766906738281, "step": 3101 }, { "epoch": 0.69, "learning_rate": 3.7500000000000005e-06, "logits/chosen": -1.1057556867599487, "logits/rejected": -1.1057556867599487, "logps/chosen": -146.3138427734375, "logps/rejected": -146.3138427734375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.075845241546631, "rewards/margins": 0.0, "rewards/rejected": -6.075845241546631, "step": 3102 }, { "epoch": 0.69, "learning_rate": 3.786764705882353e-06, "logits/chosen": -0.6884680986404419, "logits/rejected": -0.6538298726081848, "logps/chosen": -62.01506042480469, "logps/rejected": -101.18496704101562, "loss": 0.3981, "rewards/accuracies": 1.0, "rewards/chosen": -0.004932403564453125, "rewards/margins": 4.152581214904785, "rewards/rejected": -4.157513618469238, "step": 3103 }, { "epoch": 0.69, "learning_rate": 3.8235294117647055e-06, "logits/chosen": -1.2221754789352417, "logits/rejected": -1.1801363229751587, "logps/chosen": -66.08778381347656, "logps/rejected": -152.48968505859375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.019557237625122, "rewards/margins": 4.294101715087891, "rewards/rejected": -5.313659191131592, "step": 3104 }, { "epoch": 0.69, "learning_rate": 3.860294117647059e-06, "logits/chosen": -1.038666844367981, "logits/rejected": -1.050178050994873, "logps/chosen": -170.05628967285156, "logps/rejected": -141.27362060546875, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.5069259405136108, "rewards/margins": 4.3944220542907715, "rewards/rejected": -5.901348114013672, "step": 3105 }, { "epoch": 0.69, "learning_rate": 3.897058823529412e-06, "logits/chosen": -0.8680463433265686, "logits/rejected": -0.8402847647666931, "logps/chosen": -177.8996124267578, "logps/rejected": -200.635498046875, "loss": 0.4891, "rewards/accuracies": 0.0, "rewards/chosen": 0.3243240416049957, "rewards/margins": -0.5053161382675171, "rewards/rejected": 0.8296402096748352, "step": 3106 }, { "epoch": 0.69, "learning_rate": 3.933823529411765e-06, "logits/chosen": -1.224373459815979, "logits/rejected": -1.364795446395874, "logps/chosen": -233.920166015625, "logps/rejected": -62.42018127441406, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": 0.9552642703056335, "rewards/margins": 3.2789525985717773, "rewards/rejected": -2.323688268661499, "step": 3107 }, { "epoch": 0.69, "learning_rate": 3.970588235294118e-06, "logits/chosen": -0.9839031100273132, "logits/rejected": -0.9620583653450012, "logps/chosen": -66.13728332519531, "logps/rejected": -121.42877197265625, "loss": 0.2039, "rewards/accuracies": 1.0, "rewards/chosen": -0.335183709859848, "rewards/margins": 1.1303001642227173, "rewards/rejected": -1.4654839038848877, "step": 3108 }, { "epoch": 0.69, "learning_rate": 4.007352941176471e-06, "logits/chosen": -0.9594123363494873, "logits/rejected": -0.9535594582557678, "logps/chosen": -92.73120880126953, "logps/rejected": -82.84513854980469, "loss": 2.2789, "rewards/accuracies": 0.0, "rewards/chosen": -3.8182785511016846, "rewards/margins": -1.4164073467254639, "rewards/rejected": -2.4018712043762207, "step": 3109 }, { "epoch": 0.69, "learning_rate": 4.044117647058824e-06, "logits/chosen": -1.3176013231277466, "logits/rejected": -1.3820500373840332, "logps/chosen": -100.39804077148438, "logps/rejected": -26.18161392211914, "loss": 1.0957, "rewards/accuracies": 0.0, "rewards/chosen": -2.5735154151916504, "rewards/margins": -1.4183801412582397, "rewards/rejected": -1.1551352739334106, "step": 3110 }, { "epoch": 0.69, "learning_rate": 4.080882352941177e-06, "logits/chosen": -0.7717631459236145, "logits/rejected": -0.7577641010284424, "logps/chosen": -159.76983642578125, "logps/rejected": -245.5664825439453, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -2.248983860015869, "rewards/margins": 8.061494827270508, "rewards/rejected": -10.310478210449219, "step": 3111 }, { "epoch": 0.69, "learning_rate": 4.11764705882353e-06, "logits/chosen": -1.166130542755127, "logits/rejected": -1.166032314300537, "logps/chosen": -79.45541381835938, "logps/rejected": -128.71121215820312, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": 0.6764137148857117, "rewards/margins": 4.048856258392334, "rewards/rejected": -3.3724427223205566, "step": 3112 }, { "epoch": 0.69, "learning_rate": 4.154411764705883e-06, "logits/chosen": -0.9275719523429871, "logits/rejected": -0.8938500881195068, "logps/chosen": -255.73178100585938, "logps/rejected": -139.303955078125, "loss": 0.1111, "rewards/accuracies": 1.0, "rewards/chosen": 0.486367791891098, "rewards/margins": 1.3917007446289062, "rewards/rejected": -0.9053329825401306, "step": 3113 }, { "epoch": 0.69, "learning_rate": 4.191176470588236e-06, "logits/chosen": -0.8776255249977112, "logits/rejected": -0.9092395901679993, "logps/chosen": -244.23699951171875, "logps/rejected": -66.09454345703125, "loss": 1.0541, "rewards/accuracies": 1.0, "rewards/chosen": 0.48411256074905396, "rewards/margins": 4.267117977142334, "rewards/rejected": -3.783005475997925, "step": 3114 }, { "epoch": 0.69, "learning_rate": 4.227941176470589e-06, "logits/chosen": -0.760516345500946, "logits/rejected": -0.758387565612793, "logps/chosen": -71.90118408203125, "logps/rejected": -130.025634765625, "loss": 0.0303, "rewards/accuracies": 1.0, "rewards/chosen": -0.5541626214981079, "rewards/margins": 2.9865293502807617, "rewards/rejected": -3.540692090988159, "step": 3115 }, { "epoch": 0.69, "learning_rate": 4.264705882352942e-06, "logits/chosen": -0.8306203484535217, "logits/rejected": -0.7991371750831604, "logps/chosen": -95.68672943115234, "logps/rejected": -130.3385009765625, "loss": 0.0835, "rewards/accuracies": 1.0, "rewards/chosen": -1.0170799493789673, "rewards/margins": 1.7045608758926392, "rewards/rejected": -2.7216408252716064, "step": 3116 }, { "epoch": 0.69, "learning_rate": 4.301470588235295e-06, "logits/chosen": -1.2760425806045532, "logits/rejected": -1.2434083223342896, "logps/chosen": -122.88232421875, "logps/rejected": -190.71438598632812, "loss": 0.3107, "rewards/accuracies": 1.0, "rewards/chosen": -3.213855028152466, "rewards/margins": 3.4205825328826904, "rewards/rejected": -6.634437561035156, "step": 3117 }, { "epoch": 0.69, "learning_rate": 4.3382352941176475e-06, "logits/chosen": -0.6757495403289795, "logits/rejected": -0.5755006670951843, "logps/chosen": -185.0830535888672, "logps/rejected": -224.7533416748047, "loss": 0.0706, "rewards/accuracies": 1.0, "rewards/chosen": 0.3072769343852997, "rewards/margins": 7.867750644683838, "rewards/rejected": -7.560473918914795, "step": 3118 }, { "epoch": 0.69, "learning_rate": 4.3750000000000005e-06, "logits/chosen": -0.7166536450386047, "logits/rejected": -0.5746296048164368, "logps/chosen": -203.5092315673828, "logps/rejected": -326.5042724609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 3.78350830078125, "rewards/margins": 7.562835693359375, "rewards/rejected": -3.779327392578125, "step": 3119 }, { "epoch": 0.69, "learning_rate": 4.411764705882353e-06, "logits/chosen": -0.8911516070365906, "logits/rejected": -0.9192511439323425, "logps/chosen": -114.9482192993164, "logps/rejected": -75.90689086914062, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -1.8590850830078125, "rewards/margins": 3.1982688903808594, "rewards/rejected": -5.057353973388672, "step": 3120 }, { "epoch": 0.69, "learning_rate": 4.448529411764706e-06, "logits/chosen": -0.8262438774108887, "logits/rejected": -0.7833959460258484, "logps/chosen": -82.96835327148438, "logps/rejected": -131.78346252441406, "loss": 0.1846, "rewards/accuracies": 1.0, "rewards/chosen": 0.25896531343460083, "rewards/margins": 0.8113136291503906, "rewards/rejected": -0.5523483157157898, "step": 3121 }, { "epoch": 0.69, "learning_rate": 4.485294117647059e-06, "logits/chosen": -0.8214178085327148, "logits/rejected": -0.8306557536125183, "logps/chosen": -106.3089370727539, "logps/rejected": -131.93572998046875, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -1.0455093383789062, "rewards/margins": 4.6478142738342285, "rewards/rejected": -5.693323612213135, "step": 3122 }, { "epoch": 0.69, "learning_rate": 4.522058823529412e-06, "logits/chosen": -1.034929871559143, "logits/rejected": -1.0428694486618042, "logps/chosen": -216.66102600097656, "logps/rejected": -188.91860961914062, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 2.0184738636016846, "rewards/margins": 5.2192487716674805, "rewards/rejected": -3.200775146484375, "step": 3123 }, { "epoch": 0.69, "learning_rate": 4.558823529411765e-06, "logits/chosen": -1.1553761959075928, "logits/rejected": -1.2175484895706177, "logps/chosen": -162.92242431640625, "logps/rejected": -143.40480041503906, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.1471405029296875, "rewards/margins": 6.027308940887451, "rewards/rejected": -6.174449443817139, "step": 3124 }, { "epoch": 0.69, "learning_rate": 4.595588235294118e-06, "logits/chosen": -1.138628602027893, "logits/rejected": -1.1334614753723145, "logps/chosen": -55.348793029785156, "logps/rejected": -68.88033294677734, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": 0.509891927242279, "rewards/margins": 2.567324638366699, "rewards/rejected": -2.0574326515197754, "step": 3125 }, { "epoch": 0.69, "learning_rate": 4.632352941176471e-06, "logits/chosen": -0.8961686491966248, "logits/rejected": -0.7955193519592285, "logps/chosen": -202.83761596679688, "logps/rejected": -309.6940002441406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.972076416015625, "rewards/margins": 8.835348129272461, "rewards/rejected": -7.863272190093994, "step": 3126 }, { "epoch": 0.69, "learning_rate": 4.669117647058824e-06, "logits/chosen": -1.0618027448654175, "logits/rejected": -1.0345579385757446, "logps/chosen": -198.73976135253906, "logps/rejected": -306.24310302734375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.8357834219932556, "rewards/margins": 8.380546569824219, "rewards/rejected": -7.544763088226318, "step": 3127 }, { "epoch": 0.69, "learning_rate": 4.705882352941177e-06, "logits/chosen": -1.1813219785690308, "logits/rejected": -1.1745493412017822, "logps/chosen": -86.09268188476562, "logps/rejected": -184.42568969726562, "loss": 1.2435, "rewards/accuracies": 1.0, "rewards/chosen": 0.884516179561615, "rewards/margins": 3.491370439529419, "rewards/rejected": -2.606854200363159, "step": 3128 }, { "epoch": 0.69, "learning_rate": 4.74264705882353e-06, "logits/chosen": -1.1469266414642334, "logits/rejected": -1.0838348865509033, "logps/chosen": -91.93456268310547, "logps/rejected": -148.91336059570312, "loss": 0.688, "rewards/accuracies": 0.0, "rewards/chosen": -1.3948814868927002, "rewards/margins": -1.0847618579864502, "rewards/rejected": -0.31011962890625, "step": 3129 }, { "epoch": 0.69, "learning_rate": 4.779411764705883e-06, "logits/chosen": -1.1979291439056396, "logits/rejected": -1.2285658121109009, "logps/chosen": -110.7041015625, "logps/rejected": -147.70977783203125, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -0.4030517637729645, "rewards/margins": 3.4959349632263184, "rewards/rejected": -3.89898681640625, "step": 3130 }, { "epoch": 0.69, "learning_rate": 4.816176470588236e-06, "logits/chosen": -1.2426202297210693, "logits/rejected": -1.2426202297210693, "logps/chosen": -112.64508819580078, "logps/rejected": -112.64508819580078, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -3.236696720123291, "rewards/margins": 0.0, "rewards/rejected": -3.236696720123291, "step": 3131 }, { "epoch": 0.69, "learning_rate": 4.852941176470589e-06, "logits/chosen": -1.1086523532867432, "logits/rejected": -1.078445553779602, "logps/chosen": -63.821075439453125, "logps/rejected": -145.20388793945312, "loss": 0.2164, "rewards/accuracies": 1.0, "rewards/chosen": 0.21943512558937073, "rewards/margins": 3.9739937782287598, "rewards/rejected": -3.754558563232422, "step": 3132 }, { "epoch": 0.69, "learning_rate": 4.889705882352942e-06, "logits/chosen": -1.016890048980713, "logits/rejected": -0.884997546672821, "logps/chosen": -137.66607666015625, "logps/rejected": -232.91925048828125, "loss": 0.3988, "rewards/accuracies": 1.0, "rewards/chosen": 1.8927520513534546, "rewards/margins": 7.853065490722656, "rewards/rejected": -5.960313320159912, "step": 3133 }, { "epoch": 0.69, "learning_rate": 4.9264705882352945e-06, "logits/chosen": -1.4563401937484741, "logits/rejected": -1.5038650035858154, "logps/chosen": -112.64179229736328, "logps/rejected": -96.54605102539062, "loss": 0.1891, "rewards/accuracies": 1.0, "rewards/chosen": -1.2911872863769531, "rewards/margins": 0.7773826122283936, "rewards/rejected": -2.0685698986053467, "step": 3134 }, { "epoch": 0.69, "learning_rate": 4.9632352941176475e-06, "logits/chosen": -0.941501796245575, "logits/rejected": -0.9350411295890808, "logps/chosen": -101.55484008789062, "logps/rejected": -128.717529296875, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": -0.21023865044116974, "rewards/margins": 3.235321044921875, "rewards/rejected": -3.4455597400665283, "step": 3135 }, { "epoch": 0.69, "learning_rate": 5e-06, "logits/chosen": -1.076839804649353, "logits/rejected": -0.9847000241279602, "logps/chosen": -125.81340026855469, "logps/rejected": -187.62649536132812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.45657655596733093, "rewards/margins": 8.401158332824707, "rewards/rejected": -8.857734680175781, "step": 3136 }, { "epoch": 0.69, "learning_rate": 5.036764705882353e-06, "logits/chosen": -0.6508832573890686, "logits/rejected": -0.6265999674797058, "logps/chosen": -111.68901062011719, "logps/rejected": -266.8872985839844, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -3.4593873023986816, "rewards/margins": 7.162527561187744, "rewards/rejected": -10.621914863586426, "step": 3137 }, { "epoch": 0.69, "learning_rate": 5.073529411764706e-06, "logits/chosen": -1.0789835453033447, "logits/rejected": -1.066025972366333, "logps/chosen": -136.80169677734375, "logps/rejected": -265.4123229980469, "loss": 0.1662, "rewards/accuracies": 1.0, "rewards/chosen": -3.8866913318634033, "rewards/margins": 1.2493317127227783, "rewards/rejected": -5.136023044586182, "step": 3138 }, { "epoch": 0.69, "learning_rate": 5.110294117647059e-06, "logits/chosen": -1.1510839462280273, "logits/rejected": -1.121280550956726, "logps/chosen": -143.68023681640625, "logps/rejected": -208.05435180664062, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.8481704592704773, "rewards/margins": 4.616063117980957, "rewards/rejected": -5.4642333984375, "step": 3139 }, { "epoch": 0.69, "learning_rate": 5.147058823529411e-06, "logits/chosen": -0.7734741568565369, "logits/rejected": -0.7348949909210205, "logps/chosen": -85.97549438476562, "logps/rejected": -199.54083251953125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.8207122683525085, "rewards/margins": 5.682629585266113, "rewards/rejected": -6.5033416748046875, "step": 3140 }, { "epoch": 0.7, "learning_rate": 5.183823529411766e-06, "logits/chosen": -0.6045148968696594, "logits/rejected": -0.7494759559631348, "logps/chosen": -169.54861450195312, "logps/rejected": -91.38236236572266, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 1.2858688831329346, "rewards/margins": 4.694490909576416, "rewards/rejected": -3.4086220264434814, "step": 3141 }, { "epoch": 0.7, "learning_rate": 5.220588235294118e-06, "logits/chosen": -0.9796523451805115, "logits/rejected": -1.0153709650039673, "logps/chosen": -270.977294921875, "logps/rejected": -166.03720092773438, "loss": 0.4353, "rewards/accuracies": 0.0, "rewards/chosen": 1.6549224853515625, "rewards/margins": -0.3246643543243408, "rewards/rejected": 1.9795868396759033, "step": 3142 }, { "epoch": 0.7, "learning_rate": 5.257352941176471e-06, "logits/chosen": -1.361961007118225, "logits/rejected": -1.4536840915679932, "logps/chosen": -166.05999755859375, "logps/rejected": -106.41075134277344, "loss": 0.1875, "rewards/accuracies": 1.0, "rewards/chosen": 0.20498351752758026, "rewards/margins": 1.351261854171753, "rewards/rejected": -1.1462783813476562, "step": 3143 }, { "epoch": 0.7, "learning_rate": 5.294117647058824e-06, "logits/chosen": -0.9497627019882202, "logits/rejected": -0.9912118315696716, "logps/chosen": -182.36839294433594, "logps/rejected": -167.91038513183594, "loss": 0.344, "rewards/accuracies": 1.0, "rewards/chosen": 0.22908782958984375, "rewards/margins": 0.9485718011856079, "rewards/rejected": -0.7194839715957642, "step": 3144 }, { "epoch": 0.7, "learning_rate": 5.330882352941177e-06, "logits/chosen": -0.7029873728752136, "logits/rejected": -0.6089796423912048, "logps/chosen": -112.6854248046875, "logps/rejected": -211.17196655273438, "loss": 0.3937, "rewards/accuracies": 1.0, "rewards/chosen": -1.1261314153671265, "rewards/margins": 1.117209792137146, "rewards/rejected": -2.2433412075042725, "step": 3145 }, { "epoch": 0.7, "learning_rate": 5.36764705882353e-06, "logits/chosen": -1.0427278280258179, "logits/rejected": -1.0410523414611816, "logps/chosen": -138.567138671875, "logps/rejected": -242.70111083984375, "loss": 0.6331, "rewards/accuracies": 0.0, "rewards/chosen": -0.9036880731582642, "rewards/margins": -0.9350509643554688, "rewards/rejected": 0.03136291727423668, "step": 3146 }, { "epoch": 0.7, "learning_rate": 5.404411764705883e-06, "logits/chosen": -0.902467668056488, "logits/rejected": -0.8805622458457947, "logps/chosen": -87.24116516113281, "logps/rejected": -95.90028381347656, "loss": 0.2449, "rewards/accuracies": 1.0, "rewards/chosen": -1.8358535766601562, "rewards/margins": 0.45868682861328125, "rewards/rejected": -2.2945404052734375, "step": 3147 }, { "epoch": 0.7, "learning_rate": 5.441176470588236e-06, "logits/chosen": -1.3704777956008911, "logits/rejected": -1.3412808179855347, "logps/chosen": -106.19303894042969, "logps/rejected": -250.54052734375, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": -0.1509193480014801, "rewards/margins": 9.132369995117188, "rewards/rejected": -9.283288955688477, "step": 3148 }, { "epoch": 0.7, "learning_rate": 5.4779411764705894e-06, "logits/chosen": -0.9921111464500427, "logits/rejected": -1.0129084587097168, "logps/chosen": -97.6024169921875, "logps/rejected": -60.43233871459961, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": 0.1734825223684311, "rewards/margins": 3.5355663299560547, "rewards/rejected": -3.362083911895752, "step": 3149 }, { "epoch": 0.7, "learning_rate": 5.514705882352942e-06, "logits/chosen": -0.9056115746498108, "logits/rejected": -0.826248049736023, "logps/chosen": -119.66238403320312, "logps/rejected": -231.03265380859375, "loss": 0.1746, "rewards/accuracies": 1.0, "rewards/chosen": 1.1985015869140625, "rewards/margins": 0.9647140502929688, "rewards/rejected": 0.23378753662109375, "step": 3150 }, { "epoch": 0.7, "learning_rate": 5.5514705882352945e-06, "logits/chosen": -1.0247197151184082, "logits/rejected": -0.9188573956489563, "logps/chosen": -119.87630462646484, "logps/rejected": -206.29425048828125, "loss": 0.7561, "rewards/accuracies": 0.0, "rewards/chosen": -1.3237183094024658, "rewards/margins": -1.2619400024414062, "rewards/rejected": -0.06177825853228569, "step": 3151 }, { "epoch": 0.7, "learning_rate": 5.588235294117647e-06, "logits/chosen": -0.8617535829544067, "logits/rejected": -0.9009653925895691, "logps/chosen": -138.19357299804688, "logps/rejected": -110.17375183105469, "loss": 0.0864, "rewards/accuracies": 1.0, "rewards/chosen": 0.7530517578125, "rewards/margins": 6.223618507385254, "rewards/rejected": -5.470566749572754, "step": 3152 }, { "epoch": 0.7, "learning_rate": 5.625e-06, "logits/chosen": -0.7128980159759521, "logits/rejected": -0.7128980159759521, "logps/chosen": -185.77622985839844, "logps/rejected": -185.77622985839844, "loss": 0.9245, "rewards/accuracies": 0.0, "rewards/chosen": -8.530077934265137, "rewards/margins": 0.0, "rewards/rejected": -8.530077934265137, "step": 3153 }, { "epoch": 0.7, "learning_rate": 5.661764705882353e-06, "logits/chosen": -1.2106213569641113, "logits/rejected": -1.2408169507980347, "logps/chosen": -137.28012084960938, "logps/rejected": -147.309326171875, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": 0.520642101764679, "rewards/margins": 7.605836868286133, "rewards/rejected": -7.0851945877075195, "step": 3154 }, { "epoch": 0.7, "learning_rate": 5.698529411764706e-06, "logits/chosen": -1.0171699523925781, "logits/rejected": -1.0171699523925781, "logps/chosen": -100.9850845336914, "logps/rejected": -100.9850845336914, "loss": 0.3488, "rewards/accuracies": 0.0, "rewards/chosen": -1.7784744501113892, "rewards/margins": 0.0, "rewards/rejected": -1.7784744501113892, "step": 3155 }, { "epoch": 0.7, "learning_rate": 5.735294117647059e-06, "logits/chosen": -0.7619173526763916, "logits/rejected": -0.7783661484718323, "logps/chosen": -105.89888000488281, "logps/rejected": -96.45498657226562, "loss": 0.0982, "rewards/accuracies": 1.0, "rewards/chosen": -0.9126739501953125, "rewards/margins": 1.5615060329437256, "rewards/rejected": -2.474179983139038, "step": 3156 }, { "epoch": 0.7, "learning_rate": 5.772058823529412e-06, "logits/chosen": -0.8805381655693054, "logits/rejected": -0.8426333069801331, "logps/chosen": -211.91737365722656, "logps/rejected": -250.9993896484375, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": 2.459669589996338, "rewards/margins": 3.8027148246765137, "rewards/rejected": -1.3430451154708862, "step": 3157 }, { "epoch": 0.7, "learning_rate": 5.808823529411766e-06, "logits/chosen": -1.087051510810852, "logits/rejected": -1.2075742483139038, "logps/chosen": -188.6514892578125, "logps/rejected": -125.58061218261719, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 3.5712616443634033, "rewards/margins": 7.181305885314941, "rewards/rejected": -3.610044240951538, "step": 3158 }, { "epoch": 0.7, "learning_rate": 5.845588235294119e-06, "logits/chosen": -1.15226149559021, "logits/rejected": -1.1896564960479736, "logps/chosen": -94.18534851074219, "logps/rejected": -143.52178955078125, "loss": 0.1951, "rewards/accuracies": 1.0, "rewards/chosen": -2.253398895263672, "rewards/margins": 1.815925121307373, "rewards/rejected": -4.069324016571045, "step": 3159 }, { "epoch": 0.7, "learning_rate": 5.882352941176471e-06, "logits/chosen": -0.94037264585495, "logits/rejected": -0.9758515357971191, "logps/chosen": -137.04812622070312, "logps/rejected": -98.46592712402344, "loss": 0.2118, "rewards/accuracies": 1.0, "rewards/chosen": 0.406045526266098, "rewards/margins": 2.422192335128784, "rewards/rejected": -2.0161468982696533, "step": 3160 }, { "epoch": 0.7, "learning_rate": 5.919117647058824e-06, "logits/chosen": -1.016345739364624, "logits/rejected": -1.1207927465438843, "logps/chosen": -203.4102783203125, "logps/rejected": -55.43439483642578, "loss": 1.6598, "rewards/accuracies": 0.0, "rewards/chosen": -5.427737712860107, "rewards/margins": -2.972626209259033, "rewards/rejected": -2.455111503601074, "step": 3161 }, { "epoch": 0.7, "learning_rate": 5.955882352941177e-06, "logits/chosen": -0.7198994755744934, "logits/rejected": -0.6283266544342041, "logps/chosen": -98.37244415283203, "logps/rejected": -185.01412963867188, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": -0.7649208307266235, "rewards/margins": 2.350478172302246, "rewards/rejected": -3.115399122238159, "step": 3162 }, { "epoch": 0.7, "learning_rate": 5.99264705882353e-06, "logits/chosen": -1.3396672010421753, "logits/rejected": -1.3302949666976929, "logps/chosen": -75.18698120117188, "logps/rejected": -126.65680694580078, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.15979920327663422, "rewards/margins": 4.825602054595947, "rewards/rejected": -4.985401153564453, "step": 3163 }, { "epoch": 0.7, "learning_rate": 6.029411764705883e-06, "logits/chosen": -0.9888556599617004, "logits/rejected": -0.9760750532150269, "logps/chosen": -89.43717956542969, "logps/rejected": -103.98409271240234, "loss": 0.3632, "rewards/accuracies": 1.0, "rewards/chosen": -1.4638687372207642, "rewards/margins": 2.6441421508789062, "rewards/rejected": -4.108010768890381, "step": 3164 }, { "epoch": 0.7, "learning_rate": 6.066176470588236e-06, "logits/chosen": -0.9125403165817261, "logits/rejected": -0.9049214124679565, "logps/chosen": -154.88064575195312, "logps/rejected": -145.938232421875, "loss": 0.3372, "rewards/accuracies": 1.0, "rewards/chosen": -1.8987549543380737, "rewards/margins": 0.038665771484375, "rewards/rejected": -1.9374207258224487, "step": 3165 }, { "epoch": 0.7, "learning_rate": 6.102941176470589e-06, "logits/chosen": -1.161902904510498, "logits/rejected": -1.1381630897521973, "logps/chosen": -139.5615692138672, "logps/rejected": -161.38804626464844, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.48474428057670593, "rewards/margins": 6.647781848907471, "rewards/rejected": -6.1630377769470215, "step": 3166 }, { "epoch": 0.7, "learning_rate": 6.139705882352942e-06, "logits/chosen": -1.1682665348052979, "logits/rejected": -1.2537884712219238, "logps/chosen": -190.28562927246094, "logps/rejected": -104.12258911132812, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -1.7572036981582642, "rewards/margins": 3.3205294609069824, "rewards/rejected": -5.077733039855957, "step": 3167 }, { "epoch": 0.7, "learning_rate": 6.176470588235295e-06, "logits/chosen": -0.9183731079101562, "logits/rejected": -0.8810656070709229, "logps/chosen": -67.64788055419922, "logps/rejected": -195.26063537597656, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 0.7118385434150696, "rewards/margins": 3.8416953086853027, "rewards/rejected": -3.129856824874878, "step": 3168 }, { "epoch": 0.7, "learning_rate": 6.213235294117647e-06, "logits/chosen": -1.1158816814422607, "logits/rejected": -1.147900938987732, "logps/chosen": -94.41633605957031, "logps/rejected": -53.11362838745117, "loss": 0.1143, "rewards/accuracies": 1.0, "rewards/chosen": -1.1616287231445312, "rewards/margins": 1.8261487483978271, "rewards/rejected": -2.9877774715423584, "step": 3169 }, { "epoch": 0.7, "learning_rate": 6.25e-06, "logits/chosen": -1.1336050033569336, "logits/rejected": -1.012184500694275, "logps/chosen": -97.29750061035156, "logps/rejected": -220.11248779296875, "loss": 0.6041, "rewards/accuracies": 1.0, "rewards/chosen": -0.6748924255371094, "rewards/margins": 3.3960533142089844, "rewards/rejected": -4.070945739746094, "step": 3170 }, { "epoch": 0.7, "learning_rate": 6.286764705882353e-06, "logits/chosen": -1.1034605503082275, "logits/rejected": -1.1139311790466309, "logps/chosen": -103.53228759765625, "logps/rejected": -143.2731170654297, "loss": 0.1732, "rewards/accuracies": 1.0, "rewards/chosen": -0.49566879868507385, "rewards/margins": 0.9216606616973877, "rewards/rejected": -1.4173294305801392, "step": 3171 }, { "epoch": 0.7, "learning_rate": 6.323529411764706e-06, "logits/chosen": -1.0031150579452515, "logits/rejected": -0.9476140141487122, "logps/chosen": -108.04193115234375, "logps/rejected": -215.88192749023438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.9190658926963806, "rewards/margins": 9.066854476928711, "rewards/rejected": -8.147789001464844, "step": 3172 }, { "epoch": 0.7, "learning_rate": 6.360294117647059e-06, "logits/chosen": -1.1178374290466309, "logits/rejected": -1.014487624168396, "logps/chosen": -121.78550720214844, "logps/rejected": -191.9036865234375, "loss": 0.2907, "rewards/accuracies": 1.0, "rewards/chosen": 0.7799758911132812, "rewards/margins": 1.8326141834259033, "rewards/rejected": -1.052638292312622, "step": 3173 }, { "epoch": 0.7, "learning_rate": 6.397058823529412e-06, "logits/chosen": -0.9508932828903198, "logits/rejected": -0.9961006045341492, "logps/chosen": -235.42868041992188, "logps/rejected": -189.57359313964844, "loss": 0.0714, "rewards/accuracies": 1.0, "rewards/chosen": 3.9470367431640625, "rewards/margins": 1.87459397315979, "rewards/rejected": 2.0724427700042725, "step": 3174 }, { "epoch": 0.7, "learning_rate": 6.433823529411766e-06, "logits/chosen": -0.6401482820510864, "logits/rejected": -0.7014507055282593, "logps/chosen": -183.1375274658203, "logps/rejected": -238.53965759277344, "loss": 0.6518, "rewards/accuracies": 0.0, "rewards/chosen": -2.9368577003479004, "rewards/margins": -0.9856079816818237, "rewards/rejected": -1.9512497186660767, "step": 3175 }, { "epoch": 0.7, "learning_rate": 6.470588235294119e-06, "logits/chosen": -0.7825522422790527, "logits/rejected": -0.7615411877632141, "logps/chosen": -78.4173355102539, "logps/rejected": -149.2048797607422, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": 0.4211326539516449, "rewards/margins": 3.6178877353668213, "rewards/rejected": -3.1967551708221436, "step": 3176 }, { "epoch": 0.7, "learning_rate": 6.507352941176472e-06, "logits/chosen": -1.1799871921539307, "logits/rejected": -1.255087971687317, "logps/chosen": -172.1826171875, "logps/rejected": -86.44021606445312, "loss": 0.4226, "rewards/accuracies": 0.0, "rewards/chosen": -5.363757610321045, "rewards/margins": -0.284149169921875, "rewards/rejected": -5.07960844039917, "step": 3177 }, { "epoch": 0.7, "learning_rate": 6.544117647058824e-06, "logits/chosen": -1.0636144876480103, "logits/rejected": -1.1238490343093872, "logps/chosen": -147.41409301757812, "logps/rejected": -127.61543273925781, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -0.10790710896253586, "rewards/margins": 3.8910844326019287, "rewards/rejected": -3.9989914894104004, "step": 3178 }, { "epoch": 0.7, "learning_rate": 6.580882352941177e-06, "logits/chosen": -0.9677082896232605, "logits/rejected": -0.9677082896232605, "logps/chosen": -87.5074462890625, "logps/rejected": -87.5074462890625, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -0.8370620608329773, "rewards/margins": 0.0, "rewards/rejected": -0.8370620608329773, "step": 3179 }, { "epoch": 0.7, "learning_rate": 6.61764705882353e-06, "logits/chosen": -0.9807813763618469, "logits/rejected": -0.9790571331977844, "logps/chosen": -159.1243438720703, "logps/rejected": -158.654541015625, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": -0.2574478089809418, "rewards/margins": 2.3128740787506104, "rewards/rejected": -2.570321798324585, "step": 3180 }, { "epoch": 0.7, "learning_rate": 6.654411764705883e-06, "logits/chosen": -1.3156054019927979, "logits/rejected": -1.4640856981277466, "logps/chosen": -132.36807250976562, "logps/rejected": -88.04177856445312, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -1.8266220092773438, "rewards/margins": 3.3958210945129395, "rewards/rejected": -5.222443103790283, "step": 3181 }, { "epoch": 0.7, "learning_rate": 6.6911764705882356e-06, "logits/chosen": -0.7844357490539551, "logits/rejected": -0.7712106704711914, "logps/chosen": -125.80827331542969, "logps/rejected": -116.21772003173828, "loss": 0.5487, "rewards/accuracies": 1.0, "rewards/chosen": -3.2092254161834717, "rewards/margins": 0.6970138549804688, "rewards/rejected": -3.9062392711639404, "step": 3182 }, { "epoch": 0.7, "learning_rate": 6.727941176470589e-06, "logits/chosen": -1.0260952711105347, "logits/rejected": -1.103816032409668, "logps/chosen": -152.80966186523438, "logps/rejected": -134.32839965820312, "loss": 0.3809, "rewards/accuracies": 0.0, "rewards/chosen": -2.3935883045196533, "rewards/margins": -0.13267207145690918, "rewards/rejected": -2.260916233062744, "step": 3183 }, { "epoch": 0.7, "learning_rate": 6.764705882352942e-06, "logits/chosen": -1.1303975582122803, "logits/rejected": -1.1137367486953735, "logps/chosen": -112.87113952636719, "logps/rejected": -95.43212890625, "loss": 0.1539, "rewards/accuracies": 1.0, "rewards/chosen": -4.210226535797119, "rewards/margins": 1.0207834243774414, "rewards/rejected": -5.2310099601745605, "step": 3184 }, { "epoch": 0.7, "learning_rate": 6.801470588235295e-06, "logits/chosen": -1.3400814533233643, "logits/rejected": -1.3020097017288208, "logps/chosen": -98.91616821289062, "logps/rejected": -140.50047302246094, "loss": 0.5711, "rewards/accuracies": 1.0, "rewards/chosen": -2.0641191005706787, "rewards/margins": 1.4468116760253906, "rewards/rejected": -3.5109307765960693, "step": 3185 }, { "epoch": 0.71, "learning_rate": 6.838235294117648e-06, "logits/chosen": -0.9914210438728333, "logits/rejected": -0.9989568591117859, "logps/chosen": -85.58819580078125, "logps/rejected": -105.40423583984375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.9883590936660767, "rewards/margins": 4.503727436065674, "rewards/rejected": -5.492086410522461, "step": 3186 }, { "epoch": 0.71, "learning_rate": 6.875e-06, "logits/chosen": -1.2437297105789185, "logits/rejected": -1.0203810930252075, "logps/chosen": -152.12310791015625, "logps/rejected": -298.5862121582031, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": 1.4795135259628296, "rewards/margins": 3.539853096008301, "rewards/rejected": -2.0603394508361816, "step": 3187 }, { "epoch": 0.71, "learning_rate": 6.911764705882353e-06, "logits/chosen": -1.0010727643966675, "logits/rejected": -1.0010727643966675, "logps/chosen": -203.88209533691406, "logps/rejected": -203.88209533691406, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -2.1013214588165283, "rewards/margins": 0.0, "rewards/rejected": -2.1013214588165283, "step": 3188 }, { "epoch": 0.71, "learning_rate": 6.948529411764706e-06, "logits/chosen": -0.5806944370269775, "logits/rejected": -0.6294475793838501, "logps/chosen": -110.13240051269531, "logps/rejected": -60.117034912109375, "loss": 0.8076, "rewards/accuracies": 0.0, "rewards/chosen": -4.9936747550964355, "rewards/margins": -1.3898751735687256, "rewards/rejected": -3.60379958152771, "step": 3189 }, { "epoch": 0.71, "learning_rate": 6.985294117647059e-06, "logits/chosen": -1.0358186960220337, "logits/rejected": -1.0531415939331055, "logps/chosen": -163.83470153808594, "logps/rejected": -149.1795654296875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 3.695979356765747, "rewards/margins": 6.737727165222168, "rewards/rejected": -3.041748046875, "step": 3190 }, { "epoch": 0.71, "learning_rate": 7.022058823529412e-06, "logits/chosen": -0.8635543584823608, "logits/rejected": -0.8554019331932068, "logps/chosen": -129.78883361816406, "logps/rejected": -158.2142333984375, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.4745193421840668, "rewards/margins": 5.10011625289917, "rewards/rejected": -5.5746355056762695, "step": 3191 }, { "epoch": 0.71, "learning_rate": 7.058823529411766e-06, "logits/chosen": -1.1521624326705933, "logits/rejected": -1.080500602722168, "logps/chosen": -113.94369506835938, "logps/rejected": -170.21705627441406, "loss": 1.7265, "rewards/accuracies": 0.0, "rewards/chosen": -1.8339279890060425, "rewards/margins": -3.4046006202697754, "rewards/rejected": 1.570672631263733, "step": 3192 }, { "epoch": 0.71, "learning_rate": 7.095588235294119e-06, "logits/chosen": -0.8499069809913635, "logits/rejected": -0.856660783290863, "logps/chosen": -51.21489715576172, "logps/rejected": -43.28020477294922, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -0.33963510394096375, "rewards/margins": 2.9435901641845703, "rewards/rejected": -3.2832252979278564, "step": 3193 }, { "epoch": 0.71, "learning_rate": 7.132352941176472e-06, "logits/chosen": -0.8602073192596436, "logits/rejected": -0.9195562601089478, "logps/chosen": -201.9836883544922, "logps/rejected": -120.72024536132812, "loss": 0.2991, "rewards/accuracies": 1.0, "rewards/chosen": 0.05245513841509819, "rewards/margins": 0.20447540283203125, "rewards/rejected": -0.15202026069164276, "step": 3194 }, { "epoch": 0.71, "learning_rate": 7.169117647058825e-06, "logits/chosen": -1.155821681022644, "logits/rejected": -1.1649316549301147, "logps/chosen": -136.15560913085938, "logps/rejected": -112.19944763183594, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -0.20205841958522797, "rewards/margins": 3.405452013015747, "rewards/rejected": -3.6075103282928467, "step": 3195 }, { "epoch": 0.71, "learning_rate": 7.205882352941177e-06, "logits/chosen": -0.7763429284095764, "logits/rejected": -0.7527438402175903, "logps/chosen": -118.01214599609375, "logps/rejected": -140.39463806152344, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": -0.3560523986816406, "rewards/margins": 2.883263349533081, "rewards/rejected": -3.2393157482147217, "step": 3196 }, { "epoch": 0.71, "learning_rate": 7.24264705882353e-06, "logits/chosen": -0.8560213446617126, "logits/rejected": -0.8271073698997498, "logps/chosen": -79.20549011230469, "logps/rejected": -123.74164581298828, "loss": 0.1989, "rewards/accuracies": 1.0, "rewards/chosen": -1.6204960346221924, "rewards/margins": 1.2637577056884766, "rewards/rejected": -2.884253740310669, "step": 3197 }, { "epoch": 0.71, "learning_rate": 7.2794117647058826e-06, "logits/chosen": -0.7805367708206177, "logits/rejected": -0.5994903445243835, "logps/chosen": -200.04306030273438, "logps/rejected": -574.100830078125, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -0.08170013874769211, "rewards/margins": 40.517974853515625, "rewards/rejected": -40.599674224853516, "step": 3198 }, { "epoch": 0.71, "learning_rate": 7.3161764705882355e-06, "logits/chosen": -1.1587867736816406, "logits/rejected": -1.1639800071716309, "logps/chosen": -111.32073211669922, "logps/rejected": -89.55764770507812, "loss": 0.1862, "rewards/accuracies": 1.0, "rewards/chosen": -0.9817191958427429, "rewards/margins": 0.8719566464424133, "rewards/rejected": -1.8536758422851562, "step": 3199 }, { "epoch": 0.71, "learning_rate": 7.352941176470589e-06, "logits/chosen": -0.9823148250579834, "logits/rejected": -0.9334639310836792, "logps/chosen": -120.93028259277344, "logps/rejected": -175.33364868164062, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -0.43231508135795593, "rewards/margins": 3.4614944458007812, "rewards/rejected": -3.8938095569610596, "step": 3200 }, { "epoch": 0.71, "learning_rate": 7.389705882352942e-06, "logits/chosen": -0.6900633573532104, "logits/rejected": -0.649651288986206, "logps/chosen": -168.67877197265625, "logps/rejected": -253.42974853515625, "loss": 2.6149, "rewards/accuracies": 0.0, "rewards/chosen": -3.338818311691284, "rewards/margins": -4.916024684906006, "rewards/rejected": 1.5772064924240112, "step": 3201 }, { "epoch": 0.71, "learning_rate": 7.426470588235295e-06, "logits/chosen": -1.2617067098617554, "logits/rejected": -1.2050738334655762, "logps/chosen": -155.99696350097656, "logps/rejected": -211.427978515625, "loss": 0.4303, "rewards/accuracies": 0.0, "rewards/chosen": 0.4331772029399872, "rewards/margins": -0.3109115660190582, "rewards/rejected": 0.7440887689590454, "step": 3202 }, { "epoch": 0.71, "learning_rate": 7.463235294117648e-06, "logits/chosen": -1.076934814453125, "logits/rejected": -1.1072005033493042, "logps/chosen": -120.59092712402344, "logps/rejected": -163.2227783203125, "loss": 0.0837, "rewards/accuracies": 1.0, "rewards/chosen": -0.6370231509208679, "rewards/margins": 5.61720085144043, "rewards/rejected": -6.254223823547363, "step": 3203 }, { "epoch": 0.71, "learning_rate": 7.500000000000001e-06, "logits/chosen": -0.7858521938323975, "logits/rejected": -0.8440397381782532, "logps/chosen": -131.4798126220703, "logps/rejected": -78.11126708984375, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.2449859380722046, "rewards/margins": 4.595666885375977, "rewards/rejected": -5.840652942657471, "step": 3204 }, { "epoch": 0.71, "learning_rate": 7.536764705882353e-06, "logits/chosen": -0.8081466555595398, "logits/rejected": -0.8187845945358276, "logps/chosen": -167.79759216308594, "logps/rejected": -150.4625701904297, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": 0.168955996632576, "rewards/margins": 4.248566150665283, "rewards/rejected": -4.079610347747803, "step": 3205 }, { "epoch": 0.71, "learning_rate": 7.573529411764706e-06, "logits/chosen": -0.9872872829437256, "logits/rejected": -1.0464043617248535, "logps/chosen": -156.5782470703125, "logps/rejected": -54.835201263427734, "loss": 0.3957, "rewards/accuracies": 0.0, "rewards/chosen": -0.3932739198207855, "rewards/margins": -0.18784521520137787, "rewards/rejected": -0.20542870461940765, "step": 3206 }, { "epoch": 0.71, "learning_rate": 7.610294117647059e-06, "logits/chosen": -0.9806576371192932, "logits/rejected": -0.9507274031639099, "logps/chosen": -77.76395416259766, "logps/rejected": -146.1803436279297, "loss": 0.4509, "rewards/accuracies": 1.0, "rewards/chosen": -1.8196686506271362, "rewards/margins": 1.4607452154159546, "rewards/rejected": -3.280413866043091, "step": 3207 }, { "epoch": 0.71, "learning_rate": 7.647058823529411e-06, "logits/chosen": -0.9723131656646729, "logits/rejected": -0.933688759803772, "logps/chosen": -126.47066497802734, "logps/rejected": -89.5018310546875, "loss": 1.163, "rewards/accuracies": 1.0, "rewards/chosen": -4.426146984100342, "rewards/margins": 1.8961787223815918, "rewards/rejected": -6.322325706481934, "step": 3208 }, { "epoch": 0.71, "learning_rate": 7.683823529411766e-06, "logits/chosen": -1.1437063217163086, "logits/rejected": -1.139130711555481, "logps/chosen": -85.6978759765625, "logps/rejected": -88.16246032714844, "loss": 0.2987, "rewards/accuracies": 1.0, "rewards/chosen": -0.20680923759937286, "rewards/margins": 0.20267869532108307, "rewards/rejected": -0.40948793292045593, "step": 3209 }, { "epoch": 0.71, "learning_rate": 7.720588235294119e-06, "logits/chosen": -1.023311734199524, "logits/rejected": -1.044712781906128, "logps/chosen": -59.31377410888672, "logps/rejected": -76.97692108154297, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -0.4950599670410156, "rewards/margins": 4.30604887008667, "rewards/rejected": -4.8011088371276855, "step": 3210 }, { "epoch": 0.71, "learning_rate": 7.757352941176472e-06, "logits/chosen": -1.079755187034607, "logits/rejected": -1.089315414428711, "logps/chosen": -130.8916778564453, "logps/rejected": -156.56512451171875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 1.6628525257110596, "rewards/margins": 4.256410598754883, "rewards/rejected": -2.593557834625244, "step": 3211 }, { "epoch": 0.71, "learning_rate": 7.794117647058825e-06, "logits/chosen": -1.0293152332305908, "logits/rejected": -1.061638355255127, "logps/chosen": -115.42156982421875, "logps/rejected": -77.9624252319336, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 0.43918153643608093, "rewards/margins": 5.205744743347168, "rewards/rejected": -4.766563415527344, "step": 3212 }, { "epoch": 0.71, "learning_rate": 7.830882352941177e-06, "logits/chosen": -0.8104581236839294, "logits/rejected": -0.7077635526657104, "logps/chosen": -291.7359619140625, "logps/rejected": -342.9066162109375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 1.1489593982696533, "rewards/margins": 4.929117202758789, "rewards/rejected": -3.7801575660705566, "step": 3213 }, { "epoch": 0.71, "learning_rate": 7.86764705882353e-06, "logits/chosen": -0.9558854699134827, "logits/rejected": -1.1362473964691162, "logps/chosen": -240.05471801757812, "logps/rejected": -154.6111297607422, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 2.328726291656494, "rewards/margins": 9.56337833404541, "rewards/rejected": -7.234652042388916, "step": 3214 }, { "epoch": 0.71, "learning_rate": 7.904411764705883e-06, "logits/chosen": -1.120536208152771, "logits/rejected": -1.1281685829162598, "logps/chosen": -95.98651123046875, "logps/rejected": -81.78630065917969, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 1.2858482599258423, "rewards/margins": 4.982289791107178, "rewards/rejected": -3.696441411972046, "step": 3215 }, { "epoch": 0.71, "learning_rate": 7.941176470588236e-06, "logits/chosen": -0.9437126517295837, "logits/rejected": -0.9484481811523438, "logps/chosen": -79.28936767578125, "logps/rejected": -57.83672332763672, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": -0.8891143798828125, "rewards/margins": 2.557251453399658, "rewards/rejected": -3.4463658332824707, "step": 3216 }, { "epoch": 0.71, "learning_rate": 7.97794117647059e-06, "logits/chosen": -1.1468364000320435, "logits/rejected": -1.1643362045288086, "logps/chosen": -97.19822692871094, "logps/rejected": -127.5665283203125, "loss": 0.1146, "rewards/accuracies": 1.0, "rewards/chosen": 0.5488441586494446, "rewards/margins": 1.3560447692871094, "rewards/rejected": -0.8072006106376648, "step": 3217 }, { "epoch": 0.71, "learning_rate": 8.014705882352942e-06, "logits/chosen": -0.7626105546951294, "logits/rejected": -0.730094313621521, "logps/chosen": -87.16114807128906, "logps/rejected": -176.48574829101562, "loss": 0.2528, "rewards/accuracies": 1.0, "rewards/chosen": 0.8844154477119446, "rewards/margins": 0.4353492856025696, "rewards/rejected": 0.449066162109375, "step": 3218 }, { "epoch": 0.71, "learning_rate": 8.051470588235295e-06, "logits/chosen": -1.255967140197754, "logits/rejected": -1.255967140197754, "logps/chosen": -86.40631866455078, "logps/rejected": -86.40631866455078, "loss": 0.3481, "rewards/accuracies": 0.0, "rewards/chosen": -3.373288869857788, "rewards/margins": 0.0, "rewards/rejected": -3.373288869857788, "step": 3219 }, { "epoch": 0.71, "learning_rate": 8.088235294117648e-06, "logits/chosen": -1.506399393081665, "logits/rejected": -1.4623152017593384, "logps/chosen": -90.98312377929688, "logps/rejected": -147.4830322265625, "loss": 0.4575, "rewards/accuracies": 0.0, "rewards/chosen": 0.09196319431066513, "rewards/margins": -0.3999618589878082, "rewards/rejected": 0.49192506074905396, "step": 3220 }, { "epoch": 0.71, "learning_rate": 8.125000000000001e-06, "logits/chosen": -1.3559494018554688, "logits/rejected": -1.42649507522583, "logps/chosen": -78.614990234375, "logps/rejected": -85.32759857177734, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 1.4060547351837158, "rewards/margins": 6.254815101623535, "rewards/rejected": -4.84876012802124, "step": 3221 }, { "epoch": 0.71, "learning_rate": 8.161764705882354e-06, "logits/chosen": -0.7290557622909546, "logits/rejected": -0.7518976330757141, "logps/chosen": -220.57406616210938, "logps/rejected": -101.74964904785156, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": 1.905207872390747, "rewards/margins": 3.5417251586914062, "rewards/rejected": -1.6365174055099487, "step": 3222 }, { "epoch": 0.71, "learning_rate": 8.198529411764707e-06, "logits/chosen": -1.1441709995269775, "logits/rejected": -1.2536040544509888, "logps/chosen": -232.8908233642578, "logps/rejected": -148.11004638671875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 2.1154861450195312, "rewards/margins": 7.256957530975342, "rewards/rejected": -5.1414713859558105, "step": 3223 }, { "epoch": 0.71, "learning_rate": 8.23529411764706e-06, "logits/chosen": -0.9835023880004883, "logits/rejected": -1.0030465126037598, "logps/chosen": -181.37088012695312, "logps/rejected": -116.17826843261719, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 1.5043152570724487, "rewards/margins": 3.965902805328369, "rewards/rejected": -2.46158766746521, "step": 3224 }, { "epoch": 0.71, "learning_rate": 8.272058823529413e-06, "logits/chosen": -0.8402515053749084, "logits/rejected": -0.8431165814399719, "logps/chosen": -76.86573791503906, "logps/rejected": -115.993896484375, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.46591949462890625, "rewards/margins": 4.469684600830078, "rewards/rejected": -4.935604095458984, "step": 3225 }, { "epoch": 0.71, "learning_rate": 8.308823529411766e-06, "logits/chosen": -1.074781060218811, "logits/rejected": -1.038081407546997, "logps/chosen": -63.39836883544922, "logps/rejected": -103.70674896240234, "loss": 0.1171, "rewards/accuracies": 1.0, "rewards/chosen": -1.541516900062561, "rewards/margins": 1.4227608442306519, "rewards/rejected": -2.964277744293213, "step": 3226 }, { "epoch": 0.71, "learning_rate": 8.345588235294119e-06, "logits/chosen": -1.1600501537322998, "logits/rejected": -1.2723395824432373, "logps/chosen": -192.55157470703125, "logps/rejected": -148.90652465820312, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 2.880812168121338, "rewards/margins": 9.045530319213867, "rewards/rejected": -6.164718151092529, "step": 3227 }, { "epoch": 0.71, "learning_rate": 8.382352941176472e-06, "logits/chosen": -0.8889119625091553, "logits/rejected": -0.9005829095840454, "logps/chosen": -90.18244934082031, "logps/rejected": -95.29376983642578, "loss": 0.2898, "rewards/accuracies": 1.0, "rewards/chosen": 0.06865539401769638, "rewards/margins": 5.141014575958252, "rewards/rejected": -5.072359085083008, "step": 3228 }, { "epoch": 0.71, "learning_rate": 8.419117647058824e-06, "logits/chosen": -1.0681055784225464, "logits/rejected": -1.0192668437957764, "logps/chosen": -114.45652770996094, "logps/rejected": -141.12857055664062, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 1.0010178089141846, "rewards/margins": 6.794817924499512, "rewards/rejected": -5.793800354003906, "step": 3229 }, { "epoch": 0.71, "learning_rate": 8.455882352941177e-06, "logits/chosen": -1.0301644802093506, "logits/rejected": -1.0301644802093506, "logps/chosen": -54.62095642089844, "logps/rejected": -54.62095642089844, "loss": 0.3686, "rewards/accuracies": 0.0, "rewards/chosen": -2.5341877937316895, "rewards/margins": 0.0, "rewards/rejected": -2.5341877937316895, "step": 3230 }, { "epoch": 0.72, "learning_rate": 8.49264705882353e-06, "logits/chosen": -1.3476370573043823, "logits/rejected": -1.3099087476730347, "logps/chosen": -53.1336555480957, "logps/rejected": -150.25070190429688, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": 1.1068073511123657, "rewards/margins": 3.796668529510498, "rewards/rejected": -2.689861297607422, "step": 3231 }, { "epoch": 0.72, "learning_rate": 8.529411764705883e-06, "logits/chosen": -0.8128260970115662, "logits/rejected": -0.7815830111503601, "logps/chosen": -110.59902954101562, "logps/rejected": -82.13227844238281, "loss": 0.1673, "rewards/accuracies": 1.0, "rewards/chosen": -1.7282730340957642, "rewards/margins": 0.9283119440078735, "rewards/rejected": -2.6565849781036377, "step": 3232 }, { "epoch": 0.72, "learning_rate": 8.566176470588236e-06, "logits/chosen": -0.9537913203239441, "logits/rejected": -0.9082650542259216, "logps/chosen": -194.5673370361328, "logps/rejected": -284.5758056640625, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": 1.4662399291992188, "rewards/margins": 10.48546028137207, "rewards/rejected": -9.019220352172852, "step": 3233 }, { "epoch": 0.72, "learning_rate": 8.60294117647059e-06, "logits/chosen": -1.1833884716033936, "logits/rejected": -1.203511357307434, "logps/chosen": -99.5593032836914, "logps/rejected": -144.823486328125, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": 1.3286796808242798, "rewards/margins": 3.8516321182250977, "rewards/rejected": -2.5229523181915283, "step": 3234 }, { "epoch": 0.72, "learning_rate": 8.639705882352942e-06, "logits/chosen": -1.2742644548416138, "logits/rejected": -1.279092788696289, "logps/chosen": -92.7694320678711, "logps/rejected": -188.44187927246094, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -0.0842994675040245, "rewards/margins": 6.8493499755859375, "rewards/rejected": -6.93364953994751, "step": 3235 }, { "epoch": 0.72, "learning_rate": 8.676470588235295e-06, "logits/chosen": -0.9562610983848572, "logits/rejected": -0.9537549614906311, "logps/chosen": -116.40408325195312, "logps/rejected": -193.06483459472656, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -1.37578284740448, "rewards/margins": 3.364532470703125, "rewards/rejected": -4.7403154373168945, "step": 3236 }, { "epoch": 0.72, "learning_rate": 8.713235294117648e-06, "logits/chosen": -1.0449676513671875, "logits/rejected": -0.9901478886604309, "logps/chosen": -139.4259033203125, "logps/rejected": -222.36471557617188, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.0961997509002686, "rewards/margins": 5.745224952697754, "rewards/rejected": -7.841424465179443, "step": 3237 }, { "epoch": 0.72, "learning_rate": 8.750000000000001e-06, "logits/chosen": -0.9557946920394897, "logits/rejected": -0.9697016477584839, "logps/chosen": -89.2376937866211, "logps/rejected": -128.0849609375, "loss": 1.1599, "rewards/accuracies": 0.0, "rewards/chosen": -1.1560897827148438, "rewards/margins": -2.2159624099731445, "rewards/rejected": 1.0598725080490112, "step": 3238 }, { "epoch": 0.72, "learning_rate": 8.786764705882354e-06, "logits/chosen": -1.19344162940979, "logits/rejected": -1.2159218788146973, "logps/chosen": -197.1187286376953, "logps/rejected": -149.05419921875, "loss": 0.2331, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030639648903161287, "rewards/margins": 0.5208419561386108, "rewards/rejected": -0.5239059329032898, "step": 3239 }, { "epoch": 0.72, "learning_rate": 8.823529411764707e-06, "logits/chosen": -1.1552811861038208, "logits/rejected": -1.0887209177017212, "logps/chosen": -80.87642669677734, "logps/rejected": -139.87791442871094, "loss": 0.7037, "rewards/accuracies": 0.0, "rewards/chosen": -0.7609245181083679, "rewards/margins": -1.1265846490859985, "rewards/rejected": 0.3656601011753082, "step": 3240 }, { "epoch": 0.72, "learning_rate": 8.86029411764706e-06, "logits/chosen": -1.0921019315719604, "logits/rejected": -1.0921019315719604, "logps/chosen": -55.45104217529297, "logps/rejected": -55.45104217529297, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.345212936401367, "rewards/margins": 0.0, "rewards/rejected": -3.345212936401367, "step": 3241 }, { "epoch": 0.72, "learning_rate": 8.897058823529413e-06, "logits/chosen": -1.279053807258606, "logits/rejected": -1.27490234375, "logps/chosen": -101.67059326171875, "logps/rejected": -89.205078125, "loss": 0.2616, "rewards/accuracies": 1.0, "rewards/chosen": -0.8791915774345398, "rewards/margins": 0.3852035403251648, "rewards/rejected": -1.2643951177597046, "step": 3242 }, { "epoch": 0.72, "learning_rate": 8.933823529411766e-06, "logits/chosen": -0.9164687395095825, "logits/rejected": -0.9448972940444946, "logps/chosen": -205.9676513671875, "logps/rejected": -146.74996948242188, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": 3.2548844814300537, "rewards/margins": 7.43023681640625, "rewards/rejected": -4.175352573394775, "step": 3243 }, { "epoch": 0.72, "learning_rate": 8.970588235294119e-06, "logits/chosen": -0.8444098234176636, "logits/rejected": -0.8530433177947998, "logps/chosen": -189.51966857910156, "logps/rejected": -118.01811218261719, "loss": 0.3509, "rewards/accuracies": 1.0, "rewards/chosen": -0.5558685660362244, "rewards/margins": 0.26385343074798584, "rewards/rejected": -0.8197219967842102, "step": 3244 }, { "epoch": 0.72, "learning_rate": 9.007352941176471e-06, "logits/chosen": -0.9768723845481873, "logits/rejected": -0.9404060244560242, "logps/chosen": -97.69972229003906, "logps/rejected": -75.36924743652344, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": -0.6935272216796875, "rewards/margins": 3.2717907428741455, "rewards/rejected": -3.965317964553833, "step": 3245 }, { "epoch": 0.72, "learning_rate": 9.044117647058824e-06, "logits/chosen": -1.1306015253067017, "logits/rejected": -1.0604125261306763, "logps/chosen": -84.28056335449219, "logps/rejected": -166.41253662109375, "loss": 0.6227, "rewards/accuracies": 0.0, "rewards/chosen": -0.267660528421402, "rewards/margins": -0.5365082025527954, "rewards/rejected": 0.26884767413139343, "step": 3246 }, { "epoch": 0.72, "learning_rate": 9.080882352941177e-06, "logits/chosen": -0.9486105442047119, "logits/rejected": -0.6912997961044312, "logps/chosen": -114.49531555175781, "logps/rejected": -562.8319091796875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.724086046218872, "rewards/margins": 31.800987243652344, "rewards/rejected": -33.52507400512695, "step": 3247 }, { "epoch": 0.72, "learning_rate": 9.11764705882353e-06, "logits/chosen": -1.1893656253814697, "logits/rejected": -1.167738437652588, "logps/chosen": -118.83952331542969, "logps/rejected": -134.79177856445312, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.21165314316749573, "rewards/margins": 4.5161452293396, "rewards/rejected": -4.7277984619140625, "step": 3248 }, { "epoch": 0.72, "learning_rate": 9.154411764705883e-06, "logits/chosen": -0.6232114434242249, "logits/rejected": -0.6232114434242249, "logps/chosen": -88.60877990722656, "logps/rejected": -88.60877990722656, "loss": 0.3479, "rewards/accuracies": 0.0, "rewards/chosen": -2.7875421047210693, "rewards/margins": 0.0, "rewards/rejected": -2.7875421047210693, "step": 3249 }, { "epoch": 0.72, "learning_rate": 9.191176470588236e-06, "logits/chosen": -1.0794458389282227, "logits/rejected": -1.0149160623550415, "logps/chosen": -241.32748413085938, "logps/rejected": -244.24105834960938, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -0.13751983642578125, "rewards/margins": 3.6972503662109375, "rewards/rejected": -3.8347702026367188, "step": 3250 }, { "epoch": 0.72, "learning_rate": 9.227941176470589e-06, "logits/chosen": -0.997649610042572, "logits/rejected": -0.997649610042572, "logps/chosen": -178.84400939941406, "logps/rejected": -178.84400939941406, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -2.582139730453491, "rewards/margins": 0.0, "rewards/rejected": -2.582139730453491, "step": 3251 }, { "epoch": 0.72, "learning_rate": 9.264705882352942e-06, "logits/chosen": -0.8780214786529541, "logits/rejected": -0.597908616065979, "logps/chosen": -101.95222473144531, "logps/rejected": -442.2344970703125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.6024932861328125, "rewards/margins": 14.422082901000977, "rewards/rejected": -16.02457618713379, "step": 3252 }, { "epoch": 0.72, "learning_rate": 9.301470588235295e-06, "logits/chosen": -0.9558367133140564, "logits/rejected": -0.9643052220344543, "logps/chosen": -145.58120727539062, "logps/rejected": -188.13330078125, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": 1.7482131719589233, "rewards/margins": 8.274425506591797, "rewards/rejected": -6.526212215423584, "step": 3253 }, { "epoch": 0.72, "learning_rate": 9.338235294117648e-06, "logits/chosen": -0.835367739200592, "logits/rejected": -0.8352265357971191, "logps/chosen": -134.30210876464844, "logps/rejected": -125.76017761230469, "loss": 0.0618, "rewards/accuracies": 1.0, "rewards/chosen": 0.7378082275390625, "rewards/margins": 2.0440926551818848, "rewards/rejected": -1.3062843084335327, "step": 3254 }, { "epoch": 0.72, "learning_rate": 9.375000000000001e-06, "logits/chosen": -1.0541815757751465, "logits/rejected": -1.0399572849273682, "logps/chosen": -77.480224609375, "logps/rejected": -103.25983428955078, "loss": 1.5779, "rewards/accuracies": 0.0, "rewards/chosen": -4.212046146392822, "rewards/margins": -1.5280532836914062, "rewards/rejected": -2.683992862701416, "step": 3255 }, { "epoch": 0.72, "learning_rate": 9.411764705882354e-06, "logits/chosen": -1.0598536729812622, "logits/rejected": -1.0170248746871948, "logps/chosen": -152.12063598632812, "logps/rejected": -138.64788818359375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 0.3391265869140625, "rewards/margins": 5.519041538238525, "rewards/rejected": -5.179914951324463, "step": 3256 }, { "epoch": 0.72, "learning_rate": 9.448529411764707e-06, "logits/chosen": -1.2574620246887207, "logits/rejected": -1.2639062404632568, "logps/chosen": -114.17396545410156, "logps/rejected": -136.58753967285156, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/chosen": -0.5336441397666931, "rewards/margins": 4.0441765785217285, "rewards/rejected": -4.577820777893066, "step": 3257 }, { "epoch": 0.72, "learning_rate": 9.48529411764706e-06, "logits/chosen": -0.8800972700119019, "logits/rejected": -0.3523519039154053, "logps/chosen": -82.85092163085938, "logps/rejected": -447.883056640625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 1.1099532842636108, "rewards/margins": 30.168109893798828, "rewards/rejected": -29.058156967163086, "step": 3258 }, { "epoch": 0.72, "learning_rate": 9.522058823529413e-06, "logits/chosen": -1.3417474031448364, "logits/rejected": -1.3129308223724365, "logps/chosen": -76.10892486572266, "logps/rejected": -93.47808837890625, "loss": 0.1644, "rewards/accuracies": 1.0, "rewards/chosen": -0.5454460382461548, "rewards/margins": 0.9596420526504517, "rewards/rejected": -1.5050880908966064, "step": 3259 }, { "epoch": 0.72, "learning_rate": 9.558823529411766e-06, "logits/chosen": -1.142871379852295, "logits/rejected": -1.098982334136963, "logps/chosen": -91.32676696777344, "logps/rejected": -175.1363067626953, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": 0.10521850734949112, "rewards/margins": 4.185215950012207, "rewards/rejected": -4.079997539520264, "step": 3260 }, { "epoch": 0.72, "learning_rate": 9.595588235294119e-06, "logits/chosen": -1.2322282791137695, "logits/rejected": -1.2328338623046875, "logps/chosen": -185.90936279296875, "logps/rejected": -173.75216674804688, "loss": 0.0479, "rewards/accuracies": 1.0, "rewards/chosen": 1.8288787603378296, "rewards/margins": 2.297808885574341, "rewards/rejected": -0.46893006563186646, "step": 3261 }, { "epoch": 0.72, "learning_rate": 9.632352941176471e-06, "logits/chosen": -1.459044337272644, "logits/rejected": -1.5829800367355347, "logps/chosen": -195.0863800048828, "logps/rejected": -149.4731903076172, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": 0.7076004147529602, "rewards/margins": 3.1907150745391846, "rewards/rejected": -2.483114719390869, "step": 3262 }, { "epoch": 0.72, "learning_rate": 9.669117647058824e-06, "logits/chosen": -0.950213611125946, "logits/rejected": -0.950213611125946, "logps/chosen": -177.9145050048828, "logps/rejected": -177.9145050048828, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -5.764595031738281, "rewards/margins": 0.0, "rewards/rejected": -5.764595031738281, "step": 3263 }, { "epoch": 0.72, "learning_rate": 9.705882352941177e-06, "logits/chosen": -1.3053218126296997, "logits/rejected": -1.2847931385040283, "logps/chosen": -101.07976531982422, "logps/rejected": -138.392822265625, "loss": 0.0515, "rewards/accuracies": 1.0, "rewards/chosen": -0.11708831787109375, "rewards/margins": 2.5794014930725098, "rewards/rejected": -2.6964898109436035, "step": 3264 }, { "epoch": 0.72, "learning_rate": 9.74264705882353e-06, "logits/chosen": -1.116713047027588, "logits/rejected": -0.9128119349479675, "logps/chosen": -115.22904968261719, "logps/rejected": -408.4559020996094, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": -1.11785888671875, "rewards/margins": 2.735495090484619, "rewards/rejected": -3.853353977203369, "step": 3265 }, { "epoch": 0.72, "learning_rate": 9.779411764705883e-06, "logits/chosen": -1.0881396532058716, "logits/rejected": -1.2175372838974, "logps/chosen": -237.0161895751953, "logps/rejected": -161.2101593017578, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": -2.362065076828003, "rewards/margins": 2.0943949222564697, "rewards/rejected": -4.456459999084473, "step": 3266 }, { "epoch": 0.72, "learning_rate": 9.816176470588236e-06, "logits/chosen": -0.7673647403717041, "logits/rejected": -0.7747194766998291, "logps/chosen": -72.86131286621094, "logps/rejected": -73.19384002685547, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.6235405206680298, "rewards/margins": 5.524492263793945, "rewards/rejected": -4.900951862335205, "step": 3267 }, { "epoch": 0.72, "learning_rate": 9.852941176470589e-06, "logits/chosen": -1.1760143041610718, "logits/rejected": -1.0799179077148438, "logps/chosen": -123.5509033203125, "logps/rejected": -236.93850708007812, "loss": 0.6864, "rewards/accuracies": 0.0, "rewards/chosen": -1.080830454826355, "rewards/margins": -1.0804368257522583, "rewards/rejected": -0.0003936767752747983, "step": 3268 }, { "epoch": 0.72, "learning_rate": 9.889705882352942e-06, "logits/chosen": -0.7734348177909851, "logits/rejected": -0.7467751502990723, "logps/chosen": -94.67610168457031, "logps/rejected": -177.3660888671875, "loss": 0.9387, "rewards/accuracies": 0.0, "rewards/chosen": -0.7114364504814148, "rewards/margins": -1.3688247203826904, "rewards/rejected": 0.6573883295059204, "step": 3269 }, { "epoch": 0.72, "learning_rate": 9.926470588235295e-06, "logits/chosen": -1.107956051826477, "logits/rejected": -1.200735330581665, "logps/chosen": -193.74264526367188, "logps/rejected": -164.52720642089844, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": 0.5675415396690369, "rewards/margins": 3.2400498390197754, "rewards/rejected": -2.6725082397460938, "step": 3270 }, { "epoch": 0.72, "learning_rate": 9.963235294117648e-06, "logits/chosen": -1.0006612539291382, "logits/rejected": -1.0006612539291382, "logps/chosen": -110.46931457519531, "logps/rejected": -110.46931457519531, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.8868157863616943, "rewards/margins": 0.0, "rewards/rejected": -2.8868157863616943, "step": 3271 }, { "epoch": 0.72, "learning_rate": 1e-05, "logits/chosen": -1.0166107416152954, "logits/rejected": -1.0606801509857178, "logps/chosen": -62.09888458251953, "logps/rejected": -78.62660217285156, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -0.10467376559972763, "rewards/margins": 4.3675384521484375, "rewards/rejected": -4.472212314605713, "step": 3272 }, { "epoch": 0.72, "learning_rate": 9.99999967875601e-06, "logits/chosen": -1.1207289695739746, "logits/rejected": -1.1207289695739746, "logps/chosen": -111.947509765625, "logps/rejected": -111.947509765625, "loss": 0.3537, "rewards/accuracies": 0.0, "rewards/chosen": -1.2655220031738281, "rewards/margins": 0.0, "rewards/rejected": -1.2655220031738281, "step": 3273 }, { "epoch": 0.72, "learning_rate": 9.999998715024082e-06, "logits/chosen": -0.857438325881958, "logits/rejected": -0.9012169241905212, "logps/chosen": -207.869384765625, "logps/rejected": -180.52621459960938, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": 1.612115502357483, "rewards/margins": 9.36568832397461, "rewards/rejected": -7.753572940826416, "step": 3274 }, { "epoch": 0.72, "learning_rate": 9.999997108804337e-06, "logits/chosen": -1.2330561876296997, "logits/rejected": -1.3136985301971436, "logps/chosen": -201.37985229492188, "logps/rejected": -86.80648040771484, "loss": 0.1402, "rewards/accuracies": 1.0, "rewards/chosen": 1.8326996564865112, "rewards/margins": 1.1610162258148193, "rewards/rejected": 0.6716834902763367, "step": 3275 }, { "epoch": 0.73, "learning_rate": 9.999994860096985e-06, "logits/chosen": -1.2428081035614014, "logits/rejected": -1.3127681016921997, "logps/chosen": -164.12840270996094, "logps/rejected": -93.24832916259766, "loss": 1.6611, "rewards/accuracies": 0.0, "rewards/chosen": -7.877020359039307, "rewards/margins": -3.2648725509643555, "rewards/rejected": -4.612147808074951, "step": 3276 }, { "epoch": 0.73, "learning_rate": 9.99999196890231e-06, "logits/chosen": -1.2482736110687256, "logits/rejected": -1.120902180671692, "logps/chosen": -220.73634338378906, "logps/rejected": -325.2648010253906, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 0.5563675165176392, "rewards/margins": 4.4013352394104, "rewards/rejected": -3.8449676036834717, "step": 3277 }, { "epoch": 0.73, "learning_rate": 9.999988435220688e-06, "logits/chosen": -0.8244557976722717, "logits/rejected": -0.8421499729156494, "logps/chosen": -92.15554809570312, "logps/rejected": -90.93495178222656, "loss": 0.3975, "rewards/accuracies": 0.0, "rewards/chosen": -0.34832459688186646, "rewards/margins": -0.13913117349147797, "rewards/rejected": -0.2091934233903885, "step": 3278 }, { "epoch": 0.73, "learning_rate": 9.999984259052573e-06, "logits/chosen": -1.3267227411270142, "logits/rejected": -1.3339730501174927, "logps/chosen": -90.86177062988281, "logps/rejected": -68.90164947509766, "loss": 0.4256, "rewards/accuracies": 1.0, "rewards/chosen": -0.2660019099712372, "rewards/margins": 0.2359950840473175, "rewards/rejected": -0.5019969940185547, "step": 3279 }, { "epoch": 0.73, "learning_rate": 9.9999794403985e-06, "logits/chosen": -1.0375800132751465, "logits/rejected": -1.045941948890686, "logps/chosen": -95.67984771728516, "logps/rejected": -81.95144653320312, "loss": 0.3499, "rewards/accuracies": 1.0, "rewards/chosen": 0.3939781188964844, "rewards/margins": 4.997323989868164, "rewards/rejected": -4.60334587097168, "step": 3280 }, { "epoch": 0.73, "learning_rate": 9.999973979259088e-06, "logits/chosen": -1.3003212213516235, "logits/rejected": -1.1284819841384888, "logps/chosen": -67.8901138305664, "logps/rejected": -272.7126159667969, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": 1.0567680597305298, "rewards/margins": 3.545712947845459, "rewards/rejected": -2.4889450073242188, "step": 3281 }, { "epoch": 0.73, "learning_rate": 9.99996787563504e-06, "logits/chosen": -0.9126958250999451, "logits/rejected": -0.8631613254547119, "logps/chosen": -228.7220458984375, "logps/rejected": -386.05322265625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.325666904449463, "rewards/margins": 11.222976684570312, "rewards/rejected": -13.548644065856934, "step": 3282 }, { "epoch": 0.73, "learning_rate": 9.999961129527139e-06, "logits/chosen": -0.9863011837005615, "logits/rejected": -0.9863011837005615, "logps/chosen": -92.12322998046875, "logps/rejected": -92.12322998046875, "loss": 0.3472, "rewards/accuracies": 0.0, "rewards/chosen": -3.658522367477417, "rewards/margins": 0.0, "rewards/rejected": -3.658522367477417, "step": 3283 }, { "epoch": 0.73, "learning_rate": 9.999953740936252e-06, "logits/chosen": -1.2392994165420532, "logits/rejected": -1.265992283821106, "logps/chosen": -117.41746520996094, "logps/rejected": -120.62956237792969, "loss": 0.1109, "rewards/accuracies": 1.0, "rewards/chosen": 1.3126343488693237, "rewards/margins": 2.217010498046875, "rewards/rejected": -0.904376208782196, "step": 3284 }, { "epoch": 0.73, "learning_rate": 9.99994570986333e-06, "logits/chosen": -0.9938772916793823, "logits/rejected": -0.9936754703521729, "logps/chosen": -115.58479309082031, "logps/rejected": -163.92672729492188, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1002289056777954, "rewards/margins": 6.433419227600098, "rewards/rejected": -7.5336480140686035, "step": 3285 }, { "epoch": 0.73, "learning_rate": 9.999937036309402e-06, "logits/chosen": -1.3425111770629883, "logits/rejected": -1.3425111770629883, "logps/chosen": -125.48887634277344, "logps/rejected": -125.48887634277344, "loss": 0.469, "rewards/accuracies": 0.0, "rewards/chosen": -2.5459091663360596, "rewards/margins": 0.0, "rewards/rejected": -2.5459091663360596, "step": 3286 }, { "epoch": 0.73, "learning_rate": 9.999927720275586e-06, "logits/chosen": -0.9085298776626587, "logits/rejected": -0.8851044774055481, "logps/chosen": -102.25505065917969, "logps/rejected": -95.25601959228516, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/chosen": 0.05627289041876793, "rewards/margins": 3.0340378284454346, "rewards/rejected": -2.977764844894409, "step": 3287 }, { "epoch": 0.73, "learning_rate": 9.999917761763076e-06, "logits/chosen": -0.8757814764976501, "logits/rejected": -0.8890542387962341, "logps/chosen": -172.30470275878906, "logps/rejected": -120.1996078491211, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": 0.04956207424402237, "rewards/margins": 3.7233870029449463, "rewards/rejected": -3.6738250255584717, "step": 3288 }, { "epoch": 0.73, "learning_rate": 9.999907160773155e-06, "logits/chosen": -1.0141594409942627, "logits/rejected": -1.0627847909927368, "logps/chosen": -230.47357177734375, "logps/rejected": -114.23355102539062, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.04377441480755806, "rewards/margins": 5.768295764923096, "rewards/rejected": -5.724521160125732, "step": 3289 }, { "epoch": 0.73, "learning_rate": 9.99989591730718e-06, "logits/chosen": -1.1959906816482544, "logits/rejected": -1.173755168914795, "logps/chosen": -85.45851135253906, "logps/rejected": -129.77357482910156, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -0.11864776909351349, "rewards/margins": 2.692614793777466, "rewards/rejected": -2.811262607574463, "step": 3290 }, { "epoch": 0.73, "learning_rate": 9.999884031366603e-06, "logits/chosen": -1.3146578073501587, "logits/rejected": -1.1773394346237183, "logps/chosen": -106.73301696777344, "logps/rejected": -358.7355041503906, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.7915375232696533, "rewards/margins": 5.649938583374023, "rewards/rejected": -7.441476345062256, "step": 3291 }, { "epoch": 0.73, "learning_rate": 9.999871502952944e-06, "logits/chosen": -0.8708336353302002, "logits/rejected": -0.8674443364143372, "logps/chosen": -89.89165496826172, "logps/rejected": -176.7454071044922, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.3183692991733551, "rewards/margins": 8.02221393585205, "rewards/rejected": -7.7038445472717285, "step": 3292 }, { "epoch": 0.73, "learning_rate": 9.99985833206782e-06, "logits/chosen": -1.2107391357421875, "logits/rejected": -1.1693617105484009, "logps/chosen": -97.05404663085938, "logps/rejected": -157.94839477539062, "loss": 1.4886, "rewards/accuracies": 0.0, "rewards/chosen": -2.631734609603882, "rewards/margins": -2.842362403869629, "rewards/rejected": 0.2106277495622635, "step": 3293 }, { "epoch": 0.73, "learning_rate": 9.999844518712917e-06, "logits/chosen": -1.1801649332046509, "logits/rejected": -1.1549350023269653, "logps/chosen": -116.59477233886719, "logps/rejected": -230.9766387939453, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": -1.6055755615234375, "rewards/margins": 7.3300580978393555, "rewards/rejected": -8.935633659362793, "step": 3294 }, { "epoch": 0.73, "learning_rate": 9.999830062890012e-06, "logits/chosen": -1.109264850616455, "logits/rejected": -0.9857929348945618, "logps/chosen": -68.48927307128906, "logps/rejected": -244.4254150390625, "loss": 0.1641, "rewards/accuracies": 1.0, "rewards/chosen": -1.2442924976348877, "rewards/margins": 0.9483070373535156, "rewards/rejected": -2.1925995349884033, "step": 3295 }, { "epoch": 0.73, "learning_rate": 9.999814964600965e-06, "logits/chosen": -0.7395650148391724, "logits/rejected": -0.7748211622238159, "logps/chosen": -106.42765808105469, "logps/rejected": -160.79859924316406, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -0.875256359577179, "rewards/margins": 3.600267171859741, "rewards/rejected": -4.475523471832275, "step": 3296 }, { "epoch": 0.73, "learning_rate": 9.999799223847714e-06, "logits/chosen": -1.1054606437683105, "logits/rejected": -1.0494308471679688, "logps/chosen": -73.17296600341797, "logps/rejected": -212.17898559570312, "loss": 0.1236, "rewards/accuracies": 1.0, "rewards/chosen": 0.44815292954444885, "rewards/margins": 3.4866693019866943, "rewards/rejected": -3.0385162830352783, "step": 3297 }, { "epoch": 0.73, "learning_rate": 9.999782840632281e-06, "logits/chosen": -1.3893696069717407, "logits/rejected": -1.2955100536346436, "logps/chosen": -120.15432739257812, "logps/rejected": -214.9083251953125, "loss": 0.2583, "rewards/accuracies": 1.0, "rewards/chosen": -1.3128654956817627, "rewards/margins": 0.4162818193435669, "rewards/rejected": -1.7291473150253296, "step": 3298 }, { "epoch": 0.73, "learning_rate": 9.999765814956771e-06, "logits/chosen": -0.9427560567855835, "logits/rejected": -1.0161097049713135, "logps/chosen": -105.33391571044922, "logps/rejected": -102.00468444824219, "loss": 0.6144, "rewards/accuracies": 0.0, "rewards/chosen": -2.187519073486328, "rewards/margins": -0.8824416399002075, "rewards/rejected": -1.3050774335861206, "step": 3299 }, { "epoch": 0.73, "learning_rate": 9.999748146823376e-06, "logits/chosen": -0.8447311520576477, "logits/rejected": -0.45136913657188416, "logps/chosen": -195.844970703125, "logps/rejected": -737.17041015625, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 0.40457460284233093, "rewards/margins": 60.46672821044922, "rewards/rejected": -60.06215286254883, "step": 3300 }, { "epoch": 0.73, "learning_rate": 9.999729836234363e-06, "logits/chosen": -1.0574288368225098, "logits/rejected": -1.0538066625595093, "logps/chosen": -66.11013793945312, "logps/rejected": -59.757484436035156, "loss": 0.436, "rewards/accuracies": 1.0, "rewards/chosen": -0.4720645844936371, "rewards/margins": 3.342987298965454, "rewards/rejected": -3.815051794052124, "step": 3301 }, { "epoch": 0.73, "learning_rate": 9.999710883192082e-06, "logits/chosen": -0.8666673898696899, "logits/rejected": -0.6216907501220703, "logps/chosen": -282.10040283203125, "logps/rejected": -412.05487060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.0688995122909546, "rewards/margins": 27.40591812133789, "rewards/rejected": -26.337018966674805, "step": 3302 }, { "epoch": 0.73, "learning_rate": 9.999691287698975e-06, "logits/chosen": -1.1416971683502197, "logits/rejected": -1.1356648206710815, "logps/chosen": -76.10893249511719, "logps/rejected": -62.531070709228516, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": 0.4794677793979645, "rewards/margins": 3.9036171436309814, "rewards/rejected": -3.42414927482605, "step": 3303 }, { "epoch": 0.73, "learning_rate": 9.999671049757554e-06, "logits/chosen": -1.198053240776062, "logits/rejected": -1.1810088157653809, "logps/chosen": -78.60623168945312, "logps/rejected": -114.79515838623047, "loss": 2.3719, "rewards/accuracies": 0.0, "rewards/chosen": 0.05021972581744194, "rewards/margins": -0.3385879695415497, "rewards/rejected": 0.3888076841831207, "step": 3304 }, { "epoch": 0.73, "learning_rate": 9.999650169370423e-06, "logits/chosen": -1.1290199756622314, "logits/rejected": -1.0911015272140503, "logps/chosen": -165.94300842285156, "logps/rejected": -156.67526245117188, "loss": 0.1558, "rewards/accuracies": 1.0, "rewards/chosen": -3.9613778591156006, "rewards/margins": 1.9670965671539307, "rewards/rejected": -5.928474426269531, "step": 3305 }, { "epoch": 0.73, "learning_rate": 9.999628646540262e-06, "logits/chosen": -0.9815987348556519, "logits/rejected": -0.990924060344696, "logps/chosen": -126.72303771972656, "logps/rejected": -134.58663940429688, "loss": 0.4629, "rewards/accuracies": 1.0, "rewards/chosen": 0.18655243515968323, "rewards/margins": 0.6666259765625, "rewards/rejected": -0.4800735414028168, "step": 3306 }, { "epoch": 0.73, "learning_rate": 9.999606481269841e-06, "logits/chosen": -0.9022391438484192, "logits/rejected": -0.9026561379432678, "logps/chosen": -114.23110961914062, "logps/rejected": -139.982421875, "loss": 0.0822, "rewards/accuracies": 1.0, "rewards/chosen": -0.1627853363752365, "rewards/margins": 1.7287918329238892, "rewards/rejected": -1.891577124595642, "step": 3307 }, { "epoch": 0.73, "learning_rate": 9.999583673562006e-06, "logits/chosen": -1.0602854490280151, "logits/rejected": -1.0370912551879883, "logps/chosen": -164.49200439453125, "logps/rejected": -126.16149139404297, "loss": 1.1558, "rewards/accuracies": 0.0, "rewards/chosen": -4.41830587387085, "rewards/margins": -2.1823835372924805, "rewards/rejected": -2.235922336578369, "step": 3308 }, { "epoch": 0.73, "learning_rate": 9.999560223419687e-06, "logits/chosen": -1.1731970310211182, "logits/rejected": -1.1070778369903564, "logps/chosen": -94.98456573486328, "logps/rejected": -179.27407836914062, "loss": 0.2135, "rewards/accuracies": 1.0, "rewards/chosen": 0.4366867244243622, "rewards/margins": 0.6338958740234375, "rewards/rejected": -0.1972091645002365, "step": 3309 }, { "epoch": 0.73, "learning_rate": 9.999536130845897e-06, "logits/chosen": -1.1690891981124878, "logits/rejected": -1.2465823888778687, "logps/chosen": -188.47805786132812, "logps/rejected": -138.98484802246094, "loss": 0.1819, "rewards/accuracies": 1.0, "rewards/chosen": -0.767498791217804, "rewards/margins": 1.2436959743499756, "rewards/rejected": -2.0111947059631348, "step": 3310 }, { "epoch": 0.73, "learning_rate": 9.999511395843734e-06, "logits/chosen": -1.1034190654754639, "logits/rejected": -1.1021478176116943, "logps/chosen": -92.02233123779297, "logps/rejected": -77.92144012451172, "loss": 0.1867, "rewards/accuracies": 1.0, "rewards/chosen": -0.3661750853061676, "rewards/margins": 0.798309326171875, "rewards/rejected": -1.1644843816757202, "step": 3311 }, { "epoch": 0.73, "learning_rate": 9.999486018416375e-06, "logits/chosen": -1.156550645828247, "logits/rejected": -1.0907562971115112, "logps/chosen": -106.0414047241211, "logps/rejected": -233.97698974609375, "loss": 0.1009, "rewards/accuracies": 1.0, "rewards/chosen": 0.3403434753417969, "rewards/margins": 11.650506019592285, "rewards/rejected": -11.310162544250488, "step": 3312 }, { "epoch": 0.73, "learning_rate": 9.99945999856708e-06, "logits/chosen": -0.7669003009796143, "logits/rejected": -0.7582886219024658, "logps/chosen": -190.46835327148438, "logps/rejected": -255.17080688476562, "loss": 0.0915, "rewards/accuracies": 1.0, "rewards/chosen": 0.6078628897666931, "rewards/margins": 1.8212831020355225, "rewards/rejected": -1.2134201526641846, "step": 3313 }, { "epoch": 0.73, "learning_rate": 9.999433336299195e-06, "logits/chosen": -0.7761415243148804, "logits/rejected": -0.7669745087623596, "logps/chosen": -184.59042358398438, "logps/rejected": -229.52142333984375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": 0.6759994626045227, "rewards/margins": 13.776640892028809, "rewards/rejected": -13.100641250610352, "step": 3314 }, { "epoch": 0.73, "learning_rate": 9.999406031616143e-06, "logits/chosen": -1.018054485321045, "logits/rejected": -0.9825263619422913, "logps/chosen": -83.00630187988281, "logps/rejected": -167.17552185058594, "loss": 0.1051, "rewards/accuracies": 1.0, "rewards/chosen": -0.5822723507881165, "rewards/margins": 6.106531620025635, "rewards/rejected": -6.6888041496276855, "step": 3315 }, { "epoch": 0.73, "learning_rate": 9.999378084521436e-06, "logits/chosen": -1.2018765211105347, "logits/rejected": -0.6334260106086731, "logps/chosen": -67.17962646484375, "logps/rejected": -191.1129150390625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.4507000148296356, "rewards/margins": 5.64343786239624, "rewards/rejected": -6.094137668609619, "step": 3316 }, { "epoch": 0.73, "learning_rate": 9.999349495018662e-06, "logits/chosen": -0.8854032158851624, "logits/rejected": -0.860252320766449, "logps/chosen": -190.00132751464844, "logps/rejected": -227.19448852539062, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.101312279701233, "rewards/margins": 7.882967948913574, "rewards/rejected": -6.781655788421631, "step": 3317 }, { "epoch": 0.73, "learning_rate": 9.999320263111495e-06, "logits/chosen": -0.7653305530548096, "logits/rejected": -0.6623350977897644, "logps/chosen": -199.4412841796875, "logps/rejected": -371.5107421875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": 2.8565614223480225, "rewards/margins": 13.70792007446289, "rewards/rejected": -10.851358413696289, "step": 3318 }, { "epoch": 0.73, "learning_rate": 9.999290388803695e-06, "logits/chosen": -1.211276888847351, "logits/rejected": -1.0176054239273071, "logps/chosen": -106.15805053710938, "logps/rejected": -301.5372619628906, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.6450942754745483, "rewards/margins": 5.191153049468994, "rewards/rejected": -6.836247444152832, "step": 3319 }, { "epoch": 0.73, "learning_rate": 9.999259872099095e-06, "logits/chosen": -0.9342184662818909, "logits/rejected": -0.6120155453681946, "logps/chosen": -216.75152587890625, "logps/rejected": -625.5784301757812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.0154953002929688, "rewards/margins": 48.54034423828125, "rewards/rejected": -47.52484893798828, "step": 3320 }, { "epoch": 0.74, "learning_rate": 9.999228713001622e-06, "logits/chosen": -0.9676976203918457, "logits/rejected": -0.9400103688240051, "logps/chosen": -159.17555236816406, "logps/rejected": -163.58612060546875, "loss": 0.617, "rewards/accuracies": 0.0, "rewards/chosen": -5.152554512023926, "rewards/margins": -0.8740401268005371, "rewards/rejected": -4.278514385223389, "step": 3321 }, { "epoch": 0.74, "learning_rate": 9.999196911515277e-06, "logits/chosen": -1.2052547931671143, "logits/rejected": -1.181099772453308, "logps/chosen": -159.42367553710938, "logps/rejected": -190.3516082763672, "loss": 0.0707, "rewards/accuracies": 1.0, "rewards/chosen": -3.3836586475372314, "rewards/margins": 1.8855416774749756, "rewards/rejected": -5.269200325012207, "step": 3322 }, { "epoch": 0.74, "learning_rate": 9.999164467644146e-06, "logits/chosen": -1.3146603107452393, "logits/rejected": -1.2786600589752197, "logps/chosen": -162.85321044921875, "logps/rejected": -245.1654815673828, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.0822372436523438, "rewards/margins": 7.751315116882324, "rewards/rejected": -8.833552360534668, "step": 3323 }, { "epoch": 0.74, "learning_rate": 9.999131381392397e-06, "logits/chosen": -1.0225354433059692, "logits/rejected": -1.0384801626205444, "logps/chosen": -82.62132263183594, "logps/rejected": -118.2508544921875, "loss": 0.0724, "rewards/accuracies": 1.0, "rewards/chosen": 0.259237676858902, "rewards/margins": 1.8615996837615967, "rewards/rejected": -1.602362036705017, "step": 3324 }, { "epoch": 0.74, "learning_rate": 9.999097652764285e-06, "logits/chosen": -0.7312779426574707, "logits/rejected": -0.6464194655418396, "logps/chosen": -19.233909606933594, "logps/rejected": -160.40501403808594, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": -0.8596866726875305, "rewards/margins": 2.210996627807617, "rewards/rejected": -3.070683240890503, "step": 3325 }, { "epoch": 0.74, "learning_rate": 9.999063281764142e-06, "logits/chosen": -0.8144685626029968, "logits/rejected": -0.8297385573387146, "logps/chosen": -88.4368896484375, "logps/rejected": -80.04073333740234, "loss": 0.2038, "rewards/accuracies": 1.0, "rewards/chosen": -1.113366723060608, "rewards/margins": 0.7411727905273438, "rewards/rejected": -1.8545395135879517, "step": 3326 }, { "epoch": 0.74, "learning_rate": 9.999028268396384e-06, "logits/chosen": -1.0089980363845825, "logits/rejected": -1.036470890045166, "logps/chosen": -151.71697998046875, "logps/rejected": -217.60635375976562, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.827606201171875, "rewards/margins": 7.027249336242676, "rewards/rejected": -9.85485553741455, "step": 3327 }, { "epoch": 0.74, "learning_rate": 9.99899261266551e-06, "logits/chosen": -1.138088583946228, "logits/rejected": -1.1414616107940674, "logps/chosen": -79.65353393554688, "logps/rejected": -150.70179748535156, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": 0.5664840936660767, "rewards/margins": 3.6882944107055664, "rewards/rejected": -3.1218101978302, "step": 3328 }, { "epoch": 0.74, "learning_rate": 9.998956314576105e-06, "logits/chosen": -0.650927722454071, "logits/rejected": -0.6433272361755371, "logps/chosen": -169.37660217285156, "logps/rejected": -161.74813842773438, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": -1.8706649541854858, "rewards/margins": 2.922341823577881, "rewards/rejected": -4.793006896972656, "step": 3329 }, { "epoch": 0.74, "learning_rate": 9.998919374132829e-06, "logits/chosen": -0.9450615048408508, "logits/rejected": -0.9036527276039124, "logps/chosen": -250.6139678955078, "logps/rejected": -393.46923828125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.89849853515625, "rewards/margins": 11.913299560546875, "rewards/rejected": -13.811798095703125, "step": 3330 }, { "epoch": 0.74, "learning_rate": 9.99888179134043e-06, "logits/chosen": -1.10245943069458, "logits/rejected": -1.202890157699585, "logps/chosen": -192.29931640625, "logps/rejected": -185.54013061523438, "loss": 0.1051, "rewards/accuracies": 1.0, "rewards/chosen": 0.3708663880825043, "rewards/margins": 1.453077793121338, "rewards/rejected": -1.0822113752365112, "step": 3331 }, { "epoch": 0.74, "learning_rate": 9.99884356620374e-06, "logits/chosen": -0.9746571779251099, "logits/rejected": -0.9746571779251099, "logps/chosen": -272.1899719238281, "logps/rejected": -272.1899719238281, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -9.327147483825684, "rewards/margins": 0.0, "rewards/rejected": -9.327147483825684, "step": 3332 }, { "epoch": 0.74, "learning_rate": 9.998804698727667e-06, "logits/chosen": -1.293526530265808, "logits/rejected": -1.330710530281067, "logps/chosen": -165.08767700195312, "logps/rejected": -166.2435302734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 1.0677337646484375, "rewards/margins": 7.461864471435547, "rewards/rejected": -6.394130706787109, "step": 3333 }, { "epoch": 0.74, "learning_rate": 9.998765188917206e-06, "logits/chosen": -0.9743612408638, "logits/rejected": -0.5824282765388489, "logps/chosen": -269.4375915527344, "logps/rejected": -399.00054931640625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.278735399246216, "rewards/margins": 25.675344467163086, "rewards/rejected": -28.95408058166504, "step": 3334 }, { "epoch": 0.74, "learning_rate": 9.998725036777437e-06, "logits/chosen": -0.955136775970459, "logits/rejected": -0.9507162570953369, "logps/chosen": -71.21212005615234, "logps/rejected": -151.51058959960938, "loss": 0.0883, "rewards/accuracies": 1.0, "rewards/chosen": -0.1584480255842209, "rewards/margins": 4.759264945983887, "rewards/rejected": -4.917713165283203, "step": 3335 }, { "epoch": 0.74, "learning_rate": 9.998684242313516e-06, "logits/chosen": -1.0945364236831665, "logits/rejected": -1.0443954467773438, "logps/chosen": -144.5101318359375, "logps/rejected": -178.73367309570312, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": -0.6527679562568665, "rewards/margins": 2.1204652786254883, "rewards/rejected": -2.77323317527771, "step": 3336 }, { "epoch": 0.74, "learning_rate": 9.998642805530687e-06, "logits/chosen": -0.7727908492088318, "logits/rejected": -0.7727908492088318, "logps/chosen": -145.22259521484375, "logps/rejected": -145.22259521484375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.752723693847656, "rewards/margins": 0.0, "rewards/rejected": -6.752723693847656, "step": 3337 }, { "epoch": 0.74, "learning_rate": 9.998600726434274e-06, "logits/chosen": -1.013974905014038, "logits/rejected": -1.0240362882614136, "logps/chosen": -119.90886688232422, "logps/rejected": -72.81373596191406, "loss": 0.3294, "rewards/accuracies": 1.0, "rewards/chosen": -3.379713535308838, "rewards/margins": 1.4962363243103027, "rewards/rejected": -4.875949859619141, "step": 3338 }, { "epoch": 0.74, "learning_rate": 9.998558005029685e-06, "logits/chosen": -0.9878198504447937, "logits/rejected": -0.6303431987762451, "logps/chosen": -193.946044921875, "logps/rejected": -460.399658203125, "loss": 0.1029, "rewards/accuracies": 1.0, "rewards/chosen": 1.4804290533065796, "rewards/margins": 16.0449275970459, "rewards/rejected": -14.564498901367188, "step": 3339 }, { "epoch": 0.74, "learning_rate": 9.998514641322406e-06, "logits/chosen": -0.865471363067627, "logits/rejected": -0.9181341528892517, "logps/chosen": -137.3905029296875, "logps/rejected": -139.24691772460938, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.137725830078125, "rewards/margins": 7.917504787445068, "rewards/rejected": -7.779778957366943, "step": 3340 }, { "epoch": 0.74, "learning_rate": 9.998470635318015e-06, "logits/chosen": -1.0114593505859375, "logits/rejected": -0.9990518689155579, "logps/chosen": -88.78781127929688, "logps/rejected": -65.44806671142578, "loss": 0.1607, "rewards/accuracies": 1.0, "rewards/chosen": -2.2461745738983154, "rewards/margins": 1.181037187576294, "rewards/rejected": -3.4272117614746094, "step": 3341 }, { "epoch": 0.74, "learning_rate": 9.99842598702216e-06, "logits/chosen": -0.7961300611495972, "logits/rejected": -0.7961300611495972, "logps/chosen": -54.92808532714844, "logps/rejected": -54.92808532714844, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -3.128239870071411, "rewards/margins": 0.0, "rewards/rejected": -3.128239870071411, "step": 3342 }, { "epoch": 0.74, "learning_rate": 9.998380696440582e-06, "logits/chosen": -0.5311002731323242, "logits/rejected": -0.48469939827919006, "logps/chosen": -86.8892822265625, "logps/rejected": -144.0021209716797, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.21227721869945526, "rewards/margins": 6.971587181091309, "rewards/rejected": -7.183864593505859, "step": 3343 }, { "epoch": 0.74, "learning_rate": 9.998334763579103e-06, "logits/chosen": -1.1145480871200562, "logits/rejected": -1.110533595085144, "logps/chosen": -97.88013458251953, "logps/rejected": -97.0534896850586, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.2162071317434311, "rewards/margins": 7.072685241699219, "rewards/rejected": -6.856478214263916, "step": 3344 }, { "epoch": 0.74, "learning_rate": 9.998288188443619e-06, "logits/chosen": -0.719842255115509, "logits/rejected": -0.6991544365882874, "logps/chosen": -103.23292541503906, "logps/rejected": -168.823486328125, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": 0.10160217434167862, "rewards/margins": 4.761650085449219, "rewards/rejected": -4.660048007965088, "step": 3345 }, { "epoch": 0.74, "learning_rate": 9.99824097104012e-06, "logits/chosen": -1.0617350339889526, "logits/rejected": -1.0635185241699219, "logps/chosen": -168.96458435058594, "logps/rejected": -157.0494384765625, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": -0.6188369989395142, "rewards/margins": 4.317266941070557, "rewards/rejected": -4.936103820800781, "step": 3346 }, { "epoch": 0.74, "learning_rate": 9.998193111374673e-06, "logits/chosen": -0.6949244141578674, "logits/rejected": -0.6949244141578674, "logps/chosen": -77.74768829345703, "logps/rejected": -77.74768829345703, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.231767416000366, "rewards/margins": 0.0, "rewards/rejected": -3.231767416000366, "step": 3347 }, { "epoch": 0.74, "learning_rate": 9.998144609453425e-06, "logits/chosen": -0.7278530597686768, "logits/rejected": -0.6971985101699829, "logps/chosen": -71.66000366210938, "logps/rejected": -262.22906494140625, "loss": 3.8521, "rewards/accuracies": 1.0, "rewards/chosen": -0.8772503137588501, "rewards/margins": 11.233850479125977, "rewards/rejected": -12.111101150512695, "step": 3348 }, { "epoch": 0.74, "learning_rate": 9.99809546528261e-06, "logits/chosen": -1.0542008876800537, "logits/rejected": -1.0152649879455566, "logps/chosen": -117.78219604492188, "logps/rejected": -106.42231750488281, "loss": 0.1258, "rewards/accuracies": 1.0, "rewards/chosen": 0.39015504717826843, "rewards/margins": 1.2513641119003296, "rewards/rejected": -0.8612090945243835, "step": 3349 }, { "epoch": 0.74, "learning_rate": 9.998045678868541e-06, "logits/chosen": -1.1814091205596924, "logits/rejected": -1.1814091205596924, "logps/chosen": -113.39427185058594, "logps/rejected": -113.39427185058594, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -1.127528429031372, "rewards/margins": 0.0, "rewards/rejected": -1.127528429031372, "step": 3350 }, { "epoch": 0.74, "learning_rate": 9.99799525021762e-06, "logits/chosen": -1.0861549377441406, "logits/rejected": -1.1247323751449585, "logps/chosen": -169.07376098632812, "logps/rejected": -129.4215545654297, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.0455001592636108, "rewards/margins": 4.2720232009887695, "rewards/rejected": -5.31752347946167, "step": 3351 }, { "epoch": 0.74, "learning_rate": 9.997944179336323e-06, "logits/chosen": -0.6825397610664368, "logits/rejected": -0.7627034783363342, "logps/chosen": -151.94015502929688, "logps/rejected": -76.24617004394531, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": 1.1883834600448608, "rewards/margins": 3.0931732654571533, "rewards/rejected": -1.9047898054122925, "step": 3352 }, { "epoch": 0.74, "learning_rate": 9.997892466231215e-06, "logits/chosen": -0.867279052734375, "logits/rejected": -0.91436368227005, "logps/chosen": -168.18055725097656, "logps/rejected": -217.00010681152344, "loss": 0.8366, "rewards/accuracies": 0.0, "rewards/chosen": -3.3860108852386475, "rewards/margins": -1.4650864601135254, "rewards/rejected": -1.920924425125122, "step": 3353 }, { "epoch": 0.74, "learning_rate": 9.997840110908938e-06, "logits/chosen": -0.9664395451545715, "logits/rejected": -0.9326712489128113, "logps/chosen": -105.21340942382812, "logps/rejected": -181.02130126953125, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": 0.4855003356933594, "rewards/margins": 2.9575188159942627, "rewards/rejected": -2.4720184803009033, "step": 3354 }, { "epoch": 0.74, "learning_rate": 9.997787113376223e-06, "logits/chosen": -1.0313059091567993, "logits/rejected": -1.091940999031067, "logps/chosen": -83.66256713867188, "logps/rejected": -101.11701965332031, "loss": 0.1961, "rewards/accuracies": 1.0, "rewards/chosen": -0.6060409545898438, "rewards/margins": 0.739875078201294, "rewards/rejected": -1.3459160327911377, "step": 3355 }, { "epoch": 0.74, "learning_rate": 9.997733473639876e-06, "logits/chosen": -0.8018215298652649, "logits/rejected": -0.7921248078346252, "logps/chosen": -109.28163146972656, "logps/rejected": -52.583717346191406, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 0.3156677186489105, "rewards/margins": 3.7689554691314697, "rewards/rejected": -3.4532878398895264, "step": 3356 }, { "epoch": 0.74, "learning_rate": 9.997679191706794e-06, "logits/chosen": -0.556972861289978, "logits/rejected": -0.556972861289978, "logps/chosen": -98.03155517578125, "logps/rejected": -98.03155517578125, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -1.760308861732483, "rewards/margins": 0.0, "rewards/rejected": -1.760308861732483, "step": 3357 }, { "epoch": 0.74, "learning_rate": 9.99762426758395e-06, "logits/chosen": -0.8382530808448792, "logits/rejected": -0.8494743704795837, "logps/chosen": -115.07159423828125, "logps/rejected": -166.96112060546875, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": 0.733258843421936, "rewards/margins": 5.526636600494385, "rewards/rejected": -4.793377876281738, "step": 3358 }, { "epoch": 0.74, "learning_rate": 9.997568701278399e-06, "logits/chosen": -0.9741261601448059, "logits/rejected": -0.919978678226471, "logps/chosen": -155.43984985351562, "logps/rejected": -168.41519165039062, "loss": 0.1234, "rewards/accuracies": 1.0, "rewards/chosen": 1.1694366931915283, "rewards/margins": 2.981292724609375, "rewards/rejected": -1.8118561506271362, "step": 3359 }, { "epoch": 0.74, "learning_rate": 9.997512492797285e-06, "logits/chosen": -0.8387710452079773, "logits/rejected": -0.8387710452079773, "logps/chosen": -128.26173400878906, "logps/rejected": -128.26173400878906, "loss": 0.3481, "rewards/accuracies": 0.0, "rewards/chosen": -4.405588626861572, "rewards/margins": 0.0, "rewards/rejected": -4.405588626861572, "step": 3360 }, { "epoch": 0.74, "learning_rate": 9.997455642147831e-06, "logits/chosen": -0.8533007502555847, "logits/rejected": -0.8533007502555847, "logps/chosen": -97.57383728027344, "logps/rejected": -97.57383728027344, "loss": 0.3541, "rewards/accuracies": 0.0, "rewards/chosen": -1.1351250410079956, "rewards/margins": 0.0, "rewards/rejected": -1.1351250410079956, "step": 3361 }, { "epoch": 0.74, "learning_rate": 9.997398149337338e-06, "logits/chosen": -1.1203924417495728, "logits/rejected": -1.0973811149597168, "logps/chosen": -39.35993957519531, "logps/rejected": -77.7249526977539, "loss": 0.5537, "rewards/accuracies": 1.0, "rewards/chosen": -1.0016489028930664, "rewards/margins": 0.2948172092437744, "rewards/rejected": -1.2964661121368408, "step": 3362 }, { "epoch": 0.74, "learning_rate": 9.997340014373198e-06, "logits/chosen": -1.0378111600875854, "logits/rejected": -1.111496925354004, "logps/chosen": -81.41000366210938, "logps/rejected": -67.16127014160156, "loss": 0.0729, "rewards/accuracies": 1.0, "rewards/chosen": 0.06424713134765625, "rewards/margins": 1.8536125421524048, "rewards/rejected": -1.7893654108047485, "step": 3363 }, { "epoch": 0.74, "learning_rate": 9.99728123726288e-06, "logits/chosen": -0.5034317970275879, "logits/rejected": -0.4629489481449127, "logps/chosen": -126.892578125, "logps/rejected": -123.08113098144531, "loss": 0.5491, "rewards/accuracies": 0.0, "rewards/chosen": -1.4537811279296875, "rewards/margins": -0.14921104907989502, "rewards/rejected": -1.3045700788497925, "step": 3364 }, { "epoch": 0.74, "learning_rate": 9.997221818013933e-06, "logits/chosen": -0.8185995221138, "logits/rejected": -1.0406521558761597, "logps/chosen": -263.64263916015625, "logps/rejected": -67.58218383789062, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": -1.5584198236465454, "rewards/margins": 2.47029972076416, "rewards/rejected": -4.028719425201416, "step": 3365 }, { "epoch": 0.75, "learning_rate": 9.997161756633998e-06, "logits/chosen": -0.8656384944915771, "logits/rejected": -0.7782727479934692, "logps/chosen": -83.97002410888672, "logps/rejected": -240.68157958984375, "loss": 0.245, "rewards/accuracies": 1.0, "rewards/chosen": -1.3310844898223877, "rewards/margins": 0.4583534002304077, "rewards/rejected": -1.7894378900527954, "step": 3366 }, { "epoch": 0.75, "learning_rate": 9.99710105313079e-06, "logits/chosen": -0.7542625665664673, "logits/rejected": -0.7542625665664673, "logps/chosen": -101.01153564453125, "logps/rejected": -101.01153564453125, "loss": 0.3473, "rewards/accuracies": 0.0, "rewards/chosen": -1.2086776494979858, "rewards/margins": 0.0, "rewards/rejected": -1.2086776494979858, "step": 3367 }, { "epoch": 0.75, "learning_rate": 9.997039707512109e-06, "logits/chosen": -0.8347312808036804, "logits/rejected": -0.7827824354171753, "logps/chosen": -120.06559753417969, "logps/rejected": -150.8450164794922, "loss": 0.5698, "rewards/accuracies": 1.0, "rewards/chosen": -0.731275200843811, "rewards/margins": 2.713486671447754, "rewards/rejected": -3.4447617530822754, "step": 3368 }, { "epoch": 0.75, "learning_rate": 9.996977719785837e-06, "logits/chosen": -0.8219765424728394, "logits/rejected": -0.8463776111602783, "logps/chosen": -182.87379455566406, "logps/rejected": -91.49818420410156, "loss": 0.1115, "rewards/accuracies": 1.0, "rewards/chosen": 0.25925445556640625, "rewards/margins": 1.4111229181289673, "rewards/rejected": -1.151868462562561, "step": 3369 }, { "epoch": 0.75, "learning_rate": 9.996915089959942e-06, "logits/chosen": -1.4440679550170898, "logits/rejected": -0.7988704442977905, "logps/chosen": -149.60379028320312, "logps/rejected": -474.970458984375, "loss": 0.0934, "rewards/accuracies": 1.0, "rewards/chosen": -2.537898302078247, "rewards/margins": 35.40974044799805, "rewards/rejected": -37.94763946533203, "step": 3370 }, { "epoch": 0.75, "learning_rate": 9.99685181804247e-06, "logits/chosen": -1.0017249584197998, "logits/rejected": -1.0762938261032104, "logps/chosen": -107.32075500488281, "logps/rejected": -101.1287612915039, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": 0.4644760191440582, "rewards/margins": 5.447844982147217, "rewards/rejected": -4.983368873596191, "step": 3371 }, { "epoch": 0.75, "learning_rate": 9.996787904041551e-06, "logits/chosen": -1.1991190910339355, "logits/rejected": -1.1754703521728516, "logps/chosen": -87.4425048828125, "logps/rejected": -181.82460021972656, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": 0.27974778413772583, "rewards/margins": 4.0303263664245605, "rewards/rejected": -3.7505784034729004, "step": 3372 }, { "epoch": 0.75, "learning_rate": 9.996723347965399e-06, "logits/chosen": -0.6422926783561707, "logits/rejected": -0.5839121341705322, "logps/chosen": -77.94732666015625, "logps/rejected": -117.98876953125, "loss": 0.052, "rewards/accuracies": 1.0, "rewards/chosen": -0.08487243950366974, "rewards/margins": 2.210540771484375, "rewards/rejected": -2.2954132556915283, "step": 3373 }, { "epoch": 0.75, "learning_rate": 9.996658149822307e-06, "logits/chosen": -1.0483248233795166, "logits/rejected": -1.0483248233795166, "logps/chosen": -181.70590209960938, "logps/rejected": -181.70590209960938, "loss": 0.3605, "rewards/accuracies": 0.0, "rewards/chosen": -0.46264344453811646, "rewards/margins": 0.0, "rewards/rejected": -0.46264344453811646, "step": 3374 }, { "epoch": 0.75, "learning_rate": 9.996592309620656e-06, "logits/chosen": -0.7080057263374329, "logits/rejected": -0.6758175492286682, "logps/chosen": -81.13924407958984, "logps/rejected": -86.77157592773438, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": 1.0361489057540894, "rewards/margins": 3.806443214416504, "rewards/rejected": -2.770294189453125, "step": 3375 }, { "epoch": 0.75, "learning_rate": 9.996525827368903e-06, "logits/chosen": -0.7341600060462952, "logits/rejected": -0.7688921093940735, "logps/chosen": -90.70442199707031, "logps/rejected": -118.46517181396484, "loss": 0.6821, "rewards/accuracies": 0.0, "rewards/chosen": -0.446258544921875, "rewards/margins": -1.0514427423477173, "rewards/rejected": 0.6051841974258423, "step": 3376 }, { "epoch": 0.75, "learning_rate": 9.996458703075593e-06, "logits/chosen": -1.258794903755188, "logits/rejected": -1.3138726949691772, "logps/chosen": -93.25126647949219, "logps/rejected": -75.63330078125, "loss": 0.0801, "rewards/accuracies": 1.0, "rewards/chosen": 0.4538734555244446, "rewards/margins": 1.8233871459960938, "rewards/rejected": -1.369513750076294, "step": 3377 }, { "epoch": 0.75, "learning_rate": 9.996390936749351e-06, "logits/chosen": -0.6286852955818176, "logits/rejected": -0.6414957642555237, "logps/chosen": -180.3160400390625, "logps/rejected": -123.8392333984375, "loss": 0.561, "rewards/accuracies": 0.0, "rewards/chosen": -3.316453695297241, "rewards/margins": -0.6948013305664062, "rewards/rejected": -2.621652364730835, "step": 3378 }, { "epoch": 0.75, "learning_rate": 9.996322528398886e-06, "logits/chosen": -1.0354896783828735, "logits/rejected": -1.0511115789413452, "logps/chosen": -85.25898742675781, "logps/rejected": -56.020320892333984, "loss": 0.0981, "rewards/accuracies": 1.0, "rewards/chosen": -1.886012315750122, "rewards/margins": 1.71630859375, "rewards/rejected": -3.602320909500122, "step": 3379 }, { "epoch": 0.75, "learning_rate": 9.996253478032987e-06, "logits/chosen": -0.6131276488304138, "logits/rejected": -0.5783950686454773, "logps/chosen": -180.2923126220703, "logps/rejected": -244.34799194335938, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 1.6971435546875, "rewards/margins": 7.022300720214844, "rewards/rejected": -5.325157165527344, "step": 3380 }, { "epoch": 0.75, "learning_rate": 9.996183785660526e-06, "logits/chosen": -0.7675120830535889, "logits/rejected": -0.7820764780044556, "logps/chosen": -95.12384033203125, "logps/rejected": -119.25827026367188, "loss": 0.2078, "rewards/accuracies": 1.0, "rewards/chosen": 0.3881118893623352, "rewards/margins": 0.7124183773994446, "rewards/rejected": -0.3243064880371094, "step": 3381 }, { "epoch": 0.75, "learning_rate": 9.996113451290457e-06, "logits/chosen": -0.6953226923942566, "logits/rejected": -0.6228655576705933, "logps/chosen": -155.07179260253906, "logps/rejected": -340.3533935546875, "loss": 0.1518, "rewards/accuracies": 1.0, "rewards/chosen": 1.14171302318573, "rewards/margins": 1.036518931388855, "rewards/rejected": 0.105194091796875, "step": 3382 }, { "epoch": 0.75, "learning_rate": 9.996042474931821e-06, "logits/chosen": -1.0200109481811523, "logits/rejected": -0.9223167300224304, "logps/chosen": -167.1959228515625, "logps/rejected": -394.15447998046875, "loss": 0.324, "rewards/accuracies": 1.0, "rewards/chosen": 1.003692626953125, "rewards/margins": 4.436022758483887, "rewards/rejected": -3.432330369949341, "step": 3383 }, { "epoch": 0.75, "learning_rate": 9.995970856593739e-06, "logits/chosen": -1.3069089651107788, "logits/rejected": -1.303473711013794, "logps/chosen": -85.16696166992188, "logps/rejected": -84.38179779052734, "loss": 0.5154, "rewards/accuracies": 0.0, "rewards/chosen": -0.6152435541152954, "rewards/margins": -0.5455436706542969, "rewards/rejected": -0.06969986110925674, "step": 3384 }, { "epoch": 0.75, "learning_rate": 9.99589859628541e-06, "logits/chosen": -0.6441360712051392, "logits/rejected": -0.6441360712051392, "logps/chosen": -137.21661376953125, "logps/rejected": -137.21661376953125, "loss": 0.351, "rewards/accuracies": 0.0, "rewards/chosen": -0.8331252932548523, "rewards/margins": 0.0, "rewards/rejected": -0.8331252932548523, "step": 3385 }, { "epoch": 0.75, "learning_rate": 9.995825694016122e-06, "logits/chosen": -0.8760210871696472, "logits/rejected": -0.8870463967323303, "logps/chosen": -112.29267883300781, "logps/rejected": -140.42294311523438, "loss": 0.385, "rewards/accuracies": 0.0, "rewards/chosen": -2.5560920238494873, "rewards/margins": -0.14708995819091797, "rewards/rejected": -2.4090020656585693, "step": 3386 }, { "epoch": 0.75, "learning_rate": 9.995752149795241e-06, "logits/chosen": -1.0807100534439087, "logits/rejected": -1.1252381801605225, "logps/chosen": -208.260498046875, "logps/rejected": -221.0228271484375, "loss": 0.0734, "rewards/accuracies": 1.0, "rewards/chosen": 1.0612777471542358, "rewards/margins": 6.711920261383057, "rewards/rejected": -5.650642395019531, "step": 3387 }, { "epoch": 0.75, "learning_rate": 9.99567796363222e-06, "logits/chosen": -0.627483069896698, "logits/rejected": -0.6805875897407532, "logps/chosen": -251.36199951171875, "logps/rejected": -176.43502807617188, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": 3.167132616043091, "rewards/margins": 7.698931694030762, "rewards/rejected": -4.53179931640625, "step": 3388 }, { "epoch": 0.75, "learning_rate": 9.995603135536587e-06, "logits/chosen": -1.038629412651062, "logits/rejected": -1.0298079252243042, "logps/chosen": -76.50947570800781, "logps/rejected": -87.29791259765625, "loss": 0.154, "rewards/accuracies": 1.0, "rewards/chosen": 0.6069137454032898, "rewards/margins": 1.0989364385604858, "rewards/rejected": -0.49202272295951843, "step": 3389 }, { "epoch": 0.75, "learning_rate": 9.995527665517964e-06, "logits/chosen": -0.9840377569198608, "logits/rejected": -0.8867903351783752, "logps/chosen": -80.36079406738281, "logps/rejected": -206.91558837890625, "loss": 0.3593, "rewards/accuracies": 1.0, "rewards/chosen": -0.6172882318496704, "rewards/margins": 1.4727996587753296, "rewards/rejected": -2.090087890625, "step": 3390 }, { "epoch": 0.75, "learning_rate": 9.995451553586042e-06, "logits/chosen": -1.3622969388961792, "logits/rejected": -1.282914638519287, "logps/chosen": -111.37139892578125, "logps/rejected": -143.73486328125, "loss": 0.2451, "rewards/accuracies": 1.0, "rewards/chosen": -0.7066673636436462, "rewards/margins": 1.4111831188201904, "rewards/rejected": -2.1178505420684814, "step": 3391 }, { "epoch": 0.75, "learning_rate": 9.995374799750606e-06, "logits/chosen": -0.46508121490478516, "logits/rejected": -0.4736538529396057, "logps/chosen": -90.18174743652344, "logps/rejected": -99.83893585205078, "loss": 0.0919, "rewards/accuracies": 1.0, "rewards/chosen": -0.02450714074075222, "rewards/margins": 1.6006660461425781, "rewards/rejected": -1.6251732110977173, "step": 3392 }, { "epoch": 0.75, "learning_rate": 9.995297404021515e-06, "logits/chosen": -1.2276611328125, "logits/rejected": -1.2282133102416992, "logps/chosen": -100.7833251953125, "logps/rejected": -117.37720489501953, "loss": 0.187, "rewards/accuracies": 1.0, "rewards/chosen": -0.24115295708179474, "rewards/margins": 1.262529730796814, "rewards/rejected": -1.5036827325820923, "step": 3393 }, { "epoch": 0.75, "learning_rate": 9.995219366408717e-06, "logits/chosen": -1.0815372467041016, "logits/rejected": -1.0911372900009155, "logps/chosen": -181.86314392089844, "logps/rejected": -201.54638671875, "loss": 0.7188, "rewards/accuracies": 1.0, "rewards/chosen": -0.4137435853481293, "rewards/margins": 4.2489013671875, "rewards/rejected": -4.662644863128662, "step": 3394 }, { "epoch": 0.75, "learning_rate": 9.995140686922237e-06, "logits/chosen": -0.9531077146530151, "logits/rejected": -0.974259614944458, "logps/chosen": -110.75519561767578, "logps/rejected": -98.57804107666016, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 1.3434380292892456, "rewards/margins": 5.783135414123535, "rewards/rejected": -4.439697265625, "step": 3395 }, { "epoch": 0.75, "learning_rate": 9.995061365572188e-06, "logits/chosen": -0.5653917789459229, "logits/rejected": -0.5653917789459229, "logps/chosen": -81.4761962890625, "logps/rejected": -81.4761962890625, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -1.6241470575332642, "rewards/margins": 0.0, "rewards/rejected": -1.6241470575332642, "step": 3396 }, { "epoch": 0.75, "learning_rate": 9.994981402368763e-06, "logits/chosen": -0.8553529381752014, "logits/rejected": -0.8216818571090698, "logps/chosen": -98.44514465332031, "logps/rejected": -124.07335662841797, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": 1.0153076648712158, "rewards/margins": 4.164155960083008, "rewards/rejected": -3.148848056793213, "step": 3397 }, { "epoch": 0.75, "learning_rate": 9.994900797322233e-06, "logits/chosen": -0.8588627576828003, "logits/rejected": -0.7831588387489319, "logps/chosen": -97.8122787475586, "logps/rejected": -243.15093994140625, "loss": 0.0536, "rewards/accuracies": 1.0, "rewards/chosen": -1.5403435230255127, "rewards/margins": 2.332369327545166, "rewards/rejected": -3.8727128505706787, "step": 3398 }, { "epoch": 0.75, "learning_rate": 9.994819550442958e-06, "logits/chosen": -0.7291976809501648, "logits/rejected": -0.6944681406021118, "logps/chosen": -60.228302001953125, "logps/rejected": -68.0715560913086, "loss": 0.6128, "rewards/accuracies": 0.0, "rewards/chosen": -3.2636048793792725, "rewards/margins": -0.8781552314758301, "rewards/rejected": -2.3854496479034424, "step": 3399 }, { "epoch": 0.75, "learning_rate": 9.994737661741379e-06, "logits/chosen": -1.3198081254959106, "logits/rejected": -1.3773012161254883, "logps/chosen": -110.99894714355469, "logps/rejected": -123.00154113769531, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 0.7089683413505554, "rewards/margins": 5.48840856552124, "rewards/rejected": -4.779440402984619, "step": 3400 }, { "epoch": 0.75, "learning_rate": 9.994655131228017e-06, "logits/chosen": -1.018699049949646, "logits/rejected": -1.1726493835449219, "logps/chosen": -200.66012573242188, "logps/rejected": -112.37605285644531, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.30847930908203125, "rewards/margins": 4.7017717361450195, "rewards/rejected": -5.010251045227051, "step": 3401 }, { "epoch": 0.75, "learning_rate": 9.994571958913477e-06, "logits/chosen": -1.2900516986846924, "logits/rejected": -1.2759090662002563, "logps/chosen": -95.61810302734375, "logps/rejected": -147.60629272460938, "loss": 0.121, "rewards/accuracies": 1.0, "rewards/chosen": 0.624621570110321, "rewards/margins": 1.465051293373108, "rewards/rejected": -0.8404297232627869, "step": 3402 }, { "epoch": 0.75, "learning_rate": 9.994488144808449e-06, "logits/chosen": -0.8851841688156128, "logits/rejected": -0.865599513053894, "logps/chosen": -97.26742553710938, "logps/rejected": -189.55255126953125, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": -1.8467819690704346, "rewards/margins": 2.6116974353790283, "rewards/rejected": -4.458479404449463, "step": 3403 }, { "epoch": 0.75, "learning_rate": 9.994403688923699e-06, "logits/chosen": -0.9590185284614563, "logits/rejected": -0.9392352104187012, "logps/chosen": -25.50881576538086, "logps/rejected": -27.941225051879883, "loss": 0.4471, "rewards/accuracies": 0.0, "rewards/chosen": -0.8153743743896484, "rewards/margins": -0.33906880021095276, "rewards/rejected": -0.4763055741786957, "step": 3404 }, { "epoch": 0.75, "learning_rate": 9.994318591270081e-06, "logits/chosen": -0.8976263403892517, "logits/rejected": -0.7951911687850952, "logps/chosen": -126.01982879638672, "logps/rejected": -241.2801971435547, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -1.3489960432052612, "rewards/margins": 3.740966796875, "rewards/rejected": -5.089962959289551, "step": 3405 }, { "epoch": 0.75, "learning_rate": 9.99423285185853e-06, "logits/chosen": -1.1545796394348145, "logits/rejected": -1.2532819509506226, "logps/chosen": -145.89968872070312, "logps/rejected": -57.371604919433594, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": 1.1888641119003296, "rewards/margins": 4.103763103485107, "rewards/rejected": -2.9148991107940674, "step": 3406 }, { "epoch": 0.75, "learning_rate": 9.994146470700065e-06, "logits/chosen": -1.0329713821411133, "logits/rejected": -1.0229750871658325, "logps/chosen": -175.42608642578125, "logps/rejected": -113.06278991699219, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 0.13973084092140198, "rewards/margins": 5.712793350219727, "rewards/rejected": -5.573062419891357, "step": 3407 }, { "epoch": 0.75, "learning_rate": 9.994059447805781e-06, "logits/chosen": -0.6959514021873474, "logits/rejected": -0.6926737427711487, "logps/chosen": -231.62144470214844, "logps/rejected": -694.7232666015625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 1.4608932733535767, "rewards/margins": 53.428245544433594, "rewards/rejected": -51.96735382080078, "step": 3408 }, { "epoch": 0.75, "learning_rate": 9.993971783186867e-06, "logits/chosen": -0.9923614263534546, "logits/rejected": -1.0176340341567993, "logps/chosen": -128.07774353027344, "logps/rejected": -108.880615234375, "loss": 0.2343, "rewards/accuracies": 1.0, "rewards/chosen": -1.129913330078125, "rewards/margins": 0.5173492431640625, "rewards/rejected": -1.6472625732421875, "step": 3409 }, { "epoch": 0.75, "learning_rate": 9.993883476854582e-06, "logits/chosen": -0.7721178531646729, "logits/rejected": -0.7133895754814148, "logps/chosen": -78.93807983398438, "logps/rejected": -104.32292175292969, "loss": 0.0856, "rewards/accuracies": 1.0, "rewards/chosen": -0.18293152749538422, "rewards/margins": 2.388176202774048, "rewards/rejected": -2.5711076259613037, "step": 3410 }, { "epoch": 0.75, "learning_rate": 9.993794528820275e-06, "logits/chosen": -1.1311594247817993, "logits/rejected": -1.144946575164795, "logps/chosen": -59.20498275756836, "logps/rejected": -108.98287200927734, "loss": 0.0726, "rewards/accuracies": 1.0, "rewards/chosen": 0.27944642305374146, "rewards/margins": 1.9433534145355225, "rewards/rejected": -1.6639069318771362, "step": 3411 }, { "epoch": 0.76, "learning_rate": 9.993704939095376e-06, "logits/chosen": -0.47775912284851074, "logits/rejected": -0.49582338333129883, "logps/chosen": -53.554603576660156, "logps/rejected": -133.5418701171875, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": 0.25240403413772583, "rewards/margins": 6.545979022979736, "rewards/rejected": -6.293574810028076, "step": 3412 }, { "epoch": 0.76, "learning_rate": 9.9936147076914e-06, "logits/chosen": -0.9848957061767578, "logits/rejected": -1.0480570793151855, "logps/chosen": -230.69454956054688, "logps/rejected": -146.5789794921875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 6.3835601806640625, "rewards/margins": 9.718596458435059, "rewards/rejected": -3.335036516189575, "step": 3413 }, { "epoch": 0.76, "learning_rate": 9.993523834619933e-06, "logits/chosen": -1.3968170881271362, "logits/rejected": -1.2767318487167358, "logps/chosen": -76.624755859375, "logps/rejected": -182.52847290039062, "loss": 1.0951, "rewards/accuracies": 0.0, "rewards/chosen": -2.196770191192627, "rewards/margins": -2.024519920349121, "rewards/rejected": -0.17225037515163422, "step": 3414 }, { "epoch": 0.76, "learning_rate": 9.99343231989266e-06, "logits/chosen": -1.2783913612365723, "logits/rejected": -1.1969624757766724, "logps/chosen": -93.06048583984375, "logps/rejected": -205.1800994873047, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": 1.5023659467697144, "rewards/margins": 3.4254281520843506, "rewards/rejected": -1.9230622053146362, "step": 3415 }, { "epoch": 0.76, "learning_rate": 9.99334016352134e-06, "logits/chosen": -0.9741566777229309, "logits/rejected": -0.8836269378662109, "logps/chosen": -169.955078125, "logps/rejected": -294.0887451171875, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 1.1167877912521362, "rewards/margins": 3.879324436187744, "rewards/rejected": -2.7625367641448975, "step": 3416 }, { "epoch": 0.76, "learning_rate": 9.993247365517808e-06, "logits/chosen": -1.371993899345398, "logits/rejected": -1.2491356134414673, "logps/chosen": -91.52069091796875, "logps/rejected": -153.9039306640625, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -0.05331878736615181, "rewards/margins": 2.3403382301330566, "rewards/rejected": -2.3936569690704346, "step": 3417 }, { "epoch": 0.76, "learning_rate": 9.993153925893997e-06, "logits/chosen": -0.9125544428825378, "logits/rejected": -0.7796028256416321, "logps/chosen": -102.88134765625, "logps/rejected": -211.59527587890625, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": 0.043479155749082565, "rewards/margins": 3.832753896713257, "rewards/rejected": -3.7892746925354004, "step": 3418 }, { "epoch": 0.76, "learning_rate": 9.993059844661908e-06, "logits/chosen": -1.1945661306381226, "logits/rejected": -1.065774917602539, "logps/chosen": -174.94781494140625, "logps/rejected": -234.9914093017578, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 1.03166663646698, "rewards/margins": 6.310013294219971, "rewards/rejected": -5.278346538543701, "step": 3419 }, { "epoch": 0.76, "learning_rate": 9.992965121833631e-06, "logits/chosen": -1.0251874923706055, "logits/rejected": -1.026486873626709, "logps/chosen": -100.26398468017578, "logps/rejected": -113.53370666503906, "loss": 0.0806, "rewards/accuracies": 1.0, "rewards/chosen": 1.1942962408065796, "rewards/margins": 1.759098768234253, "rewards/rejected": -0.5648025870323181, "step": 3420 }, { "epoch": 0.76, "learning_rate": 9.99286975742134e-06, "logits/chosen": -0.8817189335823059, "logits/rejected": -0.9875519275665283, "logps/chosen": -173.76039123535156, "logps/rejected": -96.682373046875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 2.7758729457855225, "rewards/margins": 5.254375457763672, "rewards/rejected": -2.4785027503967285, "step": 3421 }, { "epoch": 0.76, "learning_rate": 9.992773751437288e-06, "logits/chosen": -1.1175684928894043, "logits/rejected": -1.089004635810852, "logps/chosen": -251.66220092773438, "logps/rejected": -174.30410766601562, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": 2.9061920642852783, "rewards/margins": 2.0932159423828125, "rewards/rejected": 0.812976062297821, "step": 3422 }, { "epoch": 0.76, "learning_rate": 9.99267710389381e-06, "logits/chosen": -1.046440601348877, "logits/rejected": -1.0079628229141235, "logps/chosen": -79.41706848144531, "logps/rejected": -117.47215270996094, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": 2.019437551498413, "rewards/margins": 3.2355363368988037, "rewards/rejected": -1.2160987854003906, "step": 3423 }, { "epoch": 0.76, "learning_rate": 9.992579814803327e-06, "logits/chosen": -0.760165810585022, "logits/rejected": -0.7600921392440796, "logps/chosen": -95.20585632324219, "logps/rejected": -106.2572021484375, "loss": 0.1937, "rewards/accuracies": 1.0, "rewards/chosen": 0.6046493649482727, "rewards/margins": 0.7796829342842102, "rewards/rejected": -0.1750335693359375, "step": 3424 }, { "epoch": 0.76, "learning_rate": 9.992481884178338e-06, "logits/chosen": -0.884485125541687, "logits/rejected": -0.884485125541687, "logps/chosen": -83.06929779052734, "logps/rejected": -83.06929779052734, "loss": 0.3486, "rewards/accuracies": 0.0, "rewards/chosen": -2.965498447418213, "rewards/margins": 0.0, "rewards/rejected": -2.965498447418213, "step": 3425 }, { "epoch": 0.76, "learning_rate": 9.99238331203143e-06, "logits/chosen": -1.2013849020004272, "logits/rejected": -0.631356954574585, "logps/chosen": -146.1285400390625, "logps/rejected": -471.31976318359375, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": 2.3656327724456787, "rewards/margins": 35.0477294921875, "rewards/rejected": -32.682098388671875, "step": 3426 }, { "epoch": 0.76, "learning_rate": 9.99228409837527e-06, "logits/chosen": -0.8779667019844055, "logits/rejected": -0.9613966345787048, "logps/chosen": -149.43984985351562, "logps/rejected": -85.94741821289062, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 1.5312408208847046, "rewards/margins": 5.601565837860107, "rewards/rejected": -4.070324897766113, "step": 3427 }, { "epoch": 0.76, "learning_rate": 9.9921842432226e-06, "logits/chosen": -0.8841976523399353, "logits/rejected": -0.9064825773239136, "logps/chosen": -104.92649841308594, "logps/rejected": -116.77899169921875, "loss": 0.0797, "rewards/accuracies": 1.0, "rewards/chosen": -1.7504547834396362, "rewards/margins": 3.567842960357666, "rewards/rejected": -5.318297863006592, "step": 3428 }, { "epoch": 0.76, "learning_rate": 9.992083746586258e-06, "logits/chosen": -1.0030875205993652, "logits/rejected": -1.0662940740585327, "logps/chosen": -117.52410888671875, "logps/rejected": -92.91461944580078, "loss": 0.0716, "rewards/accuracies": 1.0, "rewards/chosen": -0.8775039911270142, "rewards/margins": 4.2365264892578125, "rewards/rejected": -5.114030361175537, "step": 3429 }, { "epoch": 0.76, "learning_rate": 9.991982608479156e-06, "logits/chosen": -1.0457738637924194, "logits/rejected": -1.153088092803955, "logps/chosen": -221.8995361328125, "logps/rejected": -110.09544372558594, "loss": 0.367, "rewards/accuracies": 1.0, "rewards/chosen": 1.7476227283477783, "rewards/margins": 3.174914598464966, "rewards/rejected": -1.4272918701171875, "step": 3430 }, { "epoch": 0.76, "learning_rate": 9.991880828914288e-06, "logits/chosen": -1.091339111328125, "logits/rejected": -1.0581274032592773, "logps/chosen": -77.825927734375, "logps/rejected": -135.34922790527344, "loss": 0.1247, "rewards/accuracies": 1.0, "rewards/chosen": -0.5881462097167969, "rewards/margins": 2.6955666542053223, "rewards/rejected": -3.283712863922119, "step": 3431 }, { "epoch": 0.76, "learning_rate": 9.991778407904733e-06, "logits/chosen": -0.9200042486190796, "logits/rejected": -0.8009853363037109, "logps/chosen": -115.81346130371094, "logps/rejected": -298.8425598144531, "loss": 0.0686, "rewards/accuracies": 1.0, "rewards/chosen": -2.7307305335998535, "rewards/margins": 1.9260063171386719, "rewards/rejected": -4.656736850738525, "step": 3432 }, { "epoch": 0.76, "learning_rate": 9.991675345463654e-06, "logits/chosen": -0.8930885195732117, "logits/rejected": -0.9051051139831543, "logps/chosen": -121.607666015625, "logps/rejected": -68.44206237792969, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -0.7933074831962585, "rewards/margins": 3.0723540782928467, "rewards/rejected": -3.86566162109375, "step": 3433 }, { "epoch": 0.76, "learning_rate": 9.991571641604291e-06, "logits/chosen": -0.8910753130912781, "logits/rejected": -0.8166922926902771, "logps/chosen": -156.3025665283203, "logps/rejected": -199.46585083007812, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": 0.9638046622276306, "rewards/margins": 3.257293939590454, "rewards/rejected": -2.2934892177581787, "step": 3434 }, { "epoch": 0.76, "learning_rate": 9.991467296339973e-06, "logits/chosen": -1.0233447551727295, "logits/rejected": -1.0233447551727295, "logps/chosen": -132.91162109375, "logps/rejected": -132.91162109375, "loss": 0.3949, "rewards/accuracies": 0.0, "rewards/chosen": -3.7608888149261475, "rewards/margins": 0.0, "rewards/rejected": -3.7608888149261475, "step": 3435 }, { "epoch": 0.76, "learning_rate": 9.991362309684105e-06, "logits/chosen": -1.0564333200454712, "logits/rejected": -1.0245938301086426, "logps/chosen": -93.8782958984375, "logps/rejected": -126.4764633178711, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -0.5089317560195923, "rewards/margins": 3.822366714477539, "rewards/rejected": -4.331298351287842, "step": 3436 }, { "epoch": 0.76, "learning_rate": 9.991256681650181e-06, "logits/chosen": -0.9223946928977966, "logits/rejected": -0.8572526574134827, "logps/chosen": -110.04654693603516, "logps/rejected": -183.83425903320312, "loss": 0.0541, "rewards/accuracies": 1.0, "rewards/chosen": 1.0546928644180298, "rewards/margins": 3.7520241737365723, "rewards/rejected": -2.697331190109253, "step": 3437 }, { "epoch": 0.76, "learning_rate": 9.99115041225177e-06, "logits/chosen": -0.8458942770957947, "logits/rejected": -0.780712902545929, "logps/chosen": -53.5383186340332, "logps/rejected": -104.71841430664062, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": 1.1656216382980347, "rewards/margins": 6.094326019287109, "rewards/rejected": -4.928704261779785, "step": 3438 }, { "epoch": 0.76, "learning_rate": 9.991043501502532e-06, "logits/chosen": -0.9776638150215149, "logits/rejected": -0.9367964267730713, "logps/chosen": -137.86090087890625, "logps/rejected": -177.25076293945312, "loss": 1.7572, "rewards/accuracies": 0.0, "rewards/chosen": -2.5591118335723877, "rewards/margins": -3.479656219482422, "rewards/rejected": 0.920544445514679, "step": 3439 }, { "epoch": 0.76, "learning_rate": 9.9909359494162e-06, "logits/chosen": -0.9175789952278137, "logits/rejected": -0.8720570802688599, "logps/chosen": -61.39239501953125, "logps/rejected": -88.56204223632812, "loss": 0.4454, "rewards/accuracies": 1.0, "rewards/chosen": 0.48954468965530396, "rewards/margins": 1.7322022914886475, "rewards/rejected": -1.2426575422286987, "step": 3440 }, { "epoch": 0.76, "learning_rate": 9.990827756006599e-06, "logits/chosen": -0.9402621388435364, "logits/rejected": -0.9030271172523499, "logps/chosen": -59.04710388183594, "logps/rejected": -77.92034912109375, "loss": 0.066, "rewards/accuracies": 1.0, "rewards/chosen": -0.44783973693847656, "rewards/margins": 2.677802324295044, "rewards/rejected": -3.1256420612335205, "step": 3441 }, { "epoch": 0.76, "learning_rate": 9.990718921287625e-06, "logits/chosen": -0.8299311995506287, "logits/rejected": -0.7852020859718323, "logps/chosen": -81.23445129394531, "logps/rejected": -138.35040283203125, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 0.9171707034111023, "rewards/margins": 4.697271823883057, "rewards/rejected": -3.7801010608673096, "step": 3442 }, { "epoch": 0.76, "learning_rate": 9.99060944527327e-06, "logits/chosen": -1.2091461420059204, "logits/rejected": -1.2105090618133545, "logps/chosen": -108.08831787109375, "logps/rejected": -125.8334732055664, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.9708244204521179, "rewards/margins": 3.7395384311676025, "rewards/rejected": -4.710362911224365, "step": 3443 }, { "epoch": 0.76, "learning_rate": 9.990499327977599e-06, "logits/chosen": -1.106834888458252, "logits/rejected": -1.0963159799575806, "logps/chosen": -95.42671203613281, "logps/rejected": -171.4246826171875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.0340439081192017, "rewards/margins": 4.661709785461426, "rewards/rejected": -5.695753574371338, "step": 3444 }, { "epoch": 0.76, "learning_rate": 9.990388569414759e-06, "logits/chosen": -1.066219449043274, "logits/rejected": -1.021796464920044, "logps/chosen": -179.3118133544922, "logps/rejected": -262.0273132324219, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": 1.093342661857605, "rewards/margins": 3.763148784637451, "rewards/rejected": -2.6698060035705566, "step": 3445 }, { "epoch": 0.76, "learning_rate": 9.990277169598985e-06, "logits/chosen": -0.6893659830093384, "logits/rejected": -0.5087051391601562, "logps/chosen": -173.00027465820312, "logps/rejected": -313.24139404296875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.1102707386016846, "rewards/margins": 11.117271423339844, "rewards/rejected": -10.007000923156738, "step": 3446 }, { "epoch": 0.76, "learning_rate": 9.99016512854459e-06, "logits/chosen": -0.9304202795028687, "logits/rejected": -0.9571642279624939, "logps/chosen": -176.04241943359375, "logps/rejected": -136.88009643554688, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 2.7267913818359375, "rewards/margins": 3.931227207183838, "rewards/rejected": -1.2044357061386108, "step": 3447 }, { "epoch": 0.76, "learning_rate": 9.990052446265974e-06, "logits/chosen": -0.9098584055900574, "logits/rejected": -0.8104400038719177, "logps/chosen": -69.9493408203125, "logps/rejected": -240.26817321777344, "loss": 0.1376, "rewards/accuracies": 1.0, "rewards/chosen": 0.6665496826171875, "rewards/margins": 1.6782578229904175, "rewards/rejected": -1.01170814037323, "step": 3448 }, { "epoch": 0.76, "learning_rate": 9.989939122777614e-06, "logits/chosen": -0.9707316756248474, "logits/rejected": -0.9508836269378662, "logps/chosen": -86.23448181152344, "logps/rejected": -151.6658935546875, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": 0.43708497285842896, "rewards/margins": 2.826369524002075, "rewards/rejected": -2.389284610748291, "step": 3449 }, { "epoch": 0.76, "learning_rate": 9.98982515809407e-06, "logits/chosen": -0.8283393979072571, "logits/rejected": -0.8688634634017944, "logps/chosen": -186.1591796875, "logps/rejected": -176.7426300048828, "loss": 0.0335, "rewards/accuracies": 1.0, "rewards/chosen": 4.192747592926025, "rewards/margins": 2.671769618988037, "rewards/rejected": 1.5209778547286987, "step": 3450 }, { "epoch": 0.76, "learning_rate": 9.989710552229992e-06, "logits/chosen": -0.8321048021316528, "logits/rejected": -0.8321048021316528, "logps/chosen": -46.92387390136719, "logps/rejected": -46.92387390136719, "loss": 0.6788, "rewards/accuracies": 0.0, "rewards/chosen": 0.0045566558837890625, "rewards/margins": 0.0, "rewards/rejected": 0.0045566558837890625, "step": 3451 }, { "epoch": 0.76, "learning_rate": 9.9895953052001e-06, "logits/chosen": -1.0685442686080933, "logits/rejected": -1.0612462759017944, "logps/chosen": -87.83354187011719, "logps/rejected": -159.5579833984375, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": 0.7446220517158508, "rewards/margins": 2.5589866638183594, "rewards/rejected": -1.8143646717071533, "step": 3452 }, { "epoch": 0.76, "learning_rate": 9.989479417019208e-06, "logits/chosen": -0.7048086524009705, "logits/rejected": -0.7403362989425659, "logps/chosen": -105.63111877441406, "logps/rejected": -186.44223022460938, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -1.1274948120117188, "rewards/margins": 3.013354778289795, "rewards/rejected": -4.140849590301514, "step": 3453 }, { "epoch": 0.76, "learning_rate": 9.989362887702203e-06, "logits/chosen": -0.9593647122383118, "logits/rejected": -0.9303333759307861, "logps/chosen": -81.50453186035156, "logps/rejected": -110.18821716308594, "loss": 0.336, "rewards/accuracies": 1.0, "rewards/chosen": -0.8352462649345398, "rewards/margins": 0.042997777462005615, "rewards/rejected": -0.8782440423965454, "step": 3454 }, { "epoch": 0.76, "learning_rate": 9.989245717264063e-06, "logits/chosen": -0.9293232560157776, "logits/rejected": -0.9293232560157776, "logps/chosen": -137.99673461914062, "logps/rejected": -137.99673461914062, "loss": 0.3589, "rewards/accuracies": 0.0, "rewards/chosen": -2.839170217514038, "rewards/margins": 0.0, "rewards/rejected": -2.839170217514038, "step": 3455 }, { "epoch": 0.76, "learning_rate": 9.989127905719841e-06, "logits/chosen": -1.255643367767334, "logits/rejected": -1.2932347059249878, "logps/chosen": -155.1541290283203, "logps/rejected": -96.94454956054688, "loss": 0.0611, "rewards/accuracies": 1.0, "rewards/chosen": -1.788356065750122, "rewards/margins": 2.0937511920928955, "rewards/rejected": -3.8821072578430176, "step": 3456 }, { "epoch": 0.77, "learning_rate": 9.989009453084678e-06, "logits/chosen": -1.1397219896316528, "logits/rejected": -1.0971019268035889, "logps/chosen": -107.30656433105469, "logps/rejected": -192.40234375, "loss": 0.147, "rewards/accuracies": 1.0, "rewards/chosen": 1.9226715564727783, "rewards/margins": 8.569534301757812, "rewards/rejected": -6.646862983703613, "step": 3457 }, { "epoch": 0.77, "learning_rate": 9.988890359373794e-06, "logits/chosen": -1.1680220365524292, "logits/rejected": -0.7690833806991577, "logps/chosen": -138.40667724609375, "logps/rejected": -504.810791015625, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -1.1585816144943237, "rewards/margins": 13.150433540344238, "rewards/rejected": -14.309015274047852, "step": 3458 }, { "epoch": 0.77, "learning_rate": 9.988770624602488e-06, "logits/chosen": -1.1529706716537476, "logits/rejected": -1.1643683910369873, "logps/chosen": -114.19728088378906, "logps/rejected": -75.03160095214844, "loss": 0.5764, "rewards/accuracies": 0.0, "rewards/chosen": 0.31396713852882385, "rewards/margins": -0.773291826248169, "rewards/rejected": 1.0872589349746704, "step": 3459 }, { "epoch": 0.77, "learning_rate": 9.988650248786153e-06, "logits/chosen": -0.9912278056144714, "logits/rejected": -1.0862345695495605, "logps/chosen": -265.73748779296875, "logps/rejected": -281.3498840332031, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.566088855266571, "rewards/margins": 12.103680610656738, "rewards/rejected": -11.537591934204102, "step": 3460 }, { "epoch": 0.77, "learning_rate": 9.988529231940252e-06, "logits/chosen": -0.983177900314331, "logits/rejected": -1.0326745510101318, "logps/chosen": -156.2256622314453, "logps/rejected": -108.88333892822266, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": 1.1212494373321533, "rewards/margins": 3.962421417236328, "rewards/rejected": -2.841171979904175, "step": 3461 }, { "epoch": 0.77, "learning_rate": 9.988407574080337e-06, "logits/chosen": -1.167575478553772, "logits/rejected": -1.1454174518585205, "logps/chosen": -93.56271362304688, "logps/rejected": -162.1140594482422, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 1.0449875593185425, "rewards/margins": 5.35756254196167, "rewards/rejected": -4.312574863433838, "step": 3462 }, { "epoch": 0.77, "learning_rate": 9.988285275222041e-06, "logits/chosen": -1.1321722269058228, "logits/rejected": -0.897756040096283, "logps/chosen": -38.452735900878906, "logps/rejected": -384.0405578613281, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.3138999938964844, "rewards/margins": 21.794055938720703, "rewards/rejected": -21.48015594482422, "step": 3463 }, { "epoch": 0.77, "learning_rate": 9.988162335381077e-06, "logits/chosen": -0.8109872937202454, "logits/rejected": -0.7408488392829895, "logps/chosen": -237.84478759765625, "logps/rejected": -446.3848876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 4.525320529937744, "rewards/margins": 17.663938522338867, "rewards/rejected": -13.138617515563965, "step": 3464 }, { "epoch": 0.77, "learning_rate": 9.988038754573245e-06, "logits/chosen": -1.0582174062728882, "logits/rejected": -0.8981987833976746, "logps/chosen": -179.24624633789062, "logps/rejected": -357.8178405761719, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": 0.6230499148368835, "rewards/margins": 7.273919582366943, "rewards/rejected": -6.650869846343994, "step": 3465 }, { "epoch": 0.77, "learning_rate": 9.987914532814425e-06, "logits/chosen": -1.5182780027389526, "logits/rejected": -1.235568642616272, "logps/chosen": -65.51465606689453, "logps/rejected": -270.66424560546875, "loss": 0.4824, "rewards/accuracies": 0.0, "rewards/chosen": -0.5288444757461548, "rewards/margins": -0.4821525812149048, "rewards/rejected": -0.04669189453125, "step": 3466 }, { "epoch": 0.77, "learning_rate": 9.987789670120578e-06, "logits/chosen": -1.1145251989364624, "logits/rejected": -0.9841464161872864, "logps/chosen": -129.60426330566406, "logps/rejected": -237.09434509277344, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": 0.3175796568393707, "rewards/margins": 2.3874573707580566, "rewards/rejected": -2.0698776245117188, "step": 3467 }, { "epoch": 0.77, "learning_rate": 9.987664166507749e-06, "logits/chosen": -0.6453135013580322, "logits/rejected": -0.6481508612632751, "logps/chosen": -150.78443908691406, "logps/rejected": -115.37887573242188, "loss": 0.0653, "rewards/accuracies": 1.0, "rewards/chosen": 1.6163681745529175, "rewards/margins": 3.835702419281006, "rewards/rejected": -2.219334363937378, "step": 3468 }, { "epoch": 0.77, "learning_rate": 9.987538021992063e-06, "logits/chosen": -1.0011874437332153, "logits/rejected": -1.0260214805603027, "logps/chosen": -110.9577407836914, "logps/rejected": -86.71866607666016, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": -2.6159920692443848, "rewards/margins": 2.58737850189209, "rewards/rejected": -5.203370571136475, "step": 3469 }, { "epoch": 0.77, "learning_rate": 9.987411236589733e-06, "logits/chosen": -0.887226402759552, "logits/rejected": -0.921766459941864, "logps/chosen": -208.55331420898438, "logps/rejected": -124.13326263427734, "loss": 0.435, "rewards/accuracies": 1.0, "rewards/chosen": 0.9128814935684204, "rewards/margins": 1.7847191095352173, "rewards/rejected": -0.8718376159667969, "step": 3470 }, { "epoch": 0.77, "learning_rate": 9.987283810317046e-06, "logits/chosen": -1.4287244081497192, "logits/rejected": -1.300258994102478, "logps/chosen": -114.15396118164062, "logps/rejected": -216.78826904296875, "loss": 1.5755, "rewards/accuracies": 0.0, "rewards/chosen": -0.3218185603618622, "rewards/margins": -3.0833756923675537, "rewards/rejected": 2.761557102203369, "step": 3471 }, { "epoch": 0.77, "learning_rate": 9.987155743190379e-06, "logits/chosen": -0.7460350394248962, "logits/rejected": -0.7227141261100769, "logps/chosen": -97.09060668945312, "logps/rejected": -179.4905242919922, "loss": 0.152, "rewards/accuracies": 1.0, "rewards/chosen": -2.928009033203125, "rewards/margins": 1.7324585914611816, "rewards/rejected": -4.660467624664307, "step": 3472 }, { "epoch": 0.77, "learning_rate": 9.98702703522619e-06, "logits/chosen": -0.9501626491546631, "logits/rejected": -1.0774174928665161, "logps/chosen": -248.8018798828125, "logps/rejected": -127.87528228759766, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 4.125677585601807, "rewards/margins": 5.4955058097839355, "rewards/rejected": -1.3698281049728394, "step": 3473 }, { "epoch": 0.77, "learning_rate": 9.986897686441012e-06, "logits/chosen": -0.877107560634613, "logits/rejected": -0.8427822589874268, "logps/chosen": -133.2576446533203, "logps/rejected": -212.73556518554688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.45335853099823, "rewards/margins": 6.389491081237793, "rewards/rejected": -7.8428497314453125, "step": 3474 }, { "epoch": 0.77, "learning_rate": 9.986767696851472e-06, "logits/chosen": -1.2926065921783447, "logits/rejected": -1.1947870254516602, "logps/chosen": -119.78160095214844, "logps/rejected": -248.04315185546875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.109649658203125, "rewards/margins": 5.260678291320801, "rewards/rejected": -6.370327949523926, "step": 3475 }, { "epoch": 0.77, "learning_rate": 9.98663706647427e-06, "logits/chosen": -0.9989219307899475, "logits/rejected": -1.1333593130111694, "logps/chosen": -216.341796875, "logps/rejected": -171.03009033203125, "loss": 0.5295, "rewards/accuracies": 0.0, "rewards/chosen": 1.4784088134765625, "rewards/margins": -0.6312835216522217, "rewards/rejected": 2.109692335128784, "step": 3476 }, { "epoch": 0.77, "learning_rate": 9.986505795326194e-06, "logits/chosen": -1.273646593093872, "logits/rejected": -1.1950669288635254, "logps/chosen": -94.6059341430664, "logps/rejected": -183.64227294921875, "loss": 0.0741, "rewards/accuracies": 1.0, "rewards/chosen": -0.051845550537109375, "rewards/margins": 2.138202667236328, "rewards/rejected": -2.1900482177734375, "step": 3477 }, { "epoch": 0.77, "learning_rate": 9.986373883424108e-06, "logits/chosen": -1.441733717918396, "logits/rejected": -1.444846272468567, "logps/chosen": -103.81094360351562, "logps/rejected": -119.92461395263672, "loss": 0.1465, "rewards/accuracies": 1.0, "rewards/chosen": 0.16261139512062073, "rewards/margins": 1.0819542407989502, "rewards/rejected": -0.9193428158760071, "step": 3478 }, { "epoch": 0.77, "learning_rate": 9.986241330784967e-06, "logits/chosen": -0.9211180210113525, "logits/rejected": -0.9163370728492737, "logps/chosen": -152.44309997558594, "logps/rejected": -237.6732177734375, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": 1.413081407546997, "rewards/margins": 7.005699157714844, "rewards/rejected": -5.592617988586426, "step": 3479 }, { "epoch": 0.77, "learning_rate": 9.9861081374258e-06, "logits/chosen": -0.9307984113693237, "logits/rejected": -0.8478273153305054, "logps/chosen": -223.55409240722656, "logps/rejected": -257.7137451171875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 2.437023878097534, "rewards/margins": 5.6535491943359375, "rewards/rejected": -3.2165253162384033, "step": 3480 }, { "epoch": 0.77, "learning_rate": 9.985974303363723e-06, "logits/chosen": -1.2261334657669067, "logits/rejected": -1.244336724281311, "logps/chosen": -242.24169921875, "logps/rejected": -221.1062774658203, "loss": 0.1514, "rewards/accuracies": 1.0, "rewards/chosen": 0.20486144721508026, "rewards/margins": 1.0461242198944092, "rewards/rejected": -0.8412628173828125, "step": 3481 }, { "epoch": 0.77, "learning_rate": 9.985839828615937e-06, "logits/chosen": -0.9199389815330505, "logits/rejected": -0.9199389815330505, "logps/chosen": -127.1429672241211, "logps/rejected": -127.1429672241211, "loss": 0.3475, "rewards/accuracies": 0.0, "rewards/chosen": -3.4866974353790283, "rewards/margins": 0.0, "rewards/rejected": -3.4866974353790283, "step": 3482 }, { "epoch": 0.77, "learning_rate": 9.985704713199715e-06, "logits/chosen": -0.8730406165122986, "logits/rejected": -0.8730406165122986, "logps/chosen": -228.59945678710938, "logps/rejected": -228.59945678710938, "loss": 0.3469, "rewards/accuracies": 0.0, "rewards/chosen": -1.0305389165878296, "rewards/margins": 0.0, "rewards/rejected": -1.0305389165878296, "step": 3483 }, { "epoch": 0.77, "learning_rate": 9.985568957132425e-06, "logits/chosen": -1.2623164653778076, "logits/rejected": -1.0669156312942505, "logps/chosen": -120.09134674072266, "logps/rejected": -326.5390319824219, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -2.365070343017578, "rewards/margins": 3.3297934532165527, "rewards/rejected": -5.694863796234131, "step": 3484 }, { "epoch": 0.77, "learning_rate": 9.98543256043151e-06, "logits/chosen": -1.1154682636260986, "logits/rejected": -1.1154682636260986, "logps/chosen": -120.50379180908203, "logps/rejected": -120.50379180908203, "loss": 0.3987, "rewards/accuracies": 0.0, "rewards/chosen": -2.606623888015747, "rewards/margins": 0.0, "rewards/rejected": -2.606623888015747, "step": 3485 }, { "epoch": 0.77, "learning_rate": 9.985295523114492e-06, "logits/chosen": -1.1758942604064941, "logits/rejected": -1.147140383720398, "logps/chosen": -81.9002685546875, "logps/rejected": -184.5714111328125, "loss": 0.0897, "rewards/accuracies": 1.0, "rewards/chosen": 0.3751938045024872, "rewards/margins": 1.6808639764785767, "rewards/rejected": -1.305670142173767, "step": 3486 }, { "epoch": 0.77, "learning_rate": 9.985157845198987e-06, "logits/chosen": -0.9182955622673035, "logits/rejected": -0.8047636151313782, "logps/chosen": -153.11505126953125, "logps/rejected": -206.43743896484375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.16321106255054474, "rewards/margins": 7.393506050109863, "rewards/rejected": -7.5567169189453125, "step": 3487 }, { "epoch": 0.77, "learning_rate": 9.985019526702682e-06, "logits/chosen": -0.7995058298110962, "logits/rejected": -0.8285212516784668, "logps/chosen": -189.55130004882812, "logps/rejected": -174.4712677001953, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.087899923324585, "rewards/margins": 4.181325912475586, "rewards/rejected": -6.26922607421875, "step": 3488 }, { "epoch": 0.77, "learning_rate": 9.984880567643351e-06, "logits/chosen": -1.0869344472885132, "logits/rejected": -1.1741470098495483, "logps/chosen": -169.1930389404297, "logps/rejected": -103.83430480957031, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": -0.3377700746059418, "rewards/margins": 2.7253129482269287, "rewards/rejected": -3.0630829334259033, "step": 3489 }, { "epoch": 0.77, "learning_rate": 9.984740968038852e-06, "logits/chosen": -1.322988510131836, "logits/rejected": -1.0993118286132812, "logps/chosen": -131.15028381347656, "logps/rejected": -283.51007080078125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": 0.9105987548828125, "rewards/margins": 4.581947326660156, "rewards/rejected": -3.6713485717773438, "step": 3490 }, { "epoch": 0.77, "learning_rate": 9.984600727907119e-06, "logits/chosen": -1.0542486906051636, "logits/rejected": -1.0095160007476807, "logps/chosen": -113.82440185546875, "logps/rejected": -86.27294921875, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": 0.4311836361885071, "rewards/margins": 3.201380968093872, "rewards/rejected": -2.7701973915100098, "step": 3491 }, { "epoch": 0.77, "learning_rate": 9.984459847266176e-06, "logits/chosen": -1.102280855178833, "logits/rejected": -1.040231704711914, "logps/chosen": -58.3985481262207, "logps/rejected": -116.97982788085938, "loss": 0.14, "rewards/accuracies": 1.0, "rewards/chosen": -0.7125999331474304, "rewards/margins": 2.1879875659942627, "rewards/rejected": -2.900587558746338, "step": 3492 }, { "epoch": 0.77, "learning_rate": 9.984318326134125e-06, "logits/chosen": -0.7097378373146057, "logits/rejected": -0.7367116808891296, "logps/chosen": -98.77081298828125, "logps/rejected": -128.33673095703125, "loss": 0.4927, "rewards/accuracies": 1.0, "rewards/chosen": -1.2726020812988281, "rewards/margins": 0.13741528987884521, "rewards/rejected": -1.4100173711776733, "step": 3493 }, { "epoch": 0.77, "learning_rate": 9.984176164529151e-06, "logits/chosen": -1.2863377332687378, "logits/rejected": -1.2502777576446533, "logps/chosen": -207.90028381347656, "logps/rejected": -201.7117919921875, "loss": 0.2043, "rewards/accuracies": 1.0, "rewards/chosen": 0.03708953782916069, "rewards/margins": 0.7201614379882812, "rewards/rejected": -0.6830719113349915, "step": 3494 }, { "epoch": 0.77, "learning_rate": 9.984033362469522e-06, "logits/chosen": -1.2484718561172485, "logits/rejected": -1.2684028148651123, "logps/chosen": -163.65841674804688, "logps/rejected": -139.09503173828125, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -1.8409759998321533, "rewards/margins": 3.7184035778045654, "rewards/rejected": -5.559379577636719, "step": 3495 }, { "epoch": 0.77, "learning_rate": 9.983889919973586e-06, "logits/chosen": -1.0277990102767944, "logits/rejected": -1.0277990102767944, "logps/chosen": -111.82159423828125, "logps/rejected": -111.82159423828125, "loss": 0.9934, "rewards/accuracies": 0.0, "rewards/chosen": -2.819950819015503, "rewards/margins": 0.0, "rewards/rejected": -2.819950819015503, "step": 3496 }, { "epoch": 0.77, "learning_rate": 9.983745837059777e-06, "logits/chosen": -1.1682642698287964, "logits/rejected": -1.1682642698287964, "logps/chosen": -172.7135009765625, "logps/rejected": -172.7135009765625, "loss": 0.3518, "rewards/accuracies": 0.0, "rewards/chosen": -3.21140456199646, "rewards/margins": 0.0, "rewards/rejected": -3.21140456199646, "step": 3497 }, { "epoch": 0.77, "learning_rate": 9.98360111374661e-06, "logits/chosen": -0.839834988117218, "logits/rejected": -0.7301527261734009, "logps/chosen": -167.73825073242188, "logps/rejected": -246.95199584960938, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.2807510495185852, "rewards/margins": 6.887059211730957, "rewards/rejected": -6.6063079833984375, "step": 3498 }, { "epoch": 0.77, "learning_rate": 9.983455750052678e-06, "logits/chosen": -0.821428656578064, "logits/rejected": -0.9173873662948608, "logps/chosen": -151.38531494140625, "logps/rejected": -92.4626693725586, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": 0.6627334952354431, "rewards/margins": 3.6173269748687744, "rewards/rejected": -2.9545934200286865, "step": 3499 }, { "epoch": 0.77, "learning_rate": 9.983309745996663e-06, "logits/chosen": -1.2327042818069458, "logits/rejected": -1.2327042818069458, "logps/chosen": -107.9747085571289, "logps/rejected": -107.9747085571289, "loss": 0.5918, "rewards/accuracies": 0.0, "rewards/chosen": -3.236140489578247, "rewards/margins": 0.0, "rewards/rejected": -3.236140489578247, "step": 3500 }, { "epoch": 0.77, "learning_rate": 9.983163101597325e-06, "logits/chosen": -1.002156138420105, "logits/rejected": -0.985310435295105, "logps/chosen": -226.81716918945312, "logps/rejected": -304.385009765625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 2.788421630859375, "rewards/margins": 8.6167573928833, "rewards/rejected": -5.828335762023926, "step": 3501 }, { "epoch": 0.78, "learning_rate": 9.983015816873508e-06, "logits/chosen": -0.9555557370185852, "logits/rejected": -0.9533042311668396, "logps/chosen": -150.38876342773438, "logps/rejected": -142.93028259277344, "loss": 0.076, "rewards/accuracies": 1.0, "rewards/chosen": -1.7196563482284546, "rewards/margins": 1.8074182271957397, "rewards/rejected": -3.5270745754241943, "step": 3502 }, { "epoch": 0.78, "learning_rate": 9.982867891844136e-06, "logits/chosen": -1.1883710622787476, "logits/rejected": -1.1110650300979614, "logps/chosen": -87.87054443359375, "logps/rejected": -224.33035278320312, "loss": 0.071, "rewards/accuracies": 1.0, "rewards/chosen": 0.7193466424942017, "rewards/margins": 4.854988098144531, "rewards/rejected": -4.135641574859619, "step": 3503 }, { "epoch": 0.78, "learning_rate": 9.98271932652822e-06, "logits/chosen": -0.9822145104408264, "logits/rejected": -0.9633991718292236, "logps/chosen": -91.40362548828125, "logps/rejected": -265.2998352050781, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 0.748583972454071, "rewards/margins": 11.573156356811523, "rewards/rejected": -10.824572563171387, "step": 3504 }, { "epoch": 0.78, "learning_rate": 9.982570120944847e-06, "logits/chosen": -1.4979602098464966, "logits/rejected": -1.7973742485046387, "logps/chosen": -210.31637573242188, "logps/rejected": -96.26229858398438, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.1669219732284546, "rewards/margins": 3.870638847351074, "rewards/rejected": -5.037560939788818, "step": 3505 }, { "epoch": 0.78, "learning_rate": 9.982420275113194e-06, "logits/chosen": -1.2115061283111572, "logits/rejected": -0.6495279669761658, "logps/chosen": -85.96102905273438, "logps/rejected": -565.922607421875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.1700035184621811, "rewards/margins": 48.3934211730957, "rewards/rejected": -48.56342315673828, "step": 3506 }, { "epoch": 0.78, "learning_rate": 9.98226978905251e-06, "logits/chosen": -1.1291096210479736, "logits/rejected": -1.145857810974121, "logps/chosen": -92.0369873046875, "logps/rejected": -170.42971801757812, "loss": 0.3771, "rewards/accuracies": 1.0, "rewards/chosen": 0.2941177487373352, "rewards/margins": 2.7658417224884033, "rewards/rejected": -2.471724033355713, "step": 3507 }, { "epoch": 0.78, "learning_rate": 9.982118662782136e-06, "logits/chosen": -1.219933032989502, "logits/rejected": -1.2470299005508423, "logps/chosen": -158.34933471679688, "logps/rejected": -120.45352935791016, "loss": 0.0605, "rewards/accuracies": 1.0, "rewards/chosen": -0.8695892691612244, "rewards/margins": 3.300565242767334, "rewards/rejected": -4.170154571533203, "step": 3508 }, { "epoch": 0.78, "learning_rate": 9.981966896321492e-06, "logits/chosen": -1.0944424867630005, "logits/rejected": -1.1324788331985474, "logps/chosen": -128.103515625, "logps/rejected": -128.5537109375, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 0.33587646484375, "rewards/margins": 4.355159282684326, "rewards/rejected": -4.019282817840576, "step": 3509 }, { "epoch": 0.78, "learning_rate": 9.981814489690077e-06, "logits/chosen": -1.2703471183776855, "logits/rejected": -1.319364070892334, "logps/chosen": -288.49273681640625, "logps/rejected": -187.27285766601562, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 2.427899122238159, "rewards/margins": 4.659292697906494, "rewards/rejected": -2.231393575668335, "step": 3510 }, { "epoch": 0.78, "learning_rate": 9.981661442907477e-06, "logits/chosen": -0.6861621141433716, "logits/rejected": -0.6861621141433716, "logps/chosen": -174.91424560546875, "logps/rejected": -174.91424560546875, "loss": 0.3748, "rewards/accuracies": 0.0, "rewards/chosen": -5.262149333953857, "rewards/margins": 0.0, "rewards/rejected": -5.262149333953857, "step": 3511 }, { "epoch": 0.78, "learning_rate": 9.981507755993357e-06, "logits/chosen": -1.0233291387557983, "logits/rejected": -0.930072009563446, "logps/chosen": -91.84561157226562, "logps/rejected": -163.71192932128906, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 0.9025993347167969, "rewards/margins": 4.179777145385742, "rewards/rejected": -3.277177572250366, "step": 3512 }, { "epoch": 0.78, "learning_rate": 9.981353428967465e-06, "logits/chosen": -1.0845739841461182, "logits/rejected": -1.0370322465896606, "logps/chosen": -128.88812255859375, "logps/rejected": -179.41448974609375, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -3.266730546951294, "rewards/margins": 2.646160364151001, "rewards/rejected": -5.912890911102295, "step": 3513 }, { "epoch": 0.78, "learning_rate": 9.98119846184963e-06, "logits/chosen": -1.5184485912322998, "logits/rejected": -1.4231091737747192, "logps/chosen": -98.31612396240234, "logps/rejected": -224.07025146484375, "loss": 0.3216, "rewards/accuracies": 1.0, "rewards/chosen": -1.5299255847930908, "rewards/margins": 0.10491025447845459, "rewards/rejected": -1.6348358392715454, "step": 3514 }, { "epoch": 0.78, "learning_rate": 9.98104285465977e-06, "logits/chosen": -0.9784009456634521, "logits/rejected": -0.95469731092453, "logps/chosen": -141.42970275878906, "logps/rejected": -252.99166870117188, "loss": 0.1309, "rewards/accuracies": 1.0, "rewards/chosen": -1.661542534828186, "rewards/margins": 2.608762741088867, "rewards/rejected": -4.270305156707764, "step": 3515 }, { "epoch": 0.78, "learning_rate": 9.980886607417877e-06, "logits/chosen": -1.171700119972229, "logits/rejected": -1.210301160812378, "logps/chosen": -184.55056762695312, "logps/rejected": -121.52574157714844, "loss": 0.6084, "rewards/accuracies": 0.0, "rewards/chosen": -4.742032051086426, "rewards/margins": -0.8434097766876221, "rewards/rejected": -3.8986222743988037, "step": 3516 }, { "epoch": 0.78, "learning_rate": 9.980729720144027e-06, "logits/chosen": -1.087342381477356, "logits/rejected": -0.9497127532958984, "logps/chosen": -66.88021850585938, "logps/rejected": -169.45217895507812, "loss": 0.1823, "rewards/accuracies": 1.0, "rewards/chosen": -0.6406547427177429, "rewards/margins": 0.8303963541984558, "rewards/rejected": -1.4710510969161987, "step": 3517 }, { "epoch": 0.78, "learning_rate": 9.980572192858383e-06, "logits/chosen": -1.3039129972457886, "logits/rejected": -1.2143762111663818, "logps/chosen": -102.41425323486328, "logps/rejected": -200.9597930908203, "loss": 0.4983, "rewards/accuracies": 0.0, "rewards/chosen": 0.421121209859848, "rewards/margins": -0.4715271294116974, "rewards/rejected": 0.8926483392715454, "step": 3518 }, { "epoch": 0.78, "learning_rate": 9.980414025581185e-06, "logits/chosen": -1.035613775253296, "logits/rejected": -1.035613775253296, "logps/chosen": -105.77845001220703, "logps/rejected": -105.77845001220703, "loss": 0.3534, "rewards/accuracies": 0.0, "rewards/chosen": -0.5851127505302429, "rewards/margins": 0.0, "rewards/rejected": -0.5851127505302429, "step": 3519 }, { "epoch": 0.78, "learning_rate": 9.980255218332758e-06, "logits/chosen": -0.7243508696556091, "logits/rejected": -0.6936619281768799, "logps/chosen": -85.97732543945312, "logps/rejected": -240.42922973632812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.0644874572753906, "rewards/margins": 7.497442245483398, "rewards/rejected": -9.561929702758789, "step": 3520 }, { "epoch": 0.78, "learning_rate": 9.980095771133504e-06, "logits/chosen": -1.1494083404541016, "logits/rejected": -1.1382875442504883, "logps/chosen": -122.48550415039062, "logps/rejected": -187.19290161132812, "loss": 0.4489, "rewards/accuracies": 1.0, "rewards/chosen": -1.447529673576355, "rewards/margins": 1.4828766584396362, "rewards/rejected": -2.930406332015991, "step": 3521 }, { "epoch": 0.78, "learning_rate": 9.979935684003918e-06, "logits/chosen": -0.9685068726539612, "logits/rejected": -0.989643394947052, "logps/chosen": -141.95361328125, "logps/rejected": -95.701171875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 0.19614258408546448, "rewards/margins": 4.762972354888916, "rewards/rejected": -4.566829681396484, "step": 3522 }, { "epoch": 0.78, "learning_rate": 9.979774956964569e-06, "logits/chosen": -0.9433490037918091, "logits/rejected": -1.0526597499847412, "logps/chosen": -214.86669921875, "logps/rejected": -151.13076782226562, "loss": 0.1192, "rewards/accuracies": 1.0, "rewards/chosen": -4.105995178222656, "rewards/margins": 1.319526195526123, "rewards/rejected": -5.425521373748779, "step": 3523 }, { "epoch": 0.78, "learning_rate": 9.979613590036108e-06, "logits/chosen": -1.214369535446167, "logits/rejected": -1.147699236869812, "logps/chosen": -120.1320571899414, "logps/rejected": -160.16317749023438, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": -1.6124985218048096, "rewards/margins": 2.85512375831604, "rewards/rejected": -4.46762228012085, "step": 3524 }, { "epoch": 0.78, "learning_rate": 9.979451583239272e-06, "logits/chosen": -1.0579938888549805, "logits/rejected": -1.0121877193450928, "logps/chosen": -85.93938446044922, "logps/rejected": -138.88653564453125, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -2.028806447982788, "rewards/margins": 3.2880208492279053, "rewards/rejected": -5.316827297210693, "step": 3525 }, { "epoch": 0.78, "learning_rate": 9.979288936594877e-06, "logits/chosen": -0.9351726174354553, "logits/rejected": -0.9332887530326843, "logps/chosen": -125.52149963378906, "logps/rejected": -115.0776596069336, "loss": 0.851, "rewards/accuracies": 1.0, "rewards/chosen": -0.9922996759414673, "rewards/margins": 2.2471351623535156, "rewards/rejected": -3.2394349575042725, "step": 3526 }, { "epoch": 0.78, "learning_rate": 9.979125650123824e-06, "logits/chosen": -1.1185219287872314, "logits/rejected": -1.0833781957626343, "logps/chosen": -91.52488708496094, "logps/rejected": -140.2908172607422, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -0.6345199942588806, "rewards/margins": 4.04473876953125, "rewards/rejected": -4.679258823394775, "step": 3527 }, { "epoch": 0.78, "learning_rate": 9.978961723847093e-06, "logits/chosen": -0.843296229839325, "logits/rejected": -0.7780508995056152, "logps/chosen": -107.655517578125, "logps/rejected": -150.38873291015625, "loss": 0.3478, "rewards/accuracies": 1.0, "rewards/chosen": 0.42153167724609375, "rewards/margins": 5.993324279785156, "rewards/rejected": -5.5717926025390625, "step": 3528 }, { "epoch": 0.78, "learning_rate": 9.978797157785752e-06, "logits/chosen": -0.7249825596809387, "logits/rejected": -0.6155399680137634, "logps/chosen": -250.06126403808594, "logps/rejected": -403.70703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.1265609711408615, "rewards/margins": 8.529518127441406, "rewards/rejected": -8.656079292297363, "step": 3529 }, { "epoch": 0.78, "learning_rate": 9.978631951960942e-06, "logits/chosen": -0.9872393608093262, "logits/rejected": -0.9807484149932861, "logps/chosen": -104.91187286376953, "logps/rejected": -114.76580810546875, "loss": 0.6406, "rewards/accuracies": 0.0, "rewards/chosen": -2.70732045173645, "rewards/margins": -0.9491074085235596, "rewards/rejected": -1.7582130432128906, "step": 3530 }, { "epoch": 0.78, "learning_rate": 9.978466106393896e-06, "logits/chosen": -1.2932771444320679, "logits/rejected": -1.234829068183899, "logps/chosen": -89.89656066894531, "logps/rejected": -103.53466796875, "loss": 1.6597, "rewards/accuracies": 0.0, "rewards/chosen": -1.8726791143417358, "rewards/margins": -1.0710937976837158, "rewards/rejected": -0.8015853762626648, "step": 3531 }, { "epoch": 0.78, "learning_rate": 9.978299621105924e-06, "logits/chosen": -1.351305603981018, "logits/rejected": -1.2992949485778809, "logps/chosen": -104.2928695678711, "logps/rejected": -223.92416381835938, "loss": 0.4294, "rewards/accuracies": 1.0, "rewards/chosen": -0.4934028685092926, "rewards/margins": 1.0316886901855469, "rewards/rejected": -1.525091528892517, "step": 3532 }, { "epoch": 0.78, "learning_rate": 9.978132496118418e-06, "logits/chosen": -0.9229589104652405, "logits/rejected": -1.0540920495986938, "logps/chosen": -245.22152709960938, "logps/rejected": -107.79119873046875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.5435760617256165, "rewards/margins": 4.909689426422119, "rewards/rejected": -5.45326566696167, "step": 3533 }, { "epoch": 0.78, "learning_rate": 9.977964731452852e-06, "logits/chosen": -1.0658162832260132, "logits/rejected": -1.046987533569336, "logps/chosen": -144.47219848632812, "logps/rejected": -132.85433959960938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.47483229637146, "rewards/margins": 9.086311340332031, "rewards/rejected": -6.61147928237915, "step": 3534 }, { "epoch": 0.78, "learning_rate": 9.977796327130786e-06, "logits/chosen": -0.8965086340904236, "logits/rejected": -0.9240479469299316, "logps/chosen": -113.54607391357422, "logps/rejected": -87.66407012939453, "loss": 0.2445, "rewards/accuracies": 1.0, "rewards/chosen": -0.7520889639854431, "rewards/margins": 0.4642433524131775, "rewards/rejected": -1.2163323163986206, "step": 3535 }, { "epoch": 0.78, "learning_rate": 9.977627283173858e-06, "logits/chosen": -1.3100448846817017, "logits/rejected": -1.3195048570632935, "logps/chosen": -97.49122619628906, "logps/rejected": -137.6725616455078, "loss": 1.68, "rewards/accuracies": 1.0, "rewards/chosen": -1.4267364740371704, "rewards/margins": 3.028472900390625, "rewards/rejected": -4.455209255218506, "step": 3536 }, { "epoch": 0.78, "learning_rate": 9.97745759960379e-06, "logits/chosen": -0.7494063973426819, "logits/rejected": -0.6107776165008545, "logps/chosen": -91.34368896484375, "logps/rejected": -133.60848999023438, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -0.14408722519874573, "rewards/margins": 5.829087734222412, "rewards/rejected": -5.973175048828125, "step": 3537 }, { "epoch": 0.78, "learning_rate": 9.977287276442385e-06, "logits/chosen": -1.1352980136871338, "logits/rejected": -1.137962818145752, "logps/chosen": -58.08883285522461, "logps/rejected": -146.32513427734375, "loss": 0.1508, "rewards/accuracies": 1.0, "rewards/chosen": -0.23577384650707245, "rewards/margins": 5.688019275665283, "rewards/rejected": -5.923793315887451, "step": 3538 }, { "epoch": 0.78, "learning_rate": 9.97711631371153e-06, "logits/chosen": -0.8699241280555725, "logits/rejected": -0.8448229432106018, "logps/chosen": -168.73289489746094, "logps/rejected": -231.78195190429688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.059967041015625, "rewards/margins": 6.4521331787109375, "rewards/rejected": -6.5121002197265625, "step": 3539 }, { "epoch": 0.78, "learning_rate": 9.976944711433194e-06, "logits/chosen": -1.1659486293792725, "logits/rejected": -1.0271854400634766, "logps/chosen": -102.5504150390625, "logps/rejected": -235.7080535888672, "loss": 0.3279, "rewards/accuracies": 1.0, "rewards/chosen": -0.6566848754882812, "rewards/margins": 4.775860786437988, "rewards/rejected": -5.4325456619262695, "step": 3540 }, { "epoch": 0.78, "learning_rate": 9.976772469629428e-06, "logits/chosen": -0.7768205404281616, "logits/rejected": -0.7411426901817322, "logps/chosen": -186.89175415039062, "logps/rejected": -134.0682373046875, "loss": 0.1519, "rewards/accuracies": 1.0, "rewards/chosen": -0.9140228629112244, "rewards/margins": 1.8843748569488525, "rewards/rejected": -2.7983977794647217, "step": 3541 }, { "epoch": 0.78, "learning_rate": 9.976599588322362e-06, "logits/chosen": -1.3217158317565918, "logits/rejected": -1.3039811849594116, "logps/chosen": -96.12867736816406, "logps/rejected": -122.62919616699219, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -0.74502032995224, "rewards/margins": 3.055917978286743, "rewards/rejected": -3.800938367843628, "step": 3542 }, { "epoch": 0.78, "learning_rate": 9.976426067534212e-06, "logits/chosen": -0.9303925633430481, "logits/rejected": -0.9579259157180786, "logps/chosen": -204.06068420410156, "logps/rejected": -220.49362182617188, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.107844591140747, "rewards/margins": 5.8041582107543945, "rewards/rejected": -6.9120025634765625, "step": 3543 }, { "epoch": 0.78, "learning_rate": 9.976251907287277e-06, "logits/chosen": -1.244232416152954, "logits/rejected": -1.3189297914505005, "logps/chosen": -96.78758239746094, "logps/rejected": -71.40895080566406, "loss": 2.8096, "rewards/accuracies": 1.0, "rewards/chosen": -1.7631638050079346, "rewards/margins": 2.2617952823638916, "rewards/rejected": -4.024959087371826, "step": 3544 }, { "epoch": 0.78, "learning_rate": 9.976077107603933e-06, "logits/chosen": -1.256837010383606, "logits/rejected": -1.272242784500122, "logps/chosen": -202.61268615722656, "logps/rejected": -271.7187805175781, "loss": 0.2821, "rewards/accuracies": 1.0, "rewards/chosen": -1.9337036609649658, "rewards/margins": 0.27685546875, "rewards/rejected": -2.210559129714966, "step": 3545 }, { "epoch": 0.78, "learning_rate": 9.975901668506644e-06, "logits/chosen": -1.1637232303619385, "logits/rejected": -1.1192423105239868, "logps/chosen": -111.06595611572266, "logps/rejected": -117.65522766113281, "loss": 0.0462, "rewards/accuracies": 1.0, "rewards/chosen": -0.4415939450263977, "rewards/margins": 2.894803762435913, "rewards/rejected": -3.336397647857666, "step": 3546 }, { "epoch": 0.79, "learning_rate": 9.97572559001795e-06, "logits/chosen": -1.071379542350769, "logits/rejected": -1.0098438262939453, "logps/chosen": -89.8702163696289, "logps/rejected": -140.84442138671875, "loss": 0.0796, "rewards/accuracies": 1.0, "rewards/chosen": -1.0069633722305298, "rewards/margins": 1.757302165031433, "rewards/rejected": -2.764265537261963, "step": 3547 }, { "epoch": 0.79, "learning_rate": 9.975548872160482e-06, "logits/chosen": -0.8681823015213013, "logits/rejected": -0.8385363817214966, "logps/chosen": -97.47760009765625, "logps/rejected": -127.35759735107422, "loss": 0.1144, "rewards/accuracies": 1.0, "rewards/chosen": -0.8157364130020142, "rewards/margins": 3.763547897338867, "rewards/rejected": -4.579284191131592, "step": 3548 }, { "epoch": 0.79, "learning_rate": 9.975371514956945e-06, "logits/chosen": -1.0910102128982544, "logits/rejected": -1.114667296409607, "logps/chosen": -152.82882690429688, "logps/rejected": -125.8094482421875, "loss": 0.2062, "rewards/accuracies": 1.0, "rewards/chosen": -0.209666445851326, "rewards/margins": 2.345498561859131, "rewards/rejected": -2.5551650524139404, "step": 3549 }, { "epoch": 0.79, "learning_rate": 9.975193518430127e-06, "logits/chosen": -1.2723428010940552, "logits/rejected": -1.3027278184890747, "logps/chosen": -94.32759094238281, "logps/rejected": -84.43687438964844, "loss": 0.1684, "rewards/accuracies": 1.0, "rewards/chosen": -1.4103195667266846, "rewards/margins": 2.8632724285125732, "rewards/rejected": -4.273591995239258, "step": 3550 }, { "epoch": 0.79, "learning_rate": 9.9750148826029e-06, "logits/chosen": -1.1236605644226074, "logits/rejected": -1.1332786083221436, "logps/chosen": -125.6363525390625, "logps/rejected": -146.59423828125, "loss": 0.0748, "rewards/accuracies": 1.0, "rewards/chosen": -0.6069580316543579, "rewards/margins": 1.8790725469589233, "rewards/rejected": -2.4860305786132812, "step": 3551 }, { "epoch": 0.79, "learning_rate": 9.974835607498224e-06, "logits/chosen": -1.2496601343154907, "logits/rejected": -1.324202060699463, "logps/chosen": -132.09597778320312, "logps/rejected": -89.00813293457031, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 0.008129882626235485, "rewards/margins": 5.620673656463623, "rewards/rejected": -5.61254358291626, "step": 3552 }, { "epoch": 0.79, "learning_rate": 9.97465569313913e-06, "logits/chosen": -0.6144351959228516, "logits/rejected": -0.5116555690765381, "logps/chosen": -196.3855438232422, "logps/rejected": -410.3424377441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.11672516167163849, "rewards/margins": 10.493059158325195, "rewards/rejected": -10.376334190368652, "step": 3553 }, { "epoch": 0.79, "learning_rate": 9.974475139548738e-06, "logits/chosen": -0.7995294332504272, "logits/rejected": -0.7638794183731079, "logps/chosen": -218.27569580078125, "logps/rejected": -201.046142578125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": 2.035762071609497, "rewards/margins": 6.187758445739746, "rewards/rejected": -4.15199613571167, "step": 3554 }, { "epoch": 0.79, "learning_rate": 9.97429394675025e-06, "logits/chosen": -0.7629119157791138, "logits/rejected": -0.726138174533844, "logps/chosen": -100.43301391601562, "logps/rejected": -88.71635437011719, "loss": 0.1293, "rewards/accuracies": 1.0, "rewards/chosen": -2.765392303466797, "rewards/margins": 1.2732148170471191, "rewards/rejected": -4.038607120513916, "step": 3555 }, { "epoch": 0.79, "learning_rate": 9.974112114766945e-06, "logits/chosen": -0.7671969532966614, "logits/rejected": -0.7813592553138733, "logps/chosen": -235.3174591064453, "logps/rejected": -257.47882080078125, "loss": 0.3553, "rewards/accuracies": 1.0, "rewards/chosen": -5.903051853179932, "rewards/margins": 0.33971548080444336, "rewards/rejected": -6.242767333984375, "step": 3556 }, { "epoch": 0.79, "learning_rate": 9.973929643622194e-06, "logits/chosen": -0.8946243524551392, "logits/rejected": -0.9094901084899902, "logps/chosen": -227.2541046142578, "logps/rejected": -218.65528869628906, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.155506893992424, "rewards/margins": 6.311722278594971, "rewards/rejected": -6.156215190887451, "step": 3557 }, { "epoch": 0.79, "learning_rate": 9.973746533339438e-06, "logits/chosen": -0.8780970573425293, "logits/rejected": -0.9686931371688843, "logps/chosen": -203.67831420898438, "logps/rejected": -156.06394958496094, "loss": 0.137, "rewards/accuracies": 1.0, "rewards/chosen": -3.6679656505584717, "rewards/margins": 1.1714532375335693, "rewards/rejected": -4.839418888092041, "step": 3558 }, { "epoch": 0.79, "learning_rate": 9.97356278394221e-06, "logits/chosen": -0.9916805028915405, "logits/rejected": -1.01570725440979, "logps/chosen": -124.22566223144531, "logps/rejected": -141.28176879882812, "loss": 0.8057, "rewards/accuracies": 1.0, "rewards/chosen": 0.19678421318531036, "rewards/margins": 2.392798662185669, "rewards/rejected": -2.196014404296875, "step": 3559 }, { "epoch": 0.79, "learning_rate": 9.973378395454121e-06, "logits/chosen": -1.2039209604263306, "logits/rejected": -1.2069259881973267, "logps/chosen": -70.59849548339844, "logps/rejected": -106.33442687988281, "loss": 0.0539, "rewards/accuracies": 1.0, "rewards/chosen": 0.3001518249511719, "rewards/margins": 2.608435869216919, "rewards/rejected": -2.308284044265747, "step": 3560 }, { "epoch": 0.79, "learning_rate": 9.973193367898863e-06, "logits/chosen": -0.7128708362579346, "logits/rejected": -0.6632288694381714, "logps/chosen": -109.99964904785156, "logps/rejected": -134.94412231445312, "loss": 0.1372, "rewards/accuracies": 1.0, "rewards/chosen": -2.8158607482910156, "rewards/margins": 1.156121015548706, "rewards/rejected": -3.9719817638397217, "step": 3561 }, { "epoch": 0.79, "learning_rate": 9.973007701300214e-06, "logits/chosen": -1.0300308465957642, "logits/rejected": -0.39021366834640503, "logps/chosen": -252.10829162597656, "logps/rejected": -623.8416748046875, "loss": 1.4767, "rewards/accuracies": 1.0, "rewards/chosen": -1.9484055042266846, "rewards/margins": 38.62641906738281, "rewards/rejected": -40.574825286865234, "step": 3562 }, { "epoch": 0.79, "learning_rate": 9.972821395682029e-06, "logits/chosen": -1.173223614692688, "logits/rejected": -0.7797195911407471, "logps/chosen": -259.77166748046875, "logps/rejected": -390.4157409667969, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -10.991055488586426, "rewards/margins": 4.126202583312988, "rewards/rejected": -15.117258071899414, "step": 3563 }, { "epoch": 0.79, "learning_rate": 9.972634451068248e-06, "logits/chosen": -0.7113274335861206, "logits/rejected": -0.7074534296989441, "logps/chosen": -125.22854614257812, "logps/rejected": -197.82127380371094, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -4.489486217498779, "rewards/margins": 5.24347448348999, "rewards/rejected": -9.73296070098877, "step": 3564 }, { "epoch": 0.79, "learning_rate": 9.972446867482896e-06, "logits/chosen": -1.086828589439392, "logits/rejected": -1.1067193746566772, "logps/chosen": -96.1309585571289, "logps/rejected": -67.04661560058594, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 0.9611656069755554, "rewards/margins": 5.425022602081299, "rewards/rejected": -4.463857173919678, "step": 3565 }, { "epoch": 0.79, "learning_rate": 9.972258644950074e-06, "logits/chosen": -0.8988329768180847, "logits/rejected": -0.8988329768180847, "logps/chosen": -96.6937026977539, "logps/rejected": -96.6937026977539, "loss": 0.4868, "rewards/accuracies": 0.0, "rewards/chosen": -5.472294807434082, "rewards/margins": 0.0, "rewards/rejected": -5.472294807434082, "step": 3566 }, { "epoch": 0.79, "learning_rate": 9.97206978349397e-06, "logits/chosen": -0.9156029224395752, "logits/rejected": -0.6701394319534302, "logps/chosen": -258.5457763671875, "logps/rejected": -267.0201416015625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -6.972962856292725, "rewards/margins": 6.0706868171691895, "rewards/rejected": -13.043649673461914, "step": 3567 }, { "epoch": 0.79, "learning_rate": 9.971880283138849e-06, "logits/chosen": -0.9935376048088074, "logits/rejected": -0.549490749835968, "logps/chosen": -142.3668975830078, "logps/rejected": -353.42181396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5453872680664062, "rewards/margins": 25.75572395324707, "rewards/rejected": -27.301111221313477, "step": 3568 }, { "epoch": 0.79, "learning_rate": 9.971690143909066e-06, "logits/chosen": -1.0371400117874146, "logits/rejected": -1.0547794103622437, "logps/chosen": -99.19490051269531, "logps/rejected": -77.89109802246094, "loss": 0.123, "rewards/accuracies": 1.0, "rewards/chosen": -2.2688300609588623, "rewards/margins": 1.288696050643921, "rewards/rejected": -3.557526111602783, "step": 3569 }, { "epoch": 0.79, "learning_rate": 9.971499365829049e-06, "logits/chosen": -1.0321232080459595, "logits/rejected": -1.1161749362945557, "logps/chosen": -184.01156616210938, "logps/rejected": -198.38418579101562, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": 0.6159027218818665, "rewards/margins": 4.132504463195801, "rewards/rejected": -3.5166015625, "step": 3570 }, { "epoch": 0.79, "learning_rate": 9.971307948923316e-06, "logits/chosen": -1.5239568948745728, "logits/rejected": -1.5124778747558594, "logps/chosen": -95.20635986328125, "logps/rejected": -108.4167709350586, "loss": 0.5255, "rewards/accuracies": 1.0, "rewards/chosen": -1.5103386640548706, "rewards/margins": 0.7812110185623169, "rewards/rejected": -2.2915496826171875, "step": 3571 }, { "epoch": 0.79, "learning_rate": 9.971115893216463e-06, "logits/chosen": -1.202815055847168, "logits/rejected": -1.23104989528656, "logps/chosen": -88.24482727050781, "logps/rejected": -141.73724365234375, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": 0.10272369533777237, "rewards/margins": 3.24809193611145, "rewards/rejected": -3.1453683376312256, "step": 3572 }, { "epoch": 0.79, "learning_rate": 9.970923198733167e-06, "logits/chosen": -0.9047147631645203, "logits/rejected": -0.913733720779419, "logps/chosen": -81.54315185546875, "logps/rejected": -177.93382263183594, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": 0.7320938110351562, "rewards/margins": 3.1108429431915283, "rewards/rejected": -2.378749132156372, "step": 3573 }, { "epoch": 0.79, "learning_rate": 9.97072986549819e-06, "logits/chosen": -1.2439335584640503, "logits/rejected": -1.230331540107727, "logps/chosen": -133.9328155517578, "logps/rejected": -128.9289093017578, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": 1.5323959589004517, "rewards/margins": 4.164665222167969, "rewards/rejected": -2.6322693824768066, "step": 3574 }, { "epoch": 0.79, "learning_rate": 9.970535893536375e-06, "logits/chosen": -1.2283570766448975, "logits/rejected": -1.2327333688735962, "logps/chosen": -149.3621063232422, "logps/rejected": -203.1636199951172, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 1.1104965209960938, "rewards/margins": 7.343235969543457, "rewards/rejected": -6.232739448547363, "step": 3575 }, { "epoch": 0.79, "learning_rate": 9.970341282872645e-06, "logits/chosen": -0.851209282875061, "logits/rejected": -0.8857782483100891, "logps/chosen": -86.45437622070312, "logps/rejected": -128.613525390625, "loss": 0.1314, "rewards/accuracies": 1.0, "rewards/chosen": 0.5634933710098267, "rewards/margins": 1.2155730724334717, "rewards/rejected": -0.6520797610282898, "step": 3576 }, { "epoch": 0.79, "learning_rate": 9.97014603353201e-06, "logits/chosen": -1.0052189826965332, "logits/rejected": -1.021016240119934, "logps/chosen": -188.01199340820312, "logps/rejected": -146.31326293945312, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -1.8214492797851562, "rewards/margins": 4.427853584289551, "rewards/rejected": -6.249302864074707, "step": 3577 }, { "epoch": 0.79, "learning_rate": 9.969950145539557e-06, "logits/chosen": -1.1051199436187744, "logits/rejected": -1.1244515180587769, "logps/chosen": -53.75802993774414, "logps/rejected": -78.5953598022461, "loss": 0.2092, "rewards/accuracies": 1.0, "rewards/chosen": -0.084228515625, "rewards/margins": 0.6844596862792969, "rewards/rejected": -0.7686882019042969, "step": 3578 }, { "epoch": 0.79, "learning_rate": 9.969753618920456e-06, "logits/chosen": -1.0965131521224976, "logits/rejected": -1.222585916519165, "logps/chosen": -235.79490661621094, "logps/rejected": -88.82440185546875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 2.530961751937866, "rewards/margins": 7.7454071044921875, "rewards/rejected": -5.214445114135742, "step": 3579 }, { "epoch": 0.79, "learning_rate": 9.969556453699966e-06, "logits/chosen": -0.779543399810791, "logits/rejected": -0.6658188700675964, "logps/chosen": -77.09180450439453, "logps/rejected": -273.0586853027344, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -2.0595157146453857, "rewards/margins": 3.3109829425811768, "rewards/rejected": -5.3704986572265625, "step": 3580 }, { "epoch": 0.79, "learning_rate": 9.969358649903415e-06, "logits/chosen": -0.973721444606781, "logits/rejected": -1.078174352645874, "logps/chosen": -306.0515441894531, "logps/rejected": -259.56695556640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.975451648235321, "rewards/margins": 8.630126953125, "rewards/rejected": -7.654675483703613, "step": 3581 }, { "epoch": 0.79, "learning_rate": 9.969160207556225e-06, "logits/chosen": -0.9742686748504639, "logits/rejected": -0.9327457547187805, "logps/chosen": -98.12742614746094, "logps/rejected": -121.83169555664062, "loss": 0.131, "rewards/accuracies": 1.0, "rewards/chosen": -0.5544937252998352, "rewards/margins": 2.728398323059082, "rewards/rejected": -3.2828919887542725, "step": 3582 }, { "epoch": 0.79, "learning_rate": 9.968961126683893e-06, "logits/chosen": -0.8436738848686218, "logits/rejected": -0.8402147889137268, "logps/chosen": -128.1487274169922, "logps/rejected": -130.5013885498047, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": 1.7888519763946533, "rewards/margins": 4.183021545410156, "rewards/rejected": -2.394169569015503, "step": 3583 }, { "epoch": 0.79, "learning_rate": 9.968761407312002e-06, "logits/chosen": -1.3019561767578125, "logits/rejected": -1.4019761085510254, "logps/chosen": -127.49549865722656, "logps/rejected": -103.14772033691406, "loss": 0.2124, "rewards/accuracies": 1.0, "rewards/chosen": -1.8829482793807983, "rewards/margins": 0.637497067451477, "rewards/rejected": -2.5204453468322754, "step": 3584 }, { "epoch": 0.79, "learning_rate": 9.968561049466214e-06, "logits/chosen": -0.9955949187278748, "logits/rejected": -1.000897765159607, "logps/chosen": -117.15478515625, "logps/rejected": -69.78526306152344, "loss": 0.1031, "rewards/accuracies": 1.0, "rewards/chosen": -3.006821393966675, "rewards/margins": 1.4758903980255127, "rewards/rejected": -4.4827117919921875, "step": 3585 }, { "epoch": 0.79, "learning_rate": 9.968360053172275e-06, "logits/chosen": -1.213499665260315, "logits/rejected": -1.189173698425293, "logps/chosen": -220.137451171875, "logps/rejected": -227.04454040527344, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -3.026332139968872, "rewards/margins": 3.506065607070923, "rewards/rejected": -6.532397747039795, "step": 3586 }, { "epoch": 0.79, "learning_rate": 9.968158418456013e-06, "logits/chosen": -1.0752794742584229, "logits/rejected": -0.2664361298084259, "logps/chosen": -86.14111328125, "logps/rejected": -264.85870361328125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.9317611455917358, "rewards/margins": 16.197593688964844, "rewards/rejected": -18.12935447692871, "step": 3587 }, { "epoch": 0.79, "learning_rate": 9.967956145343339e-06, "logits/chosen": -1.0222059488296509, "logits/rejected": -1.02920401096344, "logps/chosen": -203.56658935546875, "logps/rejected": -165.70474243164062, "loss": 0.0763, "rewards/accuracies": 1.0, "rewards/chosen": -0.6812591552734375, "rewards/margins": 1.9883942604064941, "rewards/rejected": -2.6696534156799316, "step": 3588 }, { "epoch": 0.79, "learning_rate": 9.96775323386024e-06, "logits/chosen": -0.9487892389297485, "logits/rejected": -0.9487892389297485, "logps/chosen": -176.70742797851562, "logps/rejected": -176.70742797851562, "loss": 0.3738, "rewards/accuracies": 0.0, "rewards/chosen": -5.4939422607421875, "rewards/margins": 0.0, "rewards/rejected": -5.4939422607421875, "step": 3589 }, { "epoch": 0.79, "learning_rate": 9.967549684032796e-06, "logits/chosen": -0.9054948687553406, "logits/rejected": -0.8733164072036743, "logps/chosen": -98.09310913085938, "logps/rejected": -142.43194580078125, "loss": 0.4673, "rewards/accuracies": 0.0, "rewards/chosen": -0.6605438590049744, "rewards/margins": -0.32843631505966187, "rewards/rejected": -0.3321075439453125, "step": 3590 }, { "epoch": 0.79, "learning_rate": 9.967345495887157e-06, "logits/chosen": -0.87965327501297, "logits/rejected": -0.9289643168449402, "logps/chosen": -265.2511901855469, "logps/rejected": -143.6544952392578, "loss": 0.0476, "rewards/accuracies": 1.0, "rewards/chosen": -3.1774842739105225, "rewards/margins": 2.5661489963531494, "rewards/rejected": -5.743633270263672, "step": 3591 }, { "epoch": 0.8, "learning_rate": 9.967140669449562e-06, "logits/chosen": -1.0542353391647339, "logits/rejected": -1.0621463060379028, "logps/chosen": -90.89216613769531, "logps/rejected": -117.34428405761719, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 0.27304840087890625, "rewards/margins": 4.644748687744141, "rewards/rejected": -4.371700286865234, "step": 3592 }, { "epoch": 0.8, "learning_rate": 9.966935204746332e-06, "logits/chosen": -1.148919701576233, "logits/rejected": -1.1571451425552368, "logps/chosen": -110.30682373046875, "logps/rejected": -131.0041046142578, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": 1.4125183820724487, "rewards/margins": 4.336874485015869, "rewards/rejected": -2.92435622215271, "step": 3593 }, { "epoch": 0.8, "learning_rate": 9.966729101803872e-06, "logits/chosen": -1.2598726749420166, "logits/rejected": -1.4343265295028687, "logps/chosen": -187.769287109375, "logps/rejected": -127.77159118652344, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.530029296875, "rewards/margins": 5.890819072723389, "rewards/rejected": -5.360789775848389, "step": 3594 }, { "epoch": 0.8, "learning_rate": 9.966522360648659e-06, "logits/chosen": -0.8038485050201416, "logits/rejected": -0.6817718148231506, "logps/chosen": -160.97161865234375, "logps/rejected": -158.7443084716797, "loss": 1.1424, "rewards/accuracies": 0.0, "rewards/chosen": -1.1071289777755737, "rewards/margins": -2.172686815261841, "rewards/rejected": 1.065557837486267, "step": 3595 }, { "epoch": 0.8, "learning_rate": 9.966314981307261e-06, "logits/chosen": -0.8305411338806152, "logits/rejected": -0.839181661605835, "logps/chosen": -186.6145782470703, "logps/rejected": -151.35890197753906, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.86376953125, "rewards/margins": 4.489572048187256, "rewards/rejected": -5.353341579437256, "step": 3596 }, { "epoch": 0.8, "learning_rate": 9.96610696380633e-06, "logits/chosen": -1.2466646432876587, "logits/rejected": -0.7495834827423096, "logps/chosen": -124.16370391845703, "logps/rejected": -523.1580810546875, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -0.6022552847862244, "rewards/margins": 14.947400093078613, "rewards/rejected": -15.549654960632324, "step": 3597 }, { "epoch": 0.8, "learning_rate": 9.965898308172589e-06, "logits/chosen": -1.1835278272628784, "logits/rejected": -1.2206296920776367, "logps/chosen": -117.34624481201172, "logps/rejected": -71.50679016113281, "loss": 2.3311, "rewards/accuracies": 1.0, "rewards/chosen": -2.2167108058929443, "rewards/margins": 1.6734449863433838, "rewards/rejected": -3.890155792236328, "step": 3598 }, { "epoch": 0.8, "learning_rate": 9.965689014432854e-06, "logits/chosen": -0.8858517408370972, "logits/rejected": -0.9268622398376465, "logps/chosen": -201.04112243652344, "logps/rejected": -186.32691955566406, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 2.2447586059570312, "rewards/margins": 5.277496337890625, "rewards/rejected": -3.0327377319335938, "step": 3599 }, { "epoch": 0.8, "learning_rate": 9.965479082614019e-06, "logits/chosen": -1.8855952024459839, "logits/rejected": -1.8224624395370483, "logps/chosen": -185.7562713623047, "logps/rejected": -250.75811767578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.3289551734924316, "rewards/margins": 6.399322986602783, "rewards/rejected": -8.728278160095215, "step": 3600 }, { "epoch": 0.8, "learning_rate": 9.965268512743058e-06, "logits/chosen": -1.1258273124694824, "logits/rejected": -1.1258273124694824, "logps/chosen": -83.02100372314453, "logps/rejected": -83.02100372314453, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.559383392333984, "rewards/margins": 0.0, "rewards/rejected": -4.559383392333984, "step": 3601 }, { "epoch": 0.8, "learning_rate": 9.965057304847029e-06, "logits/chosen": -1.150530457496643, "logits/rejected": -1.142439603805542, "logps/chosen": -92.6874771118164, "logps/rejected": -223.29026794433594, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": 0.3728736937046051, "rewards/margins": 9.528505325317383, "rewards/rejected": -9.155632019042969, "step": 3602 }, { "epoch": 0.8, "learning_rate": 9.964845458953072e-06, "logits/chosen": -1.0515528917312622, "logits/rejected": -1.0659189224243164, "logps/chosen": -229.283447265625, "logps/rejected": -211.1764373779297, "loss": 0.3054, "rewards/accuracies": 1.0, "rewards/chosen": 1.9512115716934204, "rewards/margins": 0.2069946527481079, "rewards/rejected": 1.7442169189453125, "step": 3603 }, { "epoch": 0.8, "learning_rate": 9.964632975088408e-06, "logits/chosen": -0.8547335267066956, "logits/rejected": -0.8453467488288879, "logps/chosen": -103.5937271118164, "logps/rejected": -106.27107238769531, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": -0.508588433265686, "rewards/margins": 3.254000663757324, "rewards/rejected": -3.7625892162323, "step": 3604 }, { "epoch": 0.8, "learning_rate": 9.964419853280343e-06, "logits/chosen": -0.8562764525413513, "logits/rejected": -0.8768197894096375, "logps/chosen": -188.6537628173828, "logps/rejected": -192.76080322265625, "loss": 0.9993, "rewards/accuracies": 0.0, "rewards/chosen": 1.0452651977539062, "rewards/margins": -1.8249740600585938, "rewards/rejected": 2.8702392578125, "step": 3605 }, { "epoch": 0.8, "learning_rate": 9.96420609355626e-06, "logits/chosen": -0.9156009554862976, "logits/rejected": -0.918293297290802, "logps/chosen": -76.54316711425781, "logps/rejected": -110.30662536621094, "loss": 1.8336, "rewards/accuracies": 1.0, "rewards/chosen": 0.102294921875, "rewards/margins": 1.9615195989608765, "rewards/rejected": -1.8592246770858765, "step": 3606 }, { "epoch": 0.8, "learning_rate": 9.963991695943627e-06, "logits/chosen": -0.73514324426651, "logits/rejected": -0.7470657825469971, "logps/chosen": -179.51007080078125, "logps/rejected": -116.4339370727539, "loss": 0.2619, "rewards/accuracies": 1.0, "rewards/chosen": -0.1055450439453125, "rewards/margins": 3.178518772125244, "rewards/rejected": -3.2840638160705566, "step": 3607 }, { "epoch": 0.8, "learning_rate": 9.963776660469996e-06, "logits/chosen": -0.8549723625183105, "logits/rejected": -0.9852457046508789, "logps/chosen": -98.32572937011719, "logps/rejected": -108.36817169189453, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7984833121299744, "rewards/margins": 6.492297172546387, "rewards/rejected": -7.290780544281006, "step": 3608 }, { "epoch": 0.8, "learning_rate": 9.963560987162994e-06, "logits/chosen": -1.0241901874542236, "logits/rejected": -1.0403310060501099, "logps/chosen": -197.42324829101562, "logps/rejected": -180.22561645507812, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -0.23782043159008026, "rewards/margins": 3.400683641433716, "rewards/rejected": -3.6385040283203125, "step": 3609 }, { "epoch": 0.8, "learning_rate": 9.96334467605034e-06, "logits/chosen": -0.8485105633735657, "logits/rejected": -0.8105887770652771, "logps/chosen": -87.72299194335938, "logps/rejected": -176.23167419433594, "loss": 0.0475, "rewards/accuracies": 1.0, "rewards/chosen": -1.062554955482483, "rewards/margins": 2.439889430999756, "rewards/rejected": -3.5024445056915283, "step": 3610 }, { "epoch": 0.8, "learning_rate": 9.963127727159825e-06, "logits/chosen": -1.2506372928619385, "logits/rejected": -1.2976044416427612, "logps/chosen": -120.13572692871094, "logps/rejected": -168.7976837158203, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": -0.8062103390693665, "rewards/margins": 3.5907280445098877, "rewards/rejected": -4.396938323974609, "step": 3611 }, { "epoch": 0.8, "learning_rate": 9.962910140519328e-06, "logits/chosen": -1.0132936239242554, "logits/rejected": -0.9995623230934143, "logps/chosen": -49.989898681640625, "logps/rejected": -66.84172821044922, "loss": 0.2016, "rewards/accuracies": 1.0, "rewards/chosen": -0.16488495469093323, "rewards/margins": 1.3935863971710205, "rewards/rejected": -1.5584713220596313, "step": 3612 }, { "epoch": 0.8, "learning_rate": 9.96269191615681e-06, "logits/chosen": -0.94829922914505, "logits/rejected": -1.0502405166625977, "logps/chosen": -208.08441162109375, "logps/rejected": -182.59219360351562, "loss": 0.0793, "rewards/accuracies": 1.0, "rewards/chosen": -0.6116897463798523, "rewards/margins": 2.1627533435821533, "rewards/rejected": -2.7744431495666504, "step": 3613 }, { "epoch": 0.8, "learning_rate": 9.96247305410031e-06, "logits/chosen": -1.0435469150543213, "logits/rejected": -1.0082695484161377, "logps/chosen": -65.72099304199219, "logps/rejected": -120.83908081054688, "loss": 0.0909, "rewards/accuracies": 1.0, "rewards/chosen": -0.36940422654151917, "rewards/margins": 2.3748743534088135, "rewards/rejected": -2.7442786693573, "step": 3614 }, { "epoch": 0.8, "learning_rate": 9.962253554377952e-06, "logits/chosen": -1.270927906036377, "logits/rejected": -1.263197898864746, "logps/chosen": -100.0920639038086, "logps/rejected": -90.5769271850586, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.5879532098770142, "rewards/margins": 4.817378520965576, "rewards/rejected": -6.405331611633301, "step": 3615 }, { "epoch": 0.8, "learning_rate": 9.96203341701794e-06, "logits/chosen": -0.9775381684303284, "logits/rejected": -0.9869553446769714, "logps/chosen": -89.06735229492188, "logps/rejected": -145.06805419921875, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 0.2295883148908615, "rewards/margins": 4.293893814086914, "rewards/rejected": -4.064305305480957, "step": 3616 }, { "epoch": 0.8, "learning_rate": 9.961812642048563e-06, "logits/chosen": -1.2379658222198486, "logits/rejected": -1.233469843864441, "logps/chosen": -153.8540496826172, "logps/rejected": -98.91719055175781, "loss": 0.4543, "rewards/accuracies": 1.0, "rewards/chosen": -2.4143357276916504, "rewards/margins": 0.29628586769104004, "rewards/rejected": -2.7106215953826904, "step": 3617 }, { "epoch": 0.8, "learning_rate": 9.961591229498192e-06, "logits/chosen": -0.869638979434967, "logits/rejected": -0.8963245153427124, "logps/chosen": -177.20083618164062, "logps/rejected": -210.97450256347656, "loss": 0.179, "rewards/accuracies": 1.0, "rewards/chosen": -0.3804168701171875, "rewards/margins": 2.6433427333831787, "rewards/rejected": -3.023759603500366, "step": 3618 }, { "epoch": 0.8, "learning_rate": 9.96136917939527e-06, "logits/chosen": -1.1744166612625122, "logits/rejected": -1.173374891281128, "logps/chosen": -122.44123840332031, "logps/rejected": -60.3917236328125, "loss": 0.0792, "rewards/accuracies": 1.0, "rewards/chosen": -0.3854782283306122, "rewards/margins": 2.9220423698425293, "rewards/rejected": -3.307520627975464, "step": 3619 }, { "epoch": 0.8, "learning_rate": 9.961146491768338e-06, "logits/chosen": -1.1654607057571411, "logits/rejected": -0.7919734716415405, "logps/chosen": -253.3421173095703, "logps/rejected": -648.7728881835938, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.2591293454170227, "rewards/margins": 49.97526168823242, "rewards/rejected": -49.71613311767578, "step": 3620 }, { "epoch": 0.8, "learning_rate": 9.96092316664601e-06, "logits/chosen": -1.1728260517120361, "logits/rejected": -0.5462839007377625, "logps/chosen": -104.98624420166016, "logps/rejected": -749.734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.6356819272041321, "rewards/margins": 56.26054763793945, "rewards/rejected": -55.6248664855957, "step": 3621 }, { "epoch": 0.8, "learning_rate": 9.960699204056978e-06, "logits/chosen": -0.9354479312896729, "logits/rejected": -0.9175099730491638, "logps/chosen": -84.51579284667969, "logps/rejected": -103.58075714111328, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": 0.29461365938186646, "rewards/margins": 4.374950885772705, "rewards/rejected": -4.080337047576904, "step": 3622 }, { "epoch": 0.8, "learning_rate": 9.960474604030026e-06, "logits/chosen": -1.2755963802337646, "logits/rejected": -1.2789347171783447, "logps/chosen": -78.3005142211914, "logps/rejected": -98.40093231201172, "loss": 0.0714, "rewards/accuracies": 1.0, "rewards/chosen": 1.2459534406661987, "rewards/margins": 2.5270683765411377, "rewards/rejected": -1.281114935874939, "step": 3623 }, { "epoch": 0.8, "learning_rate": 9.96024936659401e-06, "logits/chosen": -0.9509494304656982, "logits/rejected": -0.9509494304656982, "logps/chosen": -103.86671447753906, "logps/rejected": -103.86671447753906, "loss": 0.3593, "rewards/accuracies": 0.0, "rewards/chosen": -2.118360996246338, "rewards/margins": 0.0, "rewards/rejected": -2.118360996246338, "step": 3624 }, { "epoch": 0.8, "learning_rate": 9.960023491777875e-06, "logits/chosen": -0.915697455406189, "logits/rejected": -0.5701372027397156, "logps/chosen": -66.85704803466797, "logps/rejected": -761.98291015625, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -4.135683536529541, "rewards/margins": 57.00804138183594, "rewards/rejected": -61.14372634887695, "step": 3625 }, { "epoch": 0.8, "learning_rate": 9.959796979610646e-06, "logits/chosen": -0.6948200464248657, "logits/rejected": -0.6898788213729858, "logps/chosen": -75.87919616699219, "logps/rejected": -111.13900756835938, "loss": 0.2148, "rewards/accuracies": 1.0, "rewards/chosen": -0.21177367866039276, "rewards/margins": 2.6812074184417725, "rewards/rejected": -2.8929810523986816, "step": 3626 }, { "epoch": 0.8, "learning_rate": 9.959569830121427e-06, "logits/chosen": -0.5385144352912903, "logits/rejected": -0.4886556565761566, "logps/chosen": -147.1453094482422, "logps/rejected": -174.46224975585938, "loss": 0.1001, "rewards/accuracies": 1.0, "rewards/chosen": 2.025041341781616, "rewards/margins": 1.5602678060531616, "rewards/rejected": 0.464773565530777, "step": 3627 }, { "epoch": 0.8, "learning_rate": 9.959342043339406e-06, "logits/chosen": -0.8349047899246216, "logits/rejected": -0.817500650882721, "logps/chosen": -124.96038818359375, "logps/rejected": -130.61553955078125, "loss": 0.2761, "rewards/accuracies": 1.0, "rewards/chosen": -1.4366363286972046, "rewards/margins": 0.5548866987228394, "rewards/rejected": -1.991523027420044, "step": 3628 }, { "epoch": 0.8, "learning_rate": 9.959113619293857e-06, "logits/chosen": -0.8148269057273865, "logits/rejected": -0.7273417711257935, "logps/chosen": -154.858154296875, "logps/rejected": -295.6640625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.5843734741210938, "rewards/margins": 5.303460597991943, "rewards/rejected": -5.887834072113037, "step": 3629 }, { "epoch": 0.8, "learning_rate": 9.958884558014128e-06, "logits/chosen": -0.9270634651184082, "logits/rejected": -0.20656488835811615, "logps/chosen": -91.56268310546875, "logps/rejected": -290.3195495605469, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.021955108270049095, "rewards/margins": 16.73817253112793, "rewards/rejected": -16.716217041015625, "step": 3630 }, { "epoch": 0.8, "learning_rate": 9.958654859529654e-06, "logits/chosen": -0.9083372950553894, "logits/rejected": -0.9132512211799622, "logps/chosen": -67.66328430175781, "logps/rejected": -130.0302734375, "loss": 1.121, "rewards/accuracies": 1.0, "rewards/chosen": 0.9404823184013367, "rewards/margins": 3.2316009998321533, "rewards/rejected": -2.291118621826172, "step": 3631 }, { "epoch": 0.8, "learning_rate": 9.958424523869952e-06, "logits/chosen": -0.9505008459091187, "logits/rejected": -0.9896199107170105, "logps/chosen": -205.46551513671875, "logps/rejected": -166.26116943359375, "loss": 0.0661, "rewards/accuracies": 1.0, "rewards/chosen": -0.12169494479894638, "rewards/margins": 1.9714738130569458, "rewards/rejected": -2.0931687355041504, "step": 3632 }, { "epoch": 0.8, "learning_rate": 9.958193551064617e-06, "logits/chosen": -0.7654458284378052, "logits/rejected": -0.7645830512046814, "logps/chosen": -41.11368942260742, "logps/rejected": -19.93262481689453, "loss": 0.6049, "rewards/accuracies": 0.0, "rewards/chosen": -1.226295828819275, "rewards/margins": -0.36993736028671265, "rewards/rejected": -0.8563584685325623, "step": 3633 }, { "epoch": 0.8, "learning_rate": 9.95796194114333e-06, "logits/chosen": -1.0433557033538818, "logits/rejected": -1.02468740940094, "logps/chosen": -78.14527130126953, "logps/rejected": -94.697509765625, "loss": 0.1041, "rewards/accuracies": 1.0, "rewards/chosen": -0.6674522757530212, "rewards/margins": 3.68332576751709, "rewards/rejected": -4.350778102874756, "step": 3634 }, { "epoch": 0.8, "learning_rate": 9.957729694135852e-06, "logits/chosen": -0.6082669496536255, "logits/rejected": -0.5921967029571533, "logps/chosen": -89.53834533691406, "logps/rejected": -152.12416076660156, "loss": 0.091, "rewards/accuracies": 1.0, "rewards/chosen": -1.8744087219238281, "rewards/margins": 1.61328125, "rewards/rejected": -3.487689971923828, "step": 3635 }, { "epoch": 0.8, "learning_rate": 9.957496810072027e-06, "logits/chosen": -1.298524022102356, "logits/rejected": -1.519343376159668, "logps/chosen": -238.24853515625, "logps/rejected": -73.81915283203125, "loss": 0.2242, "rewards/accuracies": 1.0, "rewards/chosen": -1.789819359779358, "rewards/margins": 1.476171612739563, "rewards/rejected": -3.265990972518921, "step": 3636 }, { "epoch": 0.81, "learning_rate": 9.957263288981779e-06, "logits/chosen": -1.0995886325836182, "logits/rejected": -1.0995886325836182, "logps/chosen": -107.9508056640625, "logps/rejected": -107.9508056640625, "loss": 0.3551, "rewards/accuracies": 0.0, "rewards/chosen": -2.940516710281372, "rewards/margins": 0.0, "rewards/rejected": -2.940516710281372, "step": 3637 }, { "epoch": 0.81, "learning_rate": 9.957029130895116e-06, "logits/chosen": -0.7063688635826111, "logits/rejected": -0.6793454885482788, "logps/chosen": -186.94508361816406, "logps/rejected": -265.15020751953125, "loss": 0.3694, "rewards/accuracies": 0.0, "rewards/chosen": -3.494206190109253, "rewards/margins": -0.05812835693359375, "rewards/rejected": -3.436077833175659, "step": 3638 }, { "epoch": 0.81, "learning_rate": 9.956794335842126e-06, "logits/chosen": -1.4550093412399292, "logits/rejected": -1.599425196647644, "logps/chosen": -180.80987548828125, "logps/rejected": -136.7991943359375, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": 1.1847671270370483, "rewards/margins": 8.232463836669922, "rewards/rejected": -7.047696590423584, "step": 3639 }, { "epoch": 0.81, "learning_rate": 9.956558903852978e-06, "logits/chosen": -0.9776795506477356, "logits/rejected": -0.9748443961143494, "logps/chosen": -50.008079528808594, "logps/rejected": -103.84671783447266, "loss": 0.1854, "rewards/accuracies": 1.0, "rewards/chosen": -0.0660938248038292, "rewards/margins": 0.9250729084014893, "rewards/rejected": -0.9911667108535767, "step": 3640 }, { "epoch": 0.81, "learning_rate": 9.956322834957929e-06, "logits/chosen": -1.0836697816848755, "logits/rejected": -1.0638352632522583, "logps/chosen": -111.04845428466797, "logps/rejected": -107.16842651367188, "loss": 0.5473, "rewards/accuracies": 0.0, "rewards/chosen": -1.0854698419570923, "rewards/margins": -0.6771888732910156, "rewards/rejected": -0.4082809388637543, "step": 3641 }, { "epoch": 0.81, "learning_rate": 9.956086129187308e-06, "logits/chosen": -1.1526509523391724, "logits/rejected": -1.1428498029708862, "logps/chosen": -118.57199096679688, "logps/rejected": -140.2338409423828, "loss": 0.0784, "rewards/accuracies": 1.0, "rewards/chosen": -0.8803039789199829, "rewards/margins": 1.9821730852127075, "rewards/rejected": -2.8624770641326904, "step": 3642 }, { "epoch": 0.81, "learning_rate": 9.955848786571534e-06, "logits/chosen": -1.1029523611068726, "logits/rejected": -1.0690217018127441, "logps/chosen": -75.01168823242188, "logps/rejected": -91.89421081542969, "loss": 0.1796, "rewards/accuracies": 1.0, "rewards/chosen": 0.4851264953613281, "rewards/margins": 2.1545839309692383, "rewards/rejected": -1.6694573163986206, "step": 3643 }, { "epoch": 0.81, "learning_rate": 9.955610807141105e-06, "logits/chosen": -1.2838369607925415, "logits/rejected": -1.3105766773223877, "logps/chosen": -88.48104858398438, "logps/rejected": -127.00311279296875, "loss": 0.0392, "rewards/accuracies": 1.0, "rewards/chosen": -0.07440948486328125, "rewards/margins": 3.83280873298645, "rewards/rejected": -3.9072182178497314, "step": 3644 }, { "epoch": 0.81, "learning_rate": 9.9553721909266e-06, "logits/chosen": -0.9351306557655334, "logits/rejected": -0.720583438873291, "logps/chosen": -126.58586883544922, "logps/rejected": -724.1094970703125, "loss": 0.0308, "rewards/accuracies": 1.0, "rewards/chosen": -0.7391144037246704, "rewards/margins": 56.2147216796875, "rewards/rejected": -56.953834533691406, "step": 3645 }, { "epoch": 0.81, "learning_rate": 9.95513293795868e-06, "logits/chosen": -1.2226223945617676, "logits/rejected": -1.3243601322174072, "logps/chosen": -213.7388916015625, "logps/rejected": -89.68331909179688, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/chosen": -2.656359910964966, "rewards/margins": 2.087045907974243, "rewards/rejected": -4.743405818939209, "step": 3646 }, { "epoch": 0.81, "learning_rate": 9.95489304826809e-06, "logits/chosen": -1.1271394491195679, "logits/rejected": -1.2215797901153564, "logps/chosen": -236.71151733398438, "logps/rejected": -129.09707641601562, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": 0.9935516715049744, "rewards/margins": 4.591573238372803, "rewards/rejected": -3.5980217456817627, "step": 3647 }, { "epoch": 0.81, "learning_rate": 9.954652521885656e-06, "logits/chosen": -0.9863665699958801, "logits/rejected": -0.9695910811424255, "logps/chosen": -111.37583923339844, "logps/rejected": -217.344482421875, "loss": 0.2259, "rewards/accuracies": 1.0, "rewards/chosen": 0.09986495971679688, "rewards/margins": 0.6427574157714844, "rewards/rejected": -0.5428924560546875, "step": 3648 }, { "epoch": 0.81, "learning_rate": 9.954411358842282e-06, "logits/chosen": -1.0326870679855347, "logits/rejected": -1.0662740468978882, "logps/chosen": -111.52680206298828, "logps/rejected": -105.66979217529297, "loss": 2.0083, "rewards/accuracies": 0.0, "rewards/chosen": -1.6960563659667969, "rewards/margins": -0.24266433715820312, "rewards/rejected": -1.4533920288085938, "step": 3649 }, { "epoch": 0.81, "learning_rate": 9.954169559168958e-06, "logits/chosen": -0.963657796382904, "logits/rejected": -1.1445432901382446, "logps/chosen": -213.30056762695312, "logps/rejected": -157.8560028076172, "loss": 0.9063, "rewards/accuracies": 1.0, "rewards/chosen": 0.10112304985523224, "rewards/margins": 4.1318206787109375, "rewards/rejected": -4.030697822570801, "step": 3650 }, { "epoch": 0.81, "learning_rate": 9.953927122896756e-06, "logits/chosen": -1.1496188640594482, "logits/rejected": -1.1496188640594482, "logps/chosen": -191.45848083496094, "logps/rejected": -191.45848083496094, "loss": 0.5229, "rewards/accuracies": 0.0, "rewards/chosen": -2.6128463745117188, "rewards/margins": 0.0, "rewards/rejected": -2.6128463745117188, "step": 3651 }, { "epoch": 0.81, "learning_rate": 9.953684050056827e-06, "logits/chosen": -0.9338581562042236, "logits/rejected": -0.911041259765625, "logps/chosen": -84.19813537597656, "logps/rejected": -125.08464813232422, "loss": 0.2706, "rewards/accuracies": 1.0, "rewards/chosen": -1.374603271484375, "rewards/margins": 0.33177411556243896, "rewards/rejected": -1.706377387046814, "step": 3652 }, { "epoch": 0.81, "learning_rate": 9.953440340680407e-06, "logits/chosen": -1.0361400842666626, "logits/rejected": -0.9941890239715576, "logps/chosen": -82.73489379882812, "logps/rejected": -196.84579467773438, "loss": 1.2246, "rewards/accuracies": 1.0, "rewards/chosen": 1.337977647781372, "rewards/margins": 4.621467590332031, "rewards/rejected": -3.283489942550659, "step": 3653 }, { "epoch": 0.81, "learning_rate": 9.95319599479881e-06, "logits/chosen": -1.0386650562286377, "logits/rejected": -1.0168614387512207, "logps/chosen": -107.79570770263672, "logps/rejected": -189.08900451660156, "loss": 0.2242, "rewards/accuracies": 1.0, "rewards/chosen": -0.7870048880577087, "rewards/margins": 0.5834526419639587, "rewards/rejected": -1.3704575300216675, "step": 3654 }, { "epoch": 0.81, "learning_rate": 9.952951012443434e-06, "logits/chosen": -0.522950291633606, "logits/rejected": -0.5616388320922852, "logps/chosen": -175.37435913085938, "logps/rejected": -185.42391967773438, "loss": 0.5419, "rewards/accuracies": 0.0, "rewards/chosen": 1.8085311651229858, "rewards/margins": -0.6501115560531616, "rewards/rejected": 2.4586427211761475, "step": 3655 }, { "epoch": 0.81, "learning_rate": 9.952705393645761e-06, "logits/chosen": -1.193259596824646, "logits/rejected": -1.193259596824646, "logps/chosen": -199.82667541503906, "logps/rejected": -199.82667541503906, "loss": 0.3473, "rewards/accuracies": 0.0, "rewards/chosen": -2.4172561168670654, "rewards/margins": 0.0, "rewards/rejected": -2.4172561168670654, "step": 3656 }, { "epoch": 0.81, "learning_rate": 9.952459138437352e-06, "logits/chosen": -1.233780860900879, "logits/rejected": -1.2182639837265015, "logps/chosen": -76.39737701416016, "logps/rejected": -111.52611541748047, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": 0.45028457045555115, "rewards/margins": 3.160146951675415, "rewards/rejected": -2.709862470626831, "step": 3657 }, { "epoch": 0.81, "learning_rate": 9.952212246849847e-06, "logits/chosen": -0.7203667759895325, "logits/rejected": -0.7152039408683777, "logps/chosen": -162.42222595214844, "logps/rejected": -254.7560577392578, "loss": 0.2523, "rewards/accuracies": 1.0, "rewards/chosen": -0.9737884402275085, "rewards/margins": 0.4265076518058777, "rewards/rejected": -1.4002960920333862, "step": 3658 }, { "epoch": 0.81, "learning_rate": 9.951964718914972e-06, "logits/chosen": -0.7771387100219727, "logits/rejected": -0.7655937671661377, "logps/chosen": -214.60130310058594, "logps/rejected": -182.62985229492188, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": 0.9462143182754517, "rewards/margins": 5.1895952224731445, "rewards/rejected": -4.243381023406982, "step": 3659 }, { "epoch": 0.81, "learning_rate": 9.951716554664537e-06, "logits/chosen": -0.8751806020736694, "logits/rejected": -0.8746663928031921, "logps/chosen": -78.6042251586914, "logps/rejected": -53.70088195800781, "loss": 0.2206, "rewards/accuracies": 1.0, "rewards/chosen": 0.6166038513183594, "rewards/margins": 0.6096245050430298, "rewards/rejected": 0.0069793700240552425, "step": 3660 }, { "epoch": 0.81, "learning_rate": 9.951467754130429e-06, "logits/chosen": -1.396636724472046, "logits/rejected": -1.382492184638977, "logps/chosen": -80.85234069824219, "logps/rejected": -109.3034896850586, "loss": 0.1006, "rewards/accuracies": 1.0, "rewards/chosen": 0.42386171221733093, "rewards/margins": 1.514722466468811, "rewards/rejected": -1.0908607244491577, "step": 3661 }, { "epoch": 0.81, "learning_rate": 9.951218317344615e-06, "logits/chosen": -1.2092669010162354, "logits/rejected": -1.2182083129882812, "logps/chosen": -85.81829833984375, "logps/rejected": -97.02928924560547, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": 0.04490203782916069, "rewards/margins": 6.067531585693359, "rewards/rejected": -6.022629737854004, "step": 3662 }, { "epoch": 0.81, "learning_rate": 9.950968244339152e-06, "logits/chosen": -0.48983851075172424, "logits/rejected": -0.48983851075172424, "logps/chosen": -91.01469421386719, "logps/rejected": -91.01469421386719, "loss": 0.4407, "rewards/accuracies": 0.0, "rewards/chosen": -3.6022706031799316, "rewards/margins": 0.0, "rewards/rejected": -3.6022706031799316, "step": 3663 }, { "epoch": 0.81, "learning_rate": 9.95071753514617e-06, "logits/chosen": -0.7716497778892517, "logits/rejected": -0.7763705253601074, "logps/chosen": -189.9572296142578, "logps/rejected": -163.02883911132812, "loss": 0.3388, "rewards/accuracies": 1.0, "rewards/chosen": 0.02783966064453125, "rewards/margins": 0.09055023640394211, "rewards/rejected": -0.06271057575941086, "step": 3664 }, { "epoch": 0.81, "learning_rate": 9.950466189797885e-06, "logits/chosen": -0.9495805501937866, "logits/rejected": -0.9402901530265808, "logps/chosen": -102.73583984375, "logps/rejected": -161.71253967285156, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/chosen": 0.32007598876953125, "rewards/margins": 2.4444077014923096, "rewards/rejected": -2.1243317127227783, "step": 3665 }, { "epoch": 0.81, "learning_rate": 9.950214208326598e-06, "logits/chosen": -0.9674314856529236, "logits/rejected": -1.006905436515808, "logps/chosen": -109.3974609375, "logps/rejected": -76.09202575683594, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.7517113089561462, "rewards/margins": 4.764556884765625, "rewards/rejected": -5.516268253326416, "step": 3666 }, { "epoch": 0.81, "learning_rate": 9.949961590764682e-06, "logits/chosen": -1.0647523403167725, "logits/rejected": -1.10403573513031, "logps/chosen": -140.80233764648438, "logps/rejected": -112.93142700195312, "loss": 0.0659, "rewards/accuracies": 1.0, "rewards/chosen": 0.7857696413993835, "rewards/margins": 1.9914093017578125, "rewards/rejected": -1.2056397199630737, "step": 3667 }, { "epoch": 0.81, "learning_rate": 9.949708337144603e-06, "logits/chosen": -0.7881942391395569, "logits/rejected": -0.8361149430274963, "logps/chosen": -190.71621704101562, "logps/rejected": -116.13813781738281, "loss": 0.8851, "rewards/accuracies": 0.0, "rewards/chosen": -0.8473846316337585, "rewards/margins": -1.527696967124939, "rewards/rejected": 0.6803123354911804, "step": 3668 }, { "epoch": 0.81, "learning_rate": 9.949454447498901e-06, "logits/chosen": -0.9797618985176086, "logits/rejected": -0.9912821054458618, "logps/chosen": -113.04957580566406, "logps/rejected": -252.51040649414062, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.03261718899011612, "rewards/margins": 5.455493450164795, "rewards/rejected": -5.488110542297363, "step": 3669 }, { "epoch": 0.81, "learning_rate": 9.949199921860202e-06, "logits/chosen": -0.7663719654083252, "logits/rejected": -0.8317806720733643, "logps/chosen": -133.1513671875, "logps/rejected": -45.98701858520508, "loss": 0.0489, "rewards/accuracies": 1.0, "rewards/chosen": -0.7074036002159119, "rewards/margins": 2.290044069290161, "rewards/rejected": -2.9974477291107178, "step": 3670 }, { "epoch": 0.81, "learning_rate": 9.94894476026121e-06, "logits/chosen": -1.1238291263580322, "logits/rejected": -1.063966989517212, "logps/chosen": -79.84622955322266, "logps/rejected": -120.39830780029297, "loss": 0.1113, "rewards/accuracies": 1.0, "rewards/chosen": 0.3241081237792969, "rewards/margins": 1.3906601667404175, "rewards/rejected": -1.0665520429611206, "step": 3671 }, { "epoch": 0.81, "learning_rate": 9.948688962734711e-06, "logits/chosen": -0.9037010669708252, "logits/rejected": -0.574036180973053, "logps/chosen": -276.5205078125, "logps/rejected": -567.6835327148438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.24456481635570526, "rewards/margins": 47.03784942626953, "rewards/rejected": -46.79328536987305, "step": 3672 }, { "epoch": 0.81, "learning_rate": 9.94843252931358e-06, "logits/chosen": -0.9354564547538757, "logits/rejected": -0.9354564547538757, "logps/chosen": -174.37930297851562, "logps/rejected": -174.37930297851562, "loss": 0.4113, "rewards/accuracies": 0.0, "rewards/chosen": -2.1398346424102783, "rewards/margins": 0.0, "rewards/rejected": -2.1398346424102783, "step": 3673 }, { "epoch": 0.81, "learning_rate": 9.948175460030762e-06, "logits/chosen": -0.8389997482299805, "logits/rejected": -0.9185761213302612, "logps/chosen": -171.64964294433594, "logps/rejected": -144.23953247070312, "loss": 0.1201, "rewards/accuracies": 1.0, "rewards/chosen": 1.8751205205917358, "rewards/margins": 6.085919380187988, "rewards/rejected": -4.210798740386963, "step": 3674 }, { "epoch": 0.81, "learning_rate": 9.947917754919293e-06, "logits/chosen": -1.1398667097091675, "logits/rejected": -1.1661964654922485, "logps/chosen": -223.05828857421875, "logps/rejected": -147.54098510742188, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 2.4283387660980225, "rewards/margins": 5.310775279998779, "rewards/rejected": -2.882436513900757, "step": 3675 }, { "epoch": 0.81, "learning_rate": 9.947659414012287e-06, "logits/chosen": -0.6315803527832031, "logits/rejected": -0.6241253614425659, "logps/chosen": -122.59527587890625, "logps/rejected": -188.55203247070312, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.19028472900390625, "rewards/margins": 4.6197190284729, "rewards/rejected": -4.810003757476807, "step": 3676 }, { "epoch": 0.81, "learning_rate": 9.94740043734294e-06, "logits/chosen": -0.8879587650299072, "logits/rejected": -0.8829755783081055, "logps/chosen": -71.54586791992188, "logps/rejected": -128.3392791748047, "loss": 0.2244, "rewards/accuracies": 1.0, "rewards/chosen": -1.0173416137695312, "rewards/margins": 0.6480636596679688, "rewards/rejected": -1.6654052734375, "step": 3677 }, { "epoch": 0.81, "learning_rate": 9.947140824944533e-06, "logits/chosen": -1.112012267112732, "logits/rejected": -1.1583442687988281, "logps/chosen": -114.95449829101562, "logps/rejected": -111.64149475097656, "loss": 0.1563, "rewards/accuracies": 1.0, "rewards/chosen": -0.9809479117393494, "rewards/margins": 2.898655652999878, "rewards/rejected": -3.879603624343872, "step": 3678 }, { "epoch": 0.81, "learning_rate": 9.946880576850418e-06, "logits/chosen": -1.5219087600708008, "logits/rejected": -1.4779306650161743, "logps/chosen": -73.656005859375, "logps/rejected": -125.29710388183594, "loss": 0.0772, "rewards/accuracies": 1.0, "rewards/chosen": 0.4303291440010071, "rewards/margins": 1.8278090953826904, "rewards/rejected": -1.3974800109863281, "step": 3679 }, { "epoch": 0.81, "learning_rate": 9.946619693094044e-06, "logits/chosen": -1.0561174154281616, "logits/rejected": -0.9717012643814087, "logps/chosen": -204.71620178222656, "logps/rejected": -236.57867431640625, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": 3.169271945953369, "rewards/margins": 3.2793595790863037, "rewards/rejected": -0.11008758842945099, "step": 3680 }, { "epoch": 0.81, "learning_rate": 9.94635817370893e-06, "logits/chosen": -1.10146164894104, "logits/rejected": -1.1037081480026245, "logps/chosen": -195.36485290527344, "logps/rejected": -267.892822265625, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": 1.6279922723770142, "rewards/margins": 6.092280387878418, "rewards/rejected": -4.464288234710693, "step": 3681 }, { "epoch": 0.81, "learning_rate": 9.94609601872868e-06, "logits/chosen": -0.8490332365036011, "logits/rejected": -1.0146960020065308, "logps/chosen": -197.15904235839844, "logps/rejected": -62.37122344970703, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": 0.6940414309501648, "rewards/margins": 3.2302887439727783, "rewards/rejected": -2.5362472534179688, "step": 3682 }, { "epoch": 0.82, "learning_rate": 9.945833228186984e-06, "logits/chosen": -1.0250052213668823, "logits/rejected": -1.0521074533462524, "logps/chosen": -153.26512145996094, "logps/rejected": -116.84967041015625, "loss": 0.1542, "rewards/accuracies": 1.0, "rewards/chosen": -0.14695587754249573, "rewards/margins": 1.082484483718872, "rewards/rejected": -1.2294403314590454, "step": 3683 }, { "epoch": 0.82, "learning_rate": 9.945569802117604e-06, "logits/chosen": -0.8618968725204468, "logits/rejected": -0.8618968725204468, "logps/chosen": -188.2188720703125, "logps/rejected": -188.2188720703125, "loss": 0.968, "rewards/accuracies": 0.0, "rewards/chosen": -3.0482819080352783, "rewards/margins": 0.0, "rewards/rejected": -3.0482819080352783, "step": 3684 }, { "epoch": 0.82, "learning_rate": 9.945305740554397e-06, "logits/chosen": -0.7390071153640747, "logits/rejected": -0.7622828483581543, "logps/chosen": -145.5643310546875, "logps/rejected": -134.7870330810547, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -0.2944900691509247, "rewards/margins": 3.7377212047576904, "rewards/rejected": -4.0322113037109375, "step": 3685 }, { "epoch": 0.82, "learning_rate": 9.945041043531289e-06, "logits/chosen": -0.9534022808074951, "logits/rejected": -0.9496361613273621, "logps/chosen": -69.31478881835938, "logps/rejected": -149.71017456054688, "loss": 0.1058, "rewards/accuracies": 1.0, "rewards/chosen": 1.2079269886016846, "rewards/margins": 4.151551246643066, "rewards/rejected": -2.943624258041382, "step": 3686 }, { "epoch": 0.82, "learning_rate": 9.944775711082296e-06, "logits/chosen": -1.1317269802093506, "logits/rejected": -1.1654229164123535, "logps/chosen": -110.67220306396484, "logps/rejected": -152.06689453125, "loss": 0.4852, "rewards/accuracies": 1.0, "rewards/chosen": 0.11711349338293076, "rewards/margins": 3.249640703201294, "rewards/rejected": -3.1325271129608154, "step": 3687 }, { "epoch": 0.82, "learning_rate": 9.944509743241508e-06, "logits/chosen": -0.9630371332168579, "logits/rejected": -0.9709562063217163, "logps/chosen": -103.61437225341797, "logps/rejected": -136.3483428955078, "loss": 0.3813, "rewards/accuracies": 0.0, "rewards/chosen": 1.217433214187622, "rewards/margins": -0.1280151605606079, "rewards/rejected": 1.34544837474823, "step": 3688 }, { "epoch": 0.82, "learning_rate": 9.944243140043106e-06, "logits/chosen": -1.0223336219787598, "logits/rejected": -0.5440518260002136, "logps/chosen": -120.86856842041016, "logps/rejected": -622.5087280273438, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 1.1100701093673706, "rewards/margins": 32.09880828857422, "rewards/rejected": -30.988739013671875, "step": 3689 }, { "epoch": 0.82, "learning_rate": 9.943975901521347e-06, "logits/chosen": -0.9079505801200867, "logits/rejected": -0.9240054488182068, "logps/chosen": -107.21131896972656, "logps/rejected": -77.51166534423828, "loss": 0.0619, "rewards/accuracies": 1.0, "rewards/chosen": 0.09138946980237961, "rewards/margins": 2.112300395965576, "rewards/rejected": -2.0209109783172607, "step": 3690 }, { "epoch": 0.82, "learning_rate": 9.943708027710567e-06, "logits/chosen": -1.1772181987762451, "logits/rejected": -1.157886028289795, "logps/chosen": -85.85186004638672, "logps/rejected": -134.603515625, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": 0.6300804018974304, "rewards/margins": 3.7229485511779785, "rewards/rejected": -3.0928680896759033, "step": 3691 }, { "epoch": 0.82, "learning_rate": 9.943439518645193e-06, "logits/chosen": -0.806849479675293, "logits/rejected": -0.8144581317901611, "logps/chosen": -92.90841674804688, "logps/rejected": -68.83014678955078, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -0.7028419375419617, "rewards/margins": 3.1326122283935547, "rewards/rejected": -3.835454225540161, "step": 3692 }, { "epoch": 0.82, "learning_rate": 9.943170374359722e-06, "logits/chosen": -1.1886271238327026, "logits/rejected": -1.1702193021774292, "logps/chosen": -93.83912658691406, "logps/rejected": -162.99127197265625, "loss": 0.115, "rewards/accuracies": 1.0, "rewards/chosen": 1.0117660760879517, "rewards/margins": 1.3675094842910767, "rewards/rejected": -0.355743408203125, "step": 3693 }, { "epoch": 0.82, "learning_rate": 9.942900594888743e-06, "logits/chosen": -0.7479019165039062, "logits/rejected": -0.7916662693023682, "logps/chosen": -96.90625, "logps/rejected": -53.69178771972656, "loss": 0.0786, "rewards/accuracies": 1.0, "rewards/chosen": -0.9180183410644531, "rewards/margins": 2.240114688873291, "rewards/rejected": -3.158133029937744, "step": 3694 }, { "epoch": 0.82, "learning_rate": 9.94263018026692e-06, "logits/chosen": -0.8107254505157471, "logits/rejected": -0.7984756827354431, "logps/chosen": -179.09820556640625, "logps/rejected": -118.25863647460938, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": 1.843164086341858, "rewards/margins": 4.545253276824951, "rewards/rejected": -2.7020890712738037, "step": 3695 }, { "epoch": 0.82, "learning_rate": 9.942359130528998e-06, "logits/chosen": -0.9785401821136475, "logits/rejected": -1.0672553777694702, "logps/chosen": -273.494873046875, "logps/rejected": -152.85394287109375, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -0.0046905516646802425, "rewards/margins": 4.18682336807251, "rewards/rejected": -4.191514015197754, "step": 3696 }, { "epoch": 0.82, "learning_rate": 9.942087445709811e-06, "logits/chosen": -1.1889616250991821, "logits/rejected": -1.2005541324615479, "logps/chosen": -165.0968017578125, "logps/rejected": -94.43610382080078, "loss": 0.2623, "rewards/accuracies": 1.0, "rewards/chosen": -5.104584693908691, "rewards/margins": 0.38310670852661133, "rewards/rejected": -5.487691402435303, "step": 3697 }, { "epoch": 0.82, "learning_rate": 9.941815125844267e-06, "logits/chosen": -1.2088826894760132, "logits/rejected": -1.2088826894760132, "logps/chosen": -216.277587890625, "logps/rejected": -216.277587890625, "loss": 0.35, "rewards/accuracies": 0.0, "rewards/chosen": -3.787187337875366, "rewards/margins": 0.0, "rewards/rejected": -3.787187337875366, "step": 3698 }, { "epoch": 0.82, "learning_rate": 9.94154217096736e-06, "logits/chosen": -0.9633185863494873, "logits/rejected": -0.9415777325630188, "logps/chosen": -90.5184326171875, "logps/rejected": -116.11277770996094, "loss": 0.1501, "rewards/accuracies": 1.0, "rewards/chosen": 0.3682563900947571, "rewards/margins": 1.1296226978302002, "rewards/rejected": -0.7613663077354431, "step": 3699 }, { "epoch": 0.82, "learning_rate": 9.941268581114162e-06, "logits/chosen": -1.022897481918335, "logits/rejected": -1.0222244262695312, "logps/chosen": -74.561279296875, "logps/rejected": -58.982147216796875, "loss": 0.5238, "rewards/accuracies": 0.0, "rewards/chosen": -0.32813262939453125, "rewards/margins": -0.6046603918075562, "rewards/rejected": 0.2765277922153473, "step": 3700 }, { "epoch": 0.82, "learning_rate": 9.94099435631983e-06, "logits/chosen": -0.956520676612854, "logits/rejected": -0.9210883378982544, "logps/chosen": -86.56001281738281, "logps/rejected": -147.2296600341797, "loss": 0.5747, "rewards/accuracies": 0.0, "rewards/chosen": 0.6570450067520142, "rewards/margins": -0.40875244140625, "rewards/rejected": 1.0657974481582642, "step": 3701 }, { "epoch": 0.82, "learning_rate": 9.940719496619601e-06, "logits/chosen": -1.0823262929916382, "logits/rejected": -1.1289973258972168, "logps/chosen": -82.918701171875, "logps/rejected": -87.50306701660156, "loss": 0.1212, "rewards/accuracies": 1.0, "rewards/chosen": 0.655474841594696, "rewards/margins": 2.1332290172576904, "rewards/rejected": -1.4777542352676392, "step": 3702 }, { "epoch": 0.82, "learning_rate": 9.940444002048794e-06, "logits/chosen": -1.0419777631759644, "logits/rejected": -1.1313246488571167, "logps/chosen": -226.38409423828125, "logps/rejected": -100.04786682128906, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 1.7367095947265625, "rewards/margins": 7.158812999725342, "rewards/rejected": -5.422103404998779, "step": 3703 }, { "epoch": 0.82, "learning_rate": 9.94016787264281e-06, "logits/chosen": -1.051885724067688, "logits/rejected": -1.0031473636627197, "logps/chosen": -109.94984436035156, "logps/rejected": -151.6960906982422, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 1.571698784828186, "rewards/margins": 6.649430274963379, "rewards/rejected": -5.077731609344482, "step": 3704 }, { "epoch": 0.82, "learning_rate": 9.939891108437129e-06, "logits/chosen": -1.0247527360916138, "logits/rejected": -1.0527055263519287, "logps/chosen": -133.38055419921875, "logps/rejected": -98.7706298828125, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": 1.6638458967208862, "rewards/margins": 3.1263954639434814, "rewards/rejected": -1.4625495672225952, "step": 3705 }, { "epoch": 0.82, "learning_rate": 9.939613709467317e-06, "logits/chosen": -0.7706505060195923, "logits/rejected": -0.8254773616790771, "logps/chosen": -211.19073486328125, "logps/rejected": -151.55677795410156, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": -1.338616967201233, "rewards/margins": 3.0026049613952637, "rewards/rejected": -4.341221809387207, "step": 3706 }, { "epoch": 0.82, "learning_rate": 9.939335675769017e-06, "logits/chosen": -0.7867586612701416, "logits/rejected": -0.8401687741279602, "logps/chosen": -199.74673461914062, "logps/rejected": -152.54824829101562, "loss": 1.4548, "rewards/accuracies": 0.0, "rewards/chosen": -2.30419921875, "rewards/margins": -2.8385863304138184, "rewards/rejected": 0.5343872308731079, "step": 3707 }, { "epoch": 0.82, "learning_rate": 9.939057007377955e-06, "logits/chosen": -0.981609582901001, "logits/rejected": -0.9717496037483215, "logps/chosen": -183.3368377685547, "logps/rejected": -226.06114196777344, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 3.2503068447113037, "rewards/margins": 5.241067886352539, "rewards/rejected": -1.9907608032226562, "step": 3708 }, { "epoch": 0.82, "learning_rate": 9.938777704329943e-06, "logits/chosen": -0.9656473994255066, "logits/rejected": -0.98811274766922, "logps/chosen": -102.14664459228516, "logps/rejected": -121.87374877929688, "loss": 0.6943, "rewards/accuracies": 0.0, "rewards/chosen": -0.7211189270019531, "rewards/margins": -0.00455474853515625, "rewards/rejected": -0.7165641784667969, "step": 3709 }, { "epoch": 0.82, "learning_rate": 9.938497766660869e-06, "logits/chosen": -1.1091248989105225, "logits/rejected": -1.1238411664962769, "logps/chosen": -162.69497680664062, "logps/rejected": -125.60420989990234, "loss": 0.4231, "rewards/accuracies": 1.0, "rewards/chosen": 1.6742538213729858, "rewards/margins": 3.4930930137634277, "rewards/rejected": -1.8188393115997314, "step": 3710 }, { "epoch": 0.82, "learning_rate": 9.938217194406701e-06, "logits/chosen": -0.891391396522522, "logits/rejected": -0.9037539958953857, "logps/chosen": -97.31944274902344, "logps/rejected": -81.43408203125, "loss": 0.1942, "rewards/accuracies": 1.0, "rewards/chosen": 0.5331520438194275, "rewards/margins": 0.9731239676475525, "rewards/rejected": -0.439971923828125, "step": 3711 }, { "epoch": 0.82, "learning_rate": 9.937935987603497e-06, "logits/chosen": -0.9637729525566101, "logits/rejected": -1.0579748153686523, "logps/chosen": -159.99002075195312, "logps/rejected": -108.96241760253906, "loss": 0.0705, "rewards/accuracies": 1.0, "rewards/chosen": 0.5303711295127869, "rewards/margins": 5.688353061676025, "rewards/rejected": -5.157981872558594, "step": 3712 }, { "epoch": 0.82, "learning_rate": 9.937654146287388e-06, "logits/chosen": -1.3499774932861328, "logits/rejected": -1.3540027141571045, "logps/chosen": -117.23636627197266, "logps/rejected": -98.13565063476562, "loss": 0.313, "rewards/accuracies": 1.0, "rewards/chosen": 0.6665641665458679, "rewards/margins": 0.21125486493110657, "rewards/rejected": 0.45530930161476135, "step": 3713 }, { "epoch": 0.82, "learning_rate": 9.937371670494591e-06, "logits/chosen": -0.8359938263893127, "logits/rejected": -0.8439748287200928, "logps/chosen": -162.10365295410156, "logps/rejected": -157.15423583984375, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": 2.8708481788635254, "rewards/margins": 8.103294372558594, "rewards/rejected": -5.232446193695068, "step": 3714 }, { "epoch": 0.82, "learning_rate": 9.937088560261404e-06, "logits/chosen": -1.1694344282150269, "logits/rejected": -1.1505376100540161, "logps/chosen": -89.04703521728516, "logps/rejected": -137.08889770507812, "loss": 0.0637, "rewards/accuracies": 1.0, "rewards/chosen": 1.7673988342285156, "rewards/margins": 2.1852028369903564, "rewards/rejected": -0.41780397295951843, "step": 3715 }, { "epoch": 0.82, "learning_rate": 9.936804815624205e-06, "logits/chosen": -1.0577507019042969, "logits/rejected": -1.1003631353378296, "logps/chosen": -167.71865844726562, "logps/rejected": -108.6053237915039, "loss": 0.0334, "rewards/accuracies": 1.0, "rewards/chosen": 2.0053818225860596, "rewards/margins": 3.0411415100097656, "rewards/rejected": -1.0357598066329956, "step": 3716 }, { "epoch": 0.82, "learning_rate": 9.936520436619455e-06, "logits/chosen": -1.1293598413467407, "logits/rejected": -1.075061559677124, "logps/chosen": -114.462646484375, "logps/rejected": -142.2319793701172, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -0.20319823920726776, "rewards/margins": 6.0620527267456055, "rewards/rejected": -6.265251159667969, "step": 3717 }, { "epoch": 0.82, "learning_rate": 9.936235423283696e-06, "logits/chosen": -1.4035019874572754, "logits/rejected": -1.3892951011657715, "logps/chosen": -99.6414566040039, "logps/rejected": -92.37030029296875, "loss": 0.2683, "rewards/accuracies": 1.0, "rewards/chosen": -0.021570587530732155, "rewards/margins": 0.34319227933883667, "rewards/rejected": -0.3647628724575043, "step": 3718 }, { "epoch": 0.82, "learning_rate": 9.935949775653554e-06, "logits/chosen": -1.2071988582611084, "logits/rejected": -1.0425686836242676, "logps/chosen": -177.5906524658203, "logps/rejected": -343.98565673828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.3894805908203125, "rewards/margins": 11.696556091308594, "rewards/rejected": -11.307075500488281, "step": 3719 }, { "epoch": 0.82, "learning_rate": 9.935663493765726e-06, "logits/chosen": -0.9916160702705383, "logits/rejected": -1.009899616241455, "logps/chosen": -192.00286865234375, "logps/rejected": -136.87429809570312, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": 1.006384253501892, "rewards/margins": 3.970184326171875, "rewards/rejected": -2.9638001918792725, "step": 3720 }, { "epoch": 0.82, "learning_rate": 9.935376577657008e-06, "logits/chosen": -1.1802480220794678, "logits/rejected": -1.1457144021987915, "logps/chosen": -43.179832458496094, "logps/rejected": -85.65025329589844, "loss": 0.2022, "rewards/accuracies": 1.0, "rewards/chosen": 0.7922237515449524, "rewards/margins": 5.336700439453125, "rewards/rejected": -4.544476509094238, "step": 3721 }, { "epoch": 0.82, "learning_rate": 9.935089027364264e-06, "logits/chosen": -1.0680193901062012, "logits/rejected": -1.2437559366226196, "logps/chosen": -208.07522583007812, "logps/rejected": -92.34077453613281, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 1.2430862188339233, "rewards/margins": 6.474300861358643, "rewards/rejected": -5.23121452331543, "step": 3722 }, { "epoch": 0.82, "learning_rate": 9.934800842924443e-06, "logits/chosen": -1.3873714208602905, "logits/rejected": -1.4346202611923218, "logps/chosen": -103.90753173828125, "logps/rejected": -112.57246398925781, "loss": 0.0981, "rewards/accuracies": 1.0, "rewards/chosen": 0.6474258303642273, "rewards/margins": 7.488264560699463, "rewards/rejected": -6.84083890914917, "step": 3723 }, { "epoch": 0.82, "learning_rate": 9.934512024374577e-06, "logits/chosen": -1.1987425088882446, "logits/rejected": -1.0814083814620972, "logps/chosen": -230.9386749267578, "logps/rejected": -259.2010498046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 3.9684433937072754, "rewards/margins": 8.351231575012207, "rewards/rejected": -4.382788181304932, "step": 3724 }, { "epoch": 0.82, "learning_rate": 9.934222571751777e-06, "logits/chosen": -0.8540841341018677, "logits/rejected": -0.8819504976272583, "logps/chosen": -251.05343627929688, "logps/rejected": -148.84117126464844, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 1.3688690662384033, "rewards/margins": 5.384322166442871, "rewards/rejected": -4.015453338623047, "step": 3725 }, { "epoch": 0.82, "learning_rate": 9.933932485093239e-06, "logits/chosen": -1.0803592205047607, "logits/rejected": -1.1092684268951416, "logps/chosen": -85.12892150878906, "logps/rejected": -92.48954010009766, "loss": 0.1956, "rewards/accuracies": 1.0, "rewards/chosen": -1.6533355712890625, "rewards/margins": 0.7369675636291504, "rewards/rejected": -2.390303134918213, "step": 3726 }, { "epoch": 0.82, "learning_rate": 9.933641764436237e-06, "logits/chosen": -1.078140377998352, "logits/rejected": -0.7124068737030029, "logps/chosen": -285.6441650390625, "logps/rejected": -180.3881378173828, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 3.1244049072265625, "rewards/margins": 14.42684268951416, "rewards/rejected": -11.302437782287598, "step": 3727 }, { "epoch": 0.83, "learning_rate": 9.933350409818128e-06, "logits/chosen": -0.8532493710517883, "logits/rejected": -0.9610887169837952, "logps/chosen": -190.81716918945312, "logps/rejected": -196.86737060546875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 1.681182861328125, "rewards/margins": 7.657693386077881, "rewards/rejected": -5.976510524749756, "step": 3728 }, { "epoch": 0.83, "learning_rate": 9.933058421276351e-06, "logits/chosen": -1.0346301794052124, "logits/rejected": -1.0209424495697021, "logps/chosen": -92.56130981445312, "logps/rejected": -157.44076538085938, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 0.7556236386299133, "rewards/margins": 6.5474934577941895, "rewards/rejected": -5.791869640350342, "step": 3729 }, { "epoch": 0.83, "learning_rate": 9.932765798848428e-06, "logits/chosen": -0.8654299378395081, "logits/rejected": -0.773177444934845, "logps/chosen": -154.40182495117188, "logps/rejected": -134.77728271484375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 3.667553663253784, "rewards/margins": 6.816282749176025, "rewards/rejected": -3.148729085922241, "step": 3730 }, { "epoch": 0.83, "learning_rate": 9.932472542571954e-06, "logits/chosen": -1.2666548490524292, "logits/rejected": -0.1160263940691948, "logps/chosen": -76.50347137451172, "logps/rejected": -268.8990173339844, "loss": 0.132, "rewards/accuracies": 1.0, "rewards/chosen": 1.0161141157150269, "rewards/margins": 12.55007266998291, "rewards/rejected": -11.533958435058594, "step": 3731 }, { "epoch": 0.83, "learning_rate": 9.932178652484617e-06, "logits/chosen": -0.8503219485282898, "logits/rejected": -0.8239452242851257, "logps/chosen": -51.58575439453125, "logps/rejected": -114.84236145019531, "loss": 0.1694, "rewards/accuracies": 1.0, "rewards/chosen": -0.11166610568761826, "rewards/margins": 1.717597246170044, "rewards/rejected": -1.8292633295059204, "step": 3732 }, { "epoch": 0.83, "learning_rate": 9.931884128624181e-06, "logits/chosen": -1.4066864252090454, "logits/rejected": -1.4410699605941772, "logps/chosen": -94.57484436035156, "logps/rejected": -90.4833984375, "loss": 1.197, "rewards/accuracies": 1.0, "rewards/chosen": 0.5722091794013977, "rewards/margins": 7.156037330627441, "rewards/rejected": -6.583827972412109, "step": 3733 }, { "epoch": 0.83, "learning_rate": 9.93158897102849e-06, "logits/chosen": -1.187570333480835, "logits/rejected": -1.2379549741744995, "logps/chosen": -202.49993896484375, "logps/rejected": -150.95323181152344, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": 2.7394073009490967, "rewards/margins": 3.2636046409606934, "rewards/rejected": -0.5241973996162415, "step": 3734 }, { "epoch": 0.83, "learning_rate": 9.93129317973547e-06, "logits/chosen": -1.0271785259246826, "logits/rejected": -1.0097376108169556, "logps/chosen": -119.166015625, "logps/rejected": -115.76264953613281, "loss": 0.0605, "rewards/accuracies": 1.0, "rewards/chosen": 0.6384048461914062, "rewards/margins": 3.4676361083984375, "rewards/rejected": -2.8292312622070312, "step": 3735 }, { "epoch": 0.83, "learning_rate": 9.930996754783134e-06, "logits/chosen": -1.0553805828094482, "logits/rejected": -1.184621810913086, "logps/chosen": -212.12997436523438, "logps/rejected": -82.50701904296875, "loss": 0.0588, "rewards/accuracies": 1.0, "rewards/chosen": -0.657910168170929, "rewards/margins": 5.051285266876221, "rewards/rejected": -5.709195613861084, "step": 3736 }, { "epoch": 0.83, "learning_rate": 9.930699696209566e-06, "logits/chosen": -1.1428879499435425, "logits/rejected": -1.0670098066329956, "logps/chosen": -108.7171630859375, "logps/rejected": -175.287109375, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": -0.34408798813819885, "rewards/margins": 2.462836503982544, "rewards/rejected": -2.80692458152771, "step": 3737 }, { "epoch": 0.83, "learning_rate": 9.93040200405294e-06, "logits/chosen": -1.2745060920715332, "logits/rejected": -1.2908633947372437, "logps/chosen": -64.16889953613281, "logps/rejected": -64.00677490234375, "loss": 0.0883, "rewards/accuracies": 1.0, "rewards/chosen": -0.007890320383012295, "rewards/margins": 1.6477577686309814, "rewards/rejected": -1.655648112297058, "step": 3738 }, { "epoch": 0.83, "learning_rate": 9.930103678351511e-06, "logits/chosen": -1.148831844329834, "logits/rejected": -0.48898592591285706, "logps/chosen": -109.06484985351562, "logps/rejected": -537.1466674804688, "loss": 0.0638, "rewards/accuracies": 1.0, "rewards/chosen": -0.6978203058242798, "rewards/margins": 28.791461944580078, "rewards/rejected": -29.489282608032227, "step": 3739 }, { "epoch": 0.83, "learning_rate": 9.92980471914361e-06, "logits/chosen": -0.8177036643028259, "logits/rejected": -0.7846232652664185, "logps/chosen": -85.19993591308594, "logps/rejected": -94.54547119140625, "loss": 0.108, "rewards/accuracies": 1.0, "rewards/chosen": 0.376626580953598, "rewards/margins": 1.4612693786621094, "rewards/rejected": -1.084642767906189, "step": 3740 }, { "epoch": 0.83, "learning_rate": 9.929505126467653e-06, "logits/chosen": -0.8116573691368103, "logits/rejected": -0.7823261022567749, "logps/chosen": -56.081886291503906, "logps/rejected": -103.8978500366211, "loss": 0.1302, "rewards/accuracies": 1.0, "rewards/chosen": 0.15150223672389984, "rewards/margins": 1.2126922607421875, "rewards/rejected": -1.0611900091171265, "step": 3741 }, { "epoch": 0.83, "learning_rate": 9.929204900362137e-06, "logits/chosen": -1.6523511409759521, "logits/rejected": -1.5513172149658203, "logps/chosen": -144.27459716796875, "logps/rejected": -324.1619567871094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.5616516470909119, "rewards/margins": 9.090968132019043, "rewards/rejected": -8.529316902160645, "step": 3742 }, { "epoch": 0.83, "learning_rate": 9.928904040865642e-06, "logits/chosen": -1.3467447757720947, "logits/rejected": -1.2047282457351685, "logps/chosen": -88.0017318725586, "logps/rejected": -231.13931274414062, "loss": 0.1171, "rewards/accuracies": 1.0, "rewards/chosen": 0.35696181654930115, "rewards/margins": 1.567731499671936, "rewards/rejected": -1.2107696533203125, "step": 3743 }, { "epoch": 0.83, "learning_rate": 9.928602548016826e-06, "logits/chosen": -0.9011938571929932, "logits/rejected": -0.9636668562889099, "logps/chosen": -219.1546173095703, "logps/rejected": -205.33901977539062, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 1.3137344121932983, "rewards/margins": 9.787581443786621, "rewards/rejected": -8.473847389221191, "step": 3744 }, { "epoch": 0.83, "learning_rate": 9.92830042185443e-06, "logits/chosen": -0.9486587047576904, "logits/rejected": -0.9948381781578064, "logps/chosen": -181.38327026367188, "logps/rejected": -104.35475158691406, "loss": 0.0629, "rewards/accuracies": 1.0, "rewards/chosen": -0.23280182480812073, "rewards/margins": 2.07646107673645, "rewards/rejected": -2.309262990951538, "step": 3745 }, { "epoch": 0.83, "learning_rate": 9.927997662417277e-06, "logits/chosen": -1.0755014419555664, "logits/rejected": -1.0017701387405396, "logps/chosen": -167.77481079101562, "logps/rejected": -220.32781982421875, "loss": 0.0648, "rewards/accuracies": 1.0, "rewards/chosen": 2.247241258621216, "rewards/margins": 2.0050904750823975, "rewards/rejected": 0.24215088784694672, "step": 3746 }, { "epoch": 0.83, "learning_rate": 9.927694269744273e-06, "logits/chosen": -1.0431925058364868, "logits/rejected": -1.0394792556762695, "logps/chosen": -180.54983520507812, "logps/rejected": -202.89520263671875, "loss": 0.2141, "rewards/accuracies": 1.0, "rewards/chosen": 0.8712097406387329, "rewards/margins": 6.144772529602051, "rewards/rejected": -5.273562908172607, "step": 3747 }, { "epoch": 0.83, "learning_rate": 9.9273902438744e-06, "logits/chosen": -1.312454342842102, "logits/rejected": -1.2771605253219604, "logps/chosen": -110.66111755371094, "logps/rejected": -145.2972412109375, "loss": 0.1017, "rewards/accuracies": 1.0, "rewards/chosen": 1.0232056379318237, "rewards/margins": 6.089529037475586, "rewards/rejected": -5.066323280334473, "step": 3748 }, { "epoch": 0.83, "learning_rate": 9.927085584846725e-06, "logits/chosen": -1.0465673208236694, "logits/rejected": -1.0066208839416504, "logps/chosen": -147.81710815429688, "logps/rejected": -215.9637451171875, "loss": 0.0317, "rewards/accuracies": 1.0, "rewards/chosen": -0.0553741455078125, "rewards/margins": 2.80859375, "rewards/rejected": -2.8639678955078125, "step": 3749 }, { "epoch": 0.83, "learning_rate": 9.926780292700397e-06, "logits/chosen": -1.1026172637939453, "logits/rejected": -1.0743457078933716, "logps/chosen": -80.69318389892578, "logps/rejected": -108.59550476074219, "loss": 0.2194, "rewards/accuracies": 1.0, "rewards/chosen": -0.1860343962907791, "rewards/margins": 0.6066093444824219, "rewards/rejected": -0.7926437258720398, "step": 3750 }, { "epoch": 0.83, "learning_rate": 9.926474367474646e-06, "logits/chosen": -1.4423866271972656, "logits/rejected": -1.6096707582473755, "logps/chosen": -249.65634155273438, "logps/rejected": -93.5488510131836, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": 1.0080779790878296, "rewards/margins": 3.7177681922912598, "rewards/rejected": -2.7096900939941406, "step": 3751 }, { "epoch": 0.83, "learning_rate": 9.92616780920878e-06, "logits/chosen": -1.3145530223846436, "logits/rejected": -1.395569086074829, "logps/chosen": -248.10301208496094, "logps/rejected": -148.63629150390625, "loss": 0.1747, "rewards/accuracies": 1.0, "rewards/chosen": 0.5503768920898438, "rewards/margins": 0.8717712163925171, "rewards/rejected": -0.3213943541049957, "step": 3752 }, { "epoch": 0.83, "learning_rate": 9.925860617942195e-06, "logits/chosen": -1.1676068305969238, "logits/rejected": -1.1676068305969238, "logps/chosen": -69.00471496582031, "logps/rejected": -69.00471496582031, "loss": 0.3502, "rewards/accuracies": 0.0, "rewards/chosen": -5.073329448699951, "rewards/margins": 0.0, "rewards/rejected": -5.073329448699951, "step": 3753 }, { "epoch": 0.83, "learning_rate": 9.92555279371436e-06, "logits/chosen": -0.917803943157196, "logits/rejected": -0.9026135802268982, "logps/chosen": -113.4481430053711, "logps/rejected": -136.90615844726562, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -2.419196367263794, "rewards/margins": 4.27678108215332, "rewards/rejected": -6.695977687835693, "step": 3754 }, { "epoch": 0.83, "learning_rate": 9.925244336564831e-06, "logits/chosen": -1.2052817344665527, "logits/rejected": -1.1347137689590454, "logps/chosen": -86.49189758300781, "logps/rejected": -121.70208740234375, "loss": 0.6582, "rewards/accuracies": 0.0, "rewards/chosen": -4.060817718505859, "rewards/margins": -0.7994856834411621, "rewards/rejected": -3.2613320350646973, "step": 3755 }, { "epoch": 0.83, "learning_rate": 9.924935246533249e-06, "logits/chosen": -1.0149755477905273, "logits/rejected": -0.9932499527931213, "logps/chosen": -100.49845123291016, "logps/rejected": -190.45748901367188, "loss": 0.8288, "rewards/accuracies": 0.0, "rewards/chosen": -0.1017662063241005, "rewards/margins": -1.2384605407714844, "rewards/rejected": 1.136694312095642, "step": 3756 }, { "epoch": 0.83, "learning_rate": 9.924625523659324e-06, "logits/chosen": -0.9557256698608398, "logits/rejected": -0.8960447907447815, "logps/chosen": -188.50302124023438, "logps/rejected": -374.26959228515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.3632049560546875, "rewards/margins": 17.941286087036133, "rewards/rejected": -16.578081130981445, "step": 3757 }, { "epoch": 0.83, "learning_rate": 9.924315167982858e-06, "logits/chosen": -1.5762916803359985, "logits/rejected": -1.586232304573059, "logps/chosen": -119.47936248779297, "logps/rejected": -127.84949493408203, "loss": 1.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.16539840400218964, "rewards/margins": 3.9579360485076904, "rewards/rejected": -4.1233344078063965, "step": 3758 }, { "epoch": 0.83, "learning_rate": 9.924004179543728e-06, "logits/chosen": -1.1657993793487549, "logits/rejected": -1.1539512872695923, "logps/chosen": -57.054325103759766, "logps/rejected": -148.50057983398438, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 0.2683429718017578, "rewards/margins": 5.021297931671143, "rewards/rejected": -4.752954959869385, "step": 3759 }, { "epoch": 0.83, "learning_rate": 9.923692558381902e-06, "logits/chosen": -1.3421273231506348, "logits/rejected": -1.3404490947723389, "logps/chosen": -67.86294555664062, "logps/rejected": -149.37974548339844, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 1.5247825384140015, "rewards/margins": 7.148067474365234, "rewards/rejected": -5.623284816741943, "step": 3760 }, { "epoch": 0.83, "learning_rate": 9.923380304537417e-06, "logits/chosen": -1.038845181465149, "logits/rejected": -1.046675443649292, "logps/chosen": -148.3438720703125, "logps/rejected": -184.0594482421875, "loss": 0.0634, "rewards/accuracies": 1.0, "rewards/chosen": -1.7985382080078125, "rewards/margins": 2.0932068824768066, "rewards/rejected": -3.891745090484619, "step": 3761 }, { "epoch": 0.83, "learning_rate": 9.923067418050399e-06, "logits/chosen": -1.0796269178390503, "logits/rejected": -1.0194483995437622, "logps/chosen": -178.98779296875, "logps/rejected": -319.41424560546875, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": -0.05017242580652237, "rewards/margins": 2.927354574203491, "rewards/rejected": -2.977526903152466, "step": 3762 }, { "epoch": 0.83, "learning_rate": 9.922753898961052e-06, "logits/chosen": -1.4061317443847656, "logits/rejected": -1.4090849161148071, "logps/chosen": -132.75115966796875, "logps/rejected": -112.49520874023438, "loss": 0.1256, "rewards/accuracies": 1.0, "rewards/chosen": -1.0764251947402954, "rewards/margins": 1.4290114641189575, "rewards/rejected": -2.505436658859253, "step": 3763 }, { "epoch": 0.83, "learning_rate": 9.922439747309663e-06, "logits/chosen": -0.9531845450401306, "logits/rejected": -0.9419963359832764, "logps/chosen": -116.92955780029297, "logps/rejected": -159.86886596679688, "loss": 0.0633, "rewards/accuracies": 1.0, "rewards/chosen": -0.12018050998449326, "rewards/margins": 2.4505836963653564, "rewards/rejected": -2.5707643032073975, "step": 3764 }, { "epoch": 0.83, "learning_rate": 9.922124963136599e-06, "logits/chosen": -1.2180484533309937, "logits/rejected": -1.1765323877334595, "logps/chosen": -193.79171752929688, "logps/rejected": -168.55258178710938, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -0.1698043793439865, "rewards/margins": 3.378079414367676, "rewards/rejected": -3.5478837490081787, "step": 3765 }, { "epoch": 0.83, "learning_rate": 9.92180954648231e-06, "logits/chosen": -1.286281943321228, "logits/rejected": -1.361410140991211, "logps/chosen": -242.04025268554688, "logps/rejected": -206.99075317382812, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 3.13140869140625, "rewards/margins": 10.143013000488281, "rewards/rejected": -7.011604309082031, "step": 3766 }, { "epoch": 0.83, "learning_rate": 9.921493497387327e-06, "logits/chosen": -0.9168528318405151, "logits/rejected": -0.8578169345855713, "logps/chosen": -171.3853302001953, "logps/rejected": -188.30819702148438, "loss": 0.1498, "rewards/accuracies": 1.0, "rewards/chosen": 3.3616225719451904, "rewards/margins": 6.578547477722168, "rewards/rejected": -3.2169251441955566, "step": 3767 }, { "epoch": 0.83, "learning_rate": 9.921176815892259e-06, "logits/chosen": -1.0078096389770508, "logits/rejected": -0.9935136437416077, "logps/chosen": -83.50846862792969, "logps/rejected": -66.87442016601562, "loss": 0.0629, "rewards/accuracies": 1.0, "rewards/chosen": -1.7577491998672485, "rewards/margins": 2.0104241371154785, "rewards/rejected": -3.7681732177734375, "step": 3768 }, { "epoch": 0.83, "learning_rate": 9.920859502037801e-06, "logits/chosen": -1.2231119871139526, "logits/rejected": -1.1748580932617188, "logps/chosen": -226.496337890625, "logps/rejected": -242.42938232421875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 2.7771668434143066, "rewards/margins": 5.601031303405762, "rewards/rejected": -2.823864698410034, "step": 3769 }, { "epoch": 0.83, "learning_rate": 9.920541555864726e-06, "logits/chosen": -1.2362052202224731, "logits/rejected": -1.248471736907959, "logps/chosen": -133.62362670898438, "logps/rejected": -110.8685302734375, "loss": 0.1218, "rewards/accuracies": 1.0, "rewards/chosen": -4.74001407623291, "rewards/margins": 1.2879509925842285, "rewards/rejected": -6.027965068817139, "step": 3770 }, { "epoch": 0.83, "learning_rate": 9.920222977413892e-06, "logits/chosen": -1.4657535552978516, "logits/rejected": -1.4973537921905518, "logps/chosen": -101.20664978027344, "logps/rejected": -141.8355255126953, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": 0.9947540163993835, "rewards/margins": 3.8646461963653564, "rewards/rejected": -2.869892120361328, "step": 3771 }, { "epoch": 0.83, "learning_rate": 9.919903766726229e-06, "logits/chosen": -1.2762049436569214, "logits/rejected": -0.9783244729042053, "logps/chosen": -90.8876953125, "logps/rejected": -469.04193115234375, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": -1.2920242547988892, "rewards/margins": 20.216472625732422, "rewards/rejected": -21.50849723815918, "step": 3772 }, { "epoch": 0.84, "learning_rate": 9.919583923842763e-06, "logits/chosen": -1.2364078760147095, "logits/rejected": -1.2898402214050293, "logps/chosen": -174.47439575195312, "logps/rejected": -196.53990173339844, "loss": 0.0304, "rewards/accuracies": 1.0, "rewards/chosen": 0.125019833445549, "rewards/margins": 6.570704936981201, "rewards/rejected": -6.445684909820557, "step": 3773 }, { "epoch": 0.84, "learning_rate": 9.919263448804589e-06, "logits/chosen": -1.1010171175003052, "logits/rejected": -1.0718424320220947, "logps/chosen": -105.97852325439453, "logps/rejected": -104.9220962524414, "loss": 0.5356, "rewards/accuracies": 1.0, "rewards/chosen": 0.9477149844169617, "rewards/margins": 4.5007805824279785, "rewards/rejected": -3.553065538406372, "step": 3774 }, { "epoch": 0.84, "learning_rate": 9.918942341652885e-06, "logits/chosen": -0.9174149036407471, "logits/rejected": -0.9281601309776306, "logps/chosen": -154.035888671875, "logps/rejected": -95.34828186035156, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": 0.7137695550918579, "rewards/margins": 2.991283416748047, "rewards/rejected": -2.2775139808654785, "step": 3775 }, { "epoch": 0.84, "learning_rate": 9.918620602428916e-06, "logits/chosen": -1.3700453042984009, "logits/rejected": -1.3815088272094727, "logps/chosen": -124.8941421508789, "logps/rejected": -160.53053283691406, "loss": 0.5156, "rewards/accuracies": 1.0, "rewards/chosen": -0.17038802802562714, "rewards/margins": 3.4138023853302, "rewards/rejected": -3.5841903686523438, "step": 3776 }, { "epoch": 0.84, "learning_rate": 9.918298231174023e-06, "logits/chosen": -1.2667938470840454, "logits/rejected": -1.2182425260543823, "logps/chosen": -78.53682708740234, "logps/rejected": -125.29498291015625, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": 0.026871491223573685, "rewards/margins": 3.630138397216797, "rewards/rejected": -3.603266954421997, "step": 3777 }, { "epoch": 0.84, "learning_rate": 9.917975227929631e-06, "logits/chosen": -1.1310412883758545, "logits/rejected": -1.0777651071548462, "logps/chosen": -266.7796325683594, "logps/rejected": -314.48895263671875, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -1.0585235357284546, "rewards/margins": 3.038177490234375, "rewards/rejected": -4.096701145172119, "step": 3778 }, { "epoch": 0.84, "learning_rate": 9.917651592737245e-06, "logits/chosen": -1.4541717767715454, "logits/rejected": -1.5130267143249512, "logps/chosen": -233.75469970703125, "logps/rejected": -87.10609436035156, "loss": 0.2038, "rewards/accuracies": 1.0, "rewards/chosen": -0.7044662833213806, "rewards/margins": 0.895011842250824, "rewards/rejected": -1.5994781255722046, "step": 3779 }, { "epoch": 0.84, "learning_rate": 9.91732732563845e-06, "logits/chosen": -1.32106351852417, "logits/rejected": -1.3444998264312744, "logps/chosen": -141.54022216796875, "logps/rejected": -115.24725341796875, "loss": 0.3663, "rewards/accuracies": 1.0, "rewards/chosen": 1.1612595319747925, "rewards/margins": 4.443253993988037, "rewards/rejected": -3.281994581222534, "step": 3780 }, { "epoch": 0.84, "learning_rate": 9.917002426674916e-06, "logits/chosen": -1.3332229852676392, "logits/rejected": -1.3815985918045044, "logps/chosen": -139.9884490966797, "logps/rejected": -116.73259735107422, "loss": 0.3719, "rewards/accuracies": 0.0, "rewards/chosen": -1.033625841140747, "rewards/margins": -0.09004521369934082, "rewards/rejected": -0.9435806274414062, "step": 3781 }, { "epoch": 0.84, "learning_rate": 9.91667689588839e-06, "logits/chosen": -1.2009902000427246, "logits/rejected": -1.1747283935546875, "logps/chosen": -72.52839660644531, "logps/rejected": -155.9002685546875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.8516921997070312, "rewards/margins": 5.104963779449463, "rewards/rejected": -6.956655979156494, "step": 3782 }, { "epoch": 0.84, "learning_rate": 9.916350733320704e-06, "logits/chosen": -1.437479019165039, "logits/rejected": -1.4741995334625244, "logps/chosen": -71.1130142211914, "logps/rejected": -35.433197021484375, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": -0.518018364906311, "rewards/margins": 2.3133630752563477, "rewards/rejected": -2.831381320953369, "step": 3783 }, { "epoch": 0.84, "learning_rate": 9.916023939013764e-06, "logits/chosen": -1.4681397676467896, "logits/rejected": -1.4859532117843628, "logps/chosen": -179.43304443359375, "logps/rejected": -93.96285247802734, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.332208275794983, "rewards/margins": 4.464114189147949, "rewards/rejected": -5.796322345733643, "step": 3784 }, { "epoch": 0.84, "learning_rate": 9.915696513009567e-06, "logits/chosen": -1.169818639755249, "logits/rejected": -1.157877802848816, "logps/chosen": -174.87762451171875, "logps/rejected": -218.53237915039062, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.1369293928146362, "rewards/margins": 10.241909980773926, "rewards/rejected": -11.378839492797852, "step": 3785 }, { "epoch": 0.84, "learning_rate": 9.915368455350185e-06, "logits/chosen": -1.287043809890747, "logits/rejected": -1.3501477241516113, "logps/chosen": -183.4945831298828, "logps/rejected": -167.12738037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.5948073863983154, "rewards/margins": 12.501530647277832, "rewards/rejected": -8.906723022460938, "step": 3786 }, { "epoch": 0.84, "learning_rate": 9.915039766077772e-06, "logits/chosen": -0.8639785647392273, "logits/rejected": -0.8639785647392273, "logps/chosen": -74.94529724121094, "logps/rejected": -74.94529724121094, "loss": 0.3574, "rewards/accuracies": 0.0, "rewards/chosen": -5.92789888381958, "rewards/margins": 0.0, "rewards/rejected": -5.92789888381958, "step": 3787 }, { "epoch": 0.84, "learning_rate": 9.914710445234567e-06, "logits/chosen": -1.158076286315918, "logits/rejected": -1.158076286315918, "logps/chosen": -109.96566772460938, "logps/rejected": -109.96566772460938, "loss": 0.3869, "rewards/accuracies": 0.0, "rewards/chosen": -1.6388282775878906, "rewards/margins": 0.0, "rewards/rejected": -1.6388282775878906, "step": 3788 }, { "epoch": 0.84, "learning_rate": 9.914380492862883e-06, "logits/chosen": -1.1013638973236084, "logits/rejected": -1.0221918821334839, "logps/chosen": -190.0835723876953, "logps/rejected": -292.83758544921875, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -0.2629714906215668, "rewards/margins": 3.5325868129730225, "rewards/rejected": -3.795558214187622, "step": 3789 }, { "epoch": 0.84, "learning_rate": 9.91404990900512e-06, "logits/chosen": -1.1529265642166138, "logits/rejected": -1.064015507698059, "logps/chosen": -145.27017211914062, "logps/rejected": -260.3554382324219, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 0.695208728313446, "rewards/margins": 4.984217643737793, "rewards/rejected": -4.289009094238281, "step": 3790 }, { "epoch": 0.84, "learning_rate": 9.913718693703755e-06, "logits/chosen": -1.3907215595245361, "logits/rejected": -1.3897703886032104, "logps/chosen": -130.60855102539062, "logps/rejected": -212.69227600097656, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": 1.4566329717636108, "rewards/margins": 2.3870773315429688, "rewards/rejected": -0.9304443597793579, "step": 3791 }, { "epoch": 0.84, "learning_rate": 9.91338684700135e-06, "logits/chosen": -1.2949196100234985, "logits/rejected": -1.2738913297653198, "logps/chosen": -74.24742126464844, "logps/rejected": -110.99234008789062, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": 1.0377235412597656, "rewards/margins": 3.189237356185913, "rewards/rejected": -2.1515138149261475, "step": 3792 }, { "epoch": 0.84, "learning_rate": 9.91305436894055e-06, "logits/chosen": -0.9606518745422363, "logits/rejected": -0.8884385824203491, "logps/chosen": -238.82998657226562, "logps/rejected": -438.53924560546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 3.868734836578369, "rewards/margins": 6.102228164672852, "rewards/rejected": -2.2334930896759033, "step": 3793 }, { "epoch": 0.84, "learning_rate": 9.912721259564072e-06, "logits/chosen": -1.522636890411377, "logits/rejected": -1.4949750900268555, "logps/chosen": -78.05258178710938, "logps/rejected": -167.42715454101562, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.6469101309776306, "rewards/margins": 4.3056960105896, "rewards/rejected": -4.952606201171875, "step": 3794 }, { "epoch": 0.84, "learning_rate": 9.91238751891472e-06, "logits/chosen": -1.1460684537887573, "logits/rejected": -1.19138503074646, "logps/chosen": -119.81448364257812, "logps/rejected": -187.15725708007812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.145008847117424, "rewards/margins": 7.10703706741333, "rewards/rejected": -7.25204610824585, "step": 3795 }, { "epoch": 0.84, "learning_rate": 9.912053147035383e-06, "logits/chosen": -1.3357009887695312, "logits/rejected": -1.303261160850525, "logps/chosen": -111.85392761230469, "logps/rejected": -189.03013610839844, "loss": 0.2809, "rewards/accuracies": 1.0, "rewards/chosen": -2.8897247314453125, "rewards/margins": 2.1897568702697754, "rewards/rejected": -5.079481601715088, "step": 3796 }, { "epoch": 0.84, "learning_rate": 9.911718143969024e-06, "logits/chosen": -1.3392523527145386, "logits/rejected": -1.3392523527145386, "logps/chosen": -205.98828125, "logps/rejected": -205.98828125, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.36002516746521, "rewards/margins": 0.0, "rewards/rejected": -2.36002516746521, "step": 3797 }, { "epoch": 0.84, "learning_rate": 9.911382509758692e-06, "logits/chosen": -1.3050519227981567, "logits/rejected": -1.1624643802642822, "logps/chosen": -147.1563720703125, "logps/rejected": -267.3353271484375, "loss": 0.2694, "rewards/accuracies": 1.0, "rewards/chosen": -1.0647400617599487, "rewards/margins": 0.36176300048828125, "rewards/rejected": -1.42650306224823, "step": 3798 }, { "epoch": 0.84, "learning_rate": 9.911046244447515e-06, "logits/chosen": -1.2560933828353882, "logits/rejected": -1.2560933828353882, "logps/chosen": -144.87171936035156, "logps/rejected": -144.87171936035156, "loss": 0.378, "rewards/accuracies": 0.0, "rewards/chosen": -2.1540818214416504, "rewards/margins": 0.0, "rewards/rejected": -2.1540818214416504, "step": 3799 }, { "epoch": 0.84, "learning_rate": 9.910709348078699e-06, "logits/chosen": -1.2864547967910767, "logits/rejected": -1.3107385635375977, "logps/chosen": -122.05553436279297, "logps/rejected": -128.01345825195312, "loss": 0.4164, "rewards/accuracies": 0.0, "rewards/chosen": -0.6596572995185852, "rewards/margins": -0.2213279902935028, "rewards/rejected": -0.4383293092250824, "step": 3800 }, { "epoch": 0.84, "learning_rate": 9.910371820695538e-06, "logits/chosen": -1.3899468183517456, "logits/rejected": -1.3501378297805786, "logps/chosen": -107.35091400146484, "logps/rejected": -289.2322998046875, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": 2.272258758544922, "rewards/margins": 5.962459564208984, "rewards/rejected": -3.6902008056640625, "step": 3801 }, { "epoch": 0.84, "learning_rate": 9.910033662341403e-06, "logits/chosen": -1.0295180082321167, "logits/rejected": -0.96544349193573, "logps/chosen": -90.46575164794922, "logps/rejected": -196.05471801757812, "loss": 0.1232, "rewards/accuracies": 1.0, "rewards/chosen": -0.14395295083522797, "rewards/margins": 1.2878341674804688, "rewards/rejected": -1.431787133216858, "step": 3802 }, { "epoch": 0.84, "learning_rate": 9.909694873059745e-06, "logits/chosen": -1.2860723733901978, "logits/rejected": -1.2752411365509033, "logps/chosen": -103.30918884277344, "logps/rejected": -94.31204223632812, "loss": 0.5828, "rewards/accuracies": 0.0, "rewards/chosen": -2.0175704956054688, "rewards/margins": -0.7914512157440186, "rewards/rejected": -1.2261192798614502, "step": 3803 }, { "epoch": 0.84, "learning_rate": 9.909355452894098e-06, "logits/chosen": -1.187591552734375, "logits/rejected": -1.1571522951126099, "logps/chosen": -146.55984497070312, "logps/rejected": -274.50238037109375, "loss": 1.1222, "rewards/accuracies": 1.0, "rewards/chosen": -5.693981170654297, "rewards/margins": 0.5470099449157715, "rewards/rejected": -6.240991115570068, "step": 3804 }, { "epoch": 0.84, "learning_rate": 9.909015401888077e-06, "logits/chosen": -1.0597974061965942, "logits/rejected": -1.0799204111099243, "logps/chosen": -118.36181640625, "logps/rejected": -93.97384643554688, "loss": 0.4449, "rewards/accuracies": 0.0, "rewards/chosen": -1.1547974348068237, "rewards/margins": -0.3449547290802002, "rewards/rejected": -0.8098427057266235, "step": 3805 }, { "epoch": 0.84, "learning_rate": 9.908674720085378e-06, "logits/chosen": -1.2655906677246094, "logits/rejected": -1.2655906677246094, "logps/chosen": -159.12069702148438, "logps/rejected": -159.12069702148438, "loss": 0.3472, "rewards/accuracies": 0.0, "rewards/chosen": -2.6124267578125, "rewards/margins": 0.0, "rewards/rejected": -2.6124267578125, "step": 3806 }, { "epoch": 0.84, "learning_rate": 9.908333407529779e-06, "logits/chosen": -1.2577428817749023, "logits/rejected": -1.1782044172286987, "logps/chosen": -88.34147644042969, "logps/rejected": -196.31390380859375, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4021904170513153, "rewards/margins": 3.6699440479278564, "rewards/rejected": -4.072134494781494, "step": 3807 }, { "epoch": 0.84, "learning_rate": 9.907991464265136e-06, "logits/chosen": -1.2813911437988281, "logits/rejected": -1.2707773447036743, "logps/chosen": -101.73357391357422, "logps/rejected": -107.98481750488281, "loss": 0.2474, "rewards/accuracies": 1.0, "rewards/chosen": -1.466561198234558, "rewards/margins": 0.5439132452011108, "rewards/rejected": -2.010474443435669, "step": 3808 }, { "epoch": 0.84, "learning_rate": 9.907648890335387e-06, "logits/chosen": -0.9075120091438293, "logits/rejected": -0.9165389537811279, "logps/chosen": -68.16993713378906, "logps/rejected": -103.83460998535156, "loss": 0.4222, "rewards/accuracies": 1.0, "rewards/chosen": -1.7758598327636719, "rewards/margins": 0.15264058113098145, "rewards/rejected": -1.9285004138946533, "step": 3809 }, { "epoch": 0.84, "learning_rate": 9.907305685784553e-06, "logits/chosen": -1.3887616395950317, "logits/rejected": -1.4153101444244385, "logps/chosen": -139.9269256591797, "logps/rejected": -122.59120178222656, "loss": 0.093, "rewards/accuracies": 1.0, "rewards/chosen": -5.6340861320495605, "rewards/margins": 2.2190909385681152, "rewards/rejected": -7.853177070617676, "step": 3810 }, { "epoch": 0.84, "learning_rate": 9.906961850656737e-06, "logits/chosen": -1.2561534643173218, "logits/rejected": -1.2561534643173218, "logps/chosen": -86.05360412597656, "logps/rejected": -86.05360412597656, "loss": 0.7192, "rewards/accuracies": 0.0, "rewards/chosen": -5.072761058807373, "rewards/margins": 0.0, "rewards/rejected": -5.072761058807373, "step": 3811 }, { "epoch": 0.84, "learning_rate": 9.906617384996118e-06, "logits/chosen": -1.1509524583816528, "logits/rejected": -1.1529499292373657, "logps/chosen": -87.98802947998047, "logps/rejected": -161.58334350585938, "loss": 0.2083, "rewards/accuracies": 1.0, "rewards/chosen": -1.3018051385879517, "rewards/margins": 2.4238104820251465, "rewards/rejected": -3.7256157398223877, "step": 3812 }, { "epoch": 0.84, "learning_rate": 9.906272288846962e-06, "logits/chosen": -1.2011358737945557, "logits/rejected": -1.3131763935089111, "logps/chosen": -164.9101104736328, "logps/rejected": -76.33941650390625, "loss": 0.269, "rewards/accuracies": 1.0, "rewards/chosen": -4.462090492248535, "rewards/margins": 0.34186553955078125, "rewards/rejected": -4.803956031799316, "step": 3813 }, { "epoch": 0.84, "learning_rate": 9.90592656225361e-06, "logits/chosen": -1.29918372631073, "logits/rejected": -1.2588139772415161, "logps/chosen": -74.13849639892578, "logps/rejected": -119.63298797607422, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": 0.7975074648857117, "rewards/margins": 3.307936191558838, "rewards/rejected": -2.5104286670684814, "step": 3814 }, { "epoch": 0.84, "learning_rate": 9.905580205260487e-06, "logits/chosen": -1.1903107166290283, "logits/rejected": -1.2034435272216797, "logps/chosen": -210.5977783203125, "logps/rejected": -257.9071960449219, "loss": 0.1643, "rewards/accuracies": 1.0, "rewards/chosen": -0.6839508414268494, "rewards/margins": 0.9460143446922302, "rewards/rejected": -1.6299651861190796, "step": 3815 }, { "epoch": 0.84, "learning_rate": 9.905233217912102e-06, "logits/chosen": -1.1965217590332031, "logits/rejected": -1.1965217590332031, "logps/chosen": -83.3832015991211, "logps/rejected": -83.3832015991211, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -1.7982810735702515, "rewards/margins": 0.0, "rewards/rejected": -1.7982810735702515, "step": 3816 }, { "epoch": 0.84, "learning_rate": 9.904885600253038e-06, "logits/chosen": -1.446834921836853, "logits/rejected": -1.4925295114517212, "logps/chosen": -214.97691345214844, "logps/rejected": -263.9254455566406, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 0.2647384703159332, "rewards/margins": 4.96875, "rewards/rejected": -4.7040114402771, "step": 3817 }, { "epoch": 0.85, "learning_rate": 9.904537352327968e-06, "logits/chosen": -1.5161463022232056, "logits/rejected": -1.4805270433425903, "logps/chosen": -114.09710693359375, "logps/rejected": -172.86746215820312, "loss": 0.2332, "rewards/accuracies": 1.0, "rewards/chosen": 1.077950358390808, "rewards/margins": 2.090679168701172, "rewards/rejected": -1.0127289295196533, "step": 3818 }, { "epoch": 0.85, "learning_rate": 9.904188474181637e-06, "logits/chosen": -1.5357824563980103, "logits/rejected": -1.5624761581420898, "logps/chosen": -109.92008209228516, "logps/rejected": -114.04396057128906, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -3.3087050914764404, "rewards/margins": 4.476138114929199, "rewards/rejected": -7.784843444824219, "step": 3819 }, { "epoch": 0.85, "learning_rate": 9.903838965858877e-06, "logits/chosen": -1.1290721893310547, "logits/rejected": -1.095674991607666, "logps/chosen": -123.05329895019531, "logps/rejected": -156.34695434570312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.8771514892578125, "rewards/margins": 5.906901836395264, "rewards/rejected": -5.029750347137451, "step": 3820 }, { "epoch": 0.85, "learning_rate": 9.9034888274046e-06, "logits/chosen": -0.8671870827674866, "logits/rejected": -0.7978402972221375, "logps/chosen": -211.94039916992188, "logps/rejected": -314.3192443847656, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -3.2436859607696533, "rewards/margins": 3.877716302871704, "rewards/rejected": -7.121402263641357, "step": 3821 }, { "epoch": 0.85, "learning_rate": 9.903138058863793e-06, "logits/chosen": -1.0056127309799194, "logits/rejected": -0.9560148119926453, "logps/chosen": -152.2825164794922, "logps/rejected": -182.7528076171875, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -2.0270402431488037, "rewards/margins": 4.45957088470459, "rewards/rejected": -6.486611366271973, "step": 3822 }, { "epoch": 0.85, "learning_rate": 9.902786660281533e-06, "logits/chosen": -1.1859179735183716, "logits/rejected": -1.1237711906433105, "logps/chosen": -222.45338439941406, "logps/rejected": -235.71871948242188, "loss": 0.4586, "rewards/accuracies": 0.0, "rewards/chosen": 0.3084045350551605, "rewards/margins": -0.39471742510795593, "rewards/rejected": 0.7031219601631165, "step": 3823 }, { "epoch": 0.85, "learning_rate": 9.902434631702976e-06, "logits/chosen": -0.9556702971458435, "logits/rejected": -0.9234536290168762, "logps/chosen": -170.66961669921875, "logps/rejected": -187.0821990966797, "loss": 0.5434, "rewards/accuracies": 0.0, "rewards/chosen": -2.1005921363830566, "rewards/margins": -0.6731095314025879, "rewards/rejected": -1.4274826049804688, "step": 3824 }, { "epoch": 0.85, "learning_rate": 9.902081973173352e-06, "logits/chosen": -1.4631966352462769, "logits/rejected": -1.549017310142517, "logps/chosen": -80.5155029296875, "logps/rejected": -69.52095031738281, "loss": 0.061, "rewards/accuracies": 1.0, "rewards/chosen": -0.49578019976615906, "rewards/margins": 4.769109725952148, "rewards/rejected": -5.264889717102051, "step": 3825 }, { "epoch": 0.85, "learning_rate": 9.901728684737977e-06, "logits/chosen": -1.2926039695739746, "logits/rejected": -1.1862003803253174, "logps/chosen": -152.70750427246094, "logps/rejected": -326.3726806640625, "loss": 0.3628, "rewards/accuracies": 1.0, "rewards/chosen": -6.36349630355835, "rewards/margins": 0.08693695068359375, "rewards/rejected": -6.450433254241943, "step": 3826 }, { "epoch": 0.85, "learning_rate": 9.901374766442252e-06, "logits/chosen": -1.3180744647979736, "logits/rejected": -1.272279143333435, "logps/chosen": -35.84959411621094, "logps/rejected": -212.61219787597656, "loss": 0.4501, "rewards/accuracies": 1.0, "rewards/chosen": -0.49727746844291687, "rewards/margins": 4.061041831970215, "rewards/rejected": -4.558319091796875, "step": 3827 }, { "epoch": 0.85, "learning_rate": 9.901020218331652e-06, "logits/chosen": -1.4745664596557617, "logits/rejected": -1.675467610359192, "logps/chosen": -115.9966049194336, "logps/rejected": -69.50794219970703, "loss": 0.1437, "rewards/accuracies": 1.0, "rewards/chosen": -3.035693407058716, "rewards/margins": 1.116734266281128, "rewards/rejected": -4.152427673339844, "step": 3828 }, { "epoch": 0.85, "learning_rate": 9.900665040451735e-06, "logits/chosen": -1.2172632217407227, "logits/rejected": -1.3122024536132812, "logps/chosen": -258.0667724609375, "logps/rejected": -173.20635986328125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.4079040586948395, "rewards/margins": 7.29805326461792, "rewards/rejected": -6.890149116516113, "step": 3829 }, { "epoch": 0.85, "learning_rate": 9.90030923284814e-06, "logits/chosen": -0.8637695908546448, "logits/rejected": -0.4250846803188324, "logps/chosen": -112.65404510498047, "logps/rejected": -592.0634765625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.051612138748169, "rewards/margins": 39.2370719909668, "rewards/rejected": -40.2886848449707, "step": 3830 }, { "epoch": 0.85, "learning_rate": 9.89995279556659e-06, "logits/chosen": -0.9269997477531433, "logits/rejected": -0.9459945559501648, "logps/chosen": -180.59976196289062, "logps/rejected": -78.35419464111328, "loss": 0.1716, "rewards/accuracies": 1.0, "rewards/chosen": -4.3714141845703125, "rewards/margins": 0.9874267578125, "rewards/rejected": -5.3588409423828125, "step": 3831 }, { "epoch": 0.85, "learning_rate": 9.899595728652883e-06, "logits/chosen": -0.938117504119873, "logits/rejected": -1.0102440118789673, "logps/chosen": -142.17507934570312, "logps/rejected": -190.96826171875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.5685211420059204, "rewards/margins": 5.18060302734375, "rewards/rejected": -6.749124050140381, "step": 3832 }, { "epoch": 0.85, "learning_rate": 9.899238032152907e-06, "logits/chosen": -1.042241096496582, "logits/rejected": -1.0404469966888428, "logps/chosen": -66.89971923828125, "logps/rejected": -63.43632125854492, "loss": 0.3435, "rewards/accuracies": 1.0, "rewards/chosen": -0.7339927554130554, "rewards/margins": 0.020542144775390625, "rewards/rejected": -0.754534900188446, "step": 3833 }, { "epoch": 0.85, "learning_rate": 9.898879706112618e-06, "logits/chosen": -1.0588184595108032, "logits/rejected": -1.0973899364471436, "logps/chosen": -112.54693603515625, "logps/rejected": -127.37208557128906, "loss": 0.2242, "rewards/accuracies": 1.0, "rewards/chosen": 0.7243087887763977, "rewards/margins": 0.570819079875946, "rewards/rejected": 0.15348969399929047, "step": 3834 }, { "epoch": 0.85, "learning_rate": 9.898520750578065e-06, "logits/chosen": -0.9275575876235962, "logits/rejected": -0.9436686635017395, "logps/chosen": -141.94528198242188, "logps/rejected": -161.94735717773438, "loss": 0.8285, "rewards/accuracies": 0.0, "rewards/chosen": -0.679333508014679, "rewards/margins": -0.03281557559967041, "rewards/rejected": -0.6465179324150085, "step": 3835 }, { "epoch": 0.85, "learning_rate": 9.898161165595371e-06, "logits/chosen": -1.0328892469406128, "logits/rejected": -1.0174520015716553, "logps/chosen": -115.24052429199219, "logps/rejected": -172.51266479492188, "loss": 0.0841, "rewards/accuracies": 1.0, "rewards/chosen": -0.2845146358013153, "rewards/margins": 2.093869686126709, "rewards/rejected": -2.3783843517303467, "step": 3836 }, { "epoch": 0.85, "learning_rate": 9.897800951210741e-06, "logits/chosen": -1.4659981727600098, "logits/rejected": -1.5638591051101685, "logps/chosen": -132.6364288330078, "logps/rejected": -113.02554321289062, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.4760269224643707, "rewards/margins": 8.157308578491211, "rewards/rejected": -7.681281566619873, "step": 3837 }, { "epoch": 0.85, "learning_rate": 9.897440107470463e-06, "logits/chosen": -1.1426100730895996, "logits/rejected": -1.0822633504867554, "logps/chosen": -209.27749633789062, "logps/rejected": -277.8056335449219, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": 1.0776245594024658, "rewards/margins": 7.603629112243652, "rewards/rejected": -6.526004314422607, "step": 3838 }, { "epoch": 0.85, "learning_rate": 9.897078634420905e-06, "logits/chosen": -1.0603948831558228, "logits/rejected": -1.0488982200622559, "logps/chosen": -107.76625061035156, "logps/rejected": -170.50466918945312, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -2.34869384765625, "rewards/margins": 3.129690647125244, "rewards/rejected": -5.478384494781494, "step": 3839 }, { "epoch": 0.85, "learning_rate": 9.896716532108515e-06, "logits/chosen": -1.0799659490585327, "logits/rejected": -0.3196706175804138, "logps/chosen": -89.50204467773438, "logps/rejected": -291.52484130859375, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": 1.128315806388855, "rewards/margins": 21.694032669067383, "rewards/rejected": -20.565717697143555, "step": 3840 }, { "epoch": 0.85, "learning_rate": 9.896353800579823e-06, "logits/chosen": -1.0633354187011719, "logits/rejected": -1.1958175897598267, "logps/chosen": -226.98486328125, "logps/rejected": -257.6324768066406, "loss": 0.0926, "rewards/accuracies": 1.0, "rewards/chosen": -5.156707763671875, "rewards/margins": 12.487020492553711, "rewards/rejected": -17.643728256225586, "step": 3841 }, { "epoch": 0.85, "learning_rate": 9.895990439881436e-06, "logits/chosen": -0.78118896484375, "logits/rejected": -0.7992210388183594, "logps/chosen": -167.98089599609375, "logps/rejected": -126.34481048583984, "loss": 0.9628, "rewards/accuracies": 0.0, "rewards/chosen": -1.9318665266036987, "rewards/margins": -0.9671463370323181, "rewards/rejected": -0.9647201895713806, "step": 3842 }, { "epoch": 0.85, "learning_rate": 9.895626450060047e-06, "logits/chosen": -1.2423288822174072, "logits/rejected": -1.2390445470809937, "logps/chosen": -81.62862396240234, "logps/rejected": -62.809364318847656, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.270028680562973, "rewards/margins": 3.5627126693725586, "rewards/rejected": -3.8327412605285645, "step": 3843 }, { "epoch": 0.85, "learning_rate": 9.89526183116243e-06, "logits/chosen": -1.0890662670135498, "logits/rejected": -0.7976745963096619, "logps/chosen": -180.97613525390625, "logps/rejected": -537.2390747070312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.812103271484375, "rewards/margins": 46.9720344543457, "rewards/rejected": -46.15993118286133, "step": 3844 }, { "epoch": 0.85, "learning_rate": 9.894896583235434e-06, "logits/chosen": -1.0821553468704224, "logits/rejected": -1.0821020603179932, "logps/chosen": -103.5157470703125, "logps/rejected": -113.89049530029297, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -2.8550820350646973, "rewards/margins": 3.4180421829223633, "rewards/rejected": -6.2731242179870605, "step": 3845 }, { "epoch": 0.85, "learning_rate": 9.894530706325994e-06, "logits/chosen": -0.9601417183876038, "logits/rejected": -0.9601417183876038, "logps/chosen": -142.30178833007812, "logps/rejected": -142.30178833007812, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -1.5931122303009033, "rewards/margins": 0.0, "rewards/rejected": -1.5931122303009033, "step": 3846 }, { "epoch": 0.85, "learning_rate": 9.894164200481124e-06, "logits/chosen": -0.9682796001434326, "logits/rejected": -1.027513027191162, "logps/chosen": -193.60552978515625, "logps/rejected": -92.3826904296875, "loss": 0.1435, "rewards/accuracies": 1.0, "rewards/chosen": -0.07716064900159836, "rewards/margins": 6.022885799407959, "rewards/rejected": -6.100046634674072, "step": 3847 }, { "epoch": 0.85, "learning_rate": 9.89379706574792e-06, "logits/chosen": -0.8410792946815491, "logits/rejected": -0.8410792946815491, "logps/chosen": -117.19523620605469, "logps/rejected": -117.19523620605469, "loss": 0.3472, "rewards/accuracies": 0.0, "rewards/chosen": -2.2586236000061035, "rewards/margins": 0.0, "rewards/rejected": -2.2586236000061035, "step": 3848 }, { "epoch": 0.85, "learning_rate": 9.893429302173558e-06, "logits/chosen": -1.0929604768753052, "logits/rejected": -1.1447104215621948, "logps/chosen": -98.66560363769531, "logps/rejected": -180.294189453125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.9491737484931946, "rewards/margins": 9.11133098602295, "rewards/rejected": -8.16215705871582, "step": 3849 }, { "epoch": 0.85, "learning_rate": 9.893060909805294e-06, "logits/chosen": -1.2140122652053833, "logits/rejected": -1.2140122652053833, "logps/chosen": -121.37527465820312, "logps/rejected": -121.37527465820312, "loss": 0.3592, "rewards/accuracies": 0.0, "rewards/chosen": -3.3337600231170654, "rewards/margins": 0.0, "rewards/rejected": -3.3337600231170654, "step": 3850 }, { "epoch": 0.85, "learning_rate": 9.892691888690466e-06, "logits/chosen": -1.1338902711868286, "logits/rejected": -1.2838417291641235, "logps/chosen": -248.97105407714844, "logps/rejected": -148.97457885742188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7622634768486023, "rewards/margins": 10.238049507141113, "rewards/rejected": -9.475786209106445, "step": 3851 }, { "epoch": 0.85, "learning_rate": 9.892322238876492e-06, "logits/chosen": -0.79905766248703, "logits/rejected": -0.7885413765907288, "logps/chosen": -106.41239929199219, "logps/rejected": -110.32292175292969, "loss": 0.4314, "rewards/accuracies": 1.0, "rewards/chosen": -3.3105735778808594, "rewards/margins": 0.030439138412475586, "rewards/rejected": -3.341012716293335, "step": 3852 }, { "epoch": 0.85, "learning_rate": 9.89195196041087e-06, "logits/chosen": -1.0207680463790894, "logits/rejected": -0.9971666932106018, "logps/chosen": -149.25802612304688, "logps/rejected": -141.6878662109375, "loss": 0.1738, "rewards/accuracies": 1.0, "rewards/chosen": -0.5742706656455994, "rewards/margins": 1.179192304611206, "rewards/rejected": -1.7534630298614502, "step": 3853 }, { "epoch": 0.85, "learning_rate": 9.891581053341182e-06, "logits/chosen": -0.9638588428497314, "logits/rejected": -1.0383177995681763, "logps/chosen": -160.73013305664062, "logps/rejected": -158.86639404296875, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": 1.5146652460098267, "rewards/margins": 3.5817551612854004, "rewards/rejected": -2.067089796066284, "step": 3854 }, { "epoch": 0.85, "learning_rate": 9.891209517715088e-06, "logits/chosen": -0.8879330158233643, "logits/rejected": -0.8939452171325684, "logps/chosen": -121.5058364868164, "logps/rejected": -179.66183471679688, "loss": 0.4346, "rewards/accuracies": 1.0, "rewards/chosen": 0.5431548953056335, "rewards/margins": 5.343985080718994, "rewards/rejected": -4.800830364227295, "step": 3855 }, { "epoch": 0.85, "learning_rate": 9.890837353580327e-06, "logits/chosen": -0.934421181678772, "logits/rejected": -0.9534350037574768, "logps/chosen": -86.21031951904297, "logps/rejected": -60.499691009521484, "loss": 0.2115, "rewards/accuracies": 1.0, "rewards/chosen": -1.1310760974884033, "rewards/margins": 0.6416527032852173, "rewards/rejected": -1.7727288007736206, "step": 3856 }, { "epoch": 0.85, "learning_rate": 9.890464560984725e-06, "logits/chosen": -1.5377075672149658, "logits/rejected": -1.598192572593689, "logps/chosen": -97.85269165039062, "logps/rejected": -132.11912536621094, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.4879867732524872, "rewards/margins": 5.079280376434326, "rewards/rejected": -5.567266941070557, "step": 3857 }, { "epoch": 0.85, "learning_rate": 9.890091139976183e-06, "logits/chosen": -0.9837865233421326, "logits/rejected": -1.0117262601852417, "logps/chosen": -76.13248443603516, "logps/rejected": -63.46776580810547, "loss": 0.142, "rewards/accuracies": 1.0, "rewards/chosen": -3.174697160720825, "rewards/margins": 1.131446123123169, "rewards/rejected": -4.306143283843994, "step": 3858 }, { "epoch": 0.85, "learning_rate": 9.889717090602685e-06, "logits/chosen": -1.0401297807693481, "logits/rejected": -1.0487419366836548, "logps/chosen": -104.53996276855469, "logps/rejected": -184.12619018554688, "loss": 0.2105, "rewards/accuracies": 1.0, "rewards/chosen": 0.0016540527576580644, "rewards/margins": 0.6976165771484375, "rewards/rejected": -0.6959625482559204, "step": 3859 }, { "epoch": 0.85, "learning_rate": 9.889342412912296e-06, "logits/chosen": -1.195175290107727, "logits/rejected": -1.2468717098236084, "logps/chosen": -231.15652465820312, "logps/rejected": -206.7525634765625, "loss": 0.1288, "rewards/accuracies": 1.0, "rewards/chosen": -0.371224969625473, "rewards/margins": 5.871440410614014, "rewards/rejected": -6.2426652908325195, "step": 3860 }, { "epoch": 0.85, "learning_rate": 9.88896710695316e-06, "logits/chosen": -1.2673392295837402, "logits/rejected": -1.275634765625, "logps/chosen": -122.85550689697266, "logps/rejected": -134.2736358642578, "loss": 0.0446, "rewards/accuracies": 1.0, "rewards/chosen": -1.2959831953048706, "rewards/margins": 2.3732352256774902, "rewards/rejected": -3.6692185401916504, "step": 3861 }, { "epoch": 0.85, "learning_rate": 9.888591172773502e-06, "logits/chosen": -1.056325912475586, "logits/rejected": -1.0719261169433594, "logps/chosen": -84.40830993652344, "logps/rejected": -94.72667694091797, "loss": 0.0429, "rewards/accuracies": 1.0, "rewards/chosen": -0.7842170596122742, "rewards/margins": 2.4118218421936035, "rewards/rejected": -3.1960389614105225, "step": 3862 }, { "epoch": 0.86, "learning_rate": 9.888214610421633e-06, "logits/chosen": -1.1473721265792847, "logits/rejected": -1.1473721265792847, "logps/chosen": -126.31232452392578, "logps/rejected": -126.31232452392578, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": 0.7957755923271179, "rewards/margins": 0.0, "rewards/rejected": 0.7957755923271179, "step": 3863 }, { "epoch": 0.86, "learning_rate": 9.887837419945937e-06, "logits/chosen": -1.1553996801376343, "logits/rejected": -1.103761911392212, "logps/chosen": -142.39488220214844, "logps/rejected": -107.07022094726562, "loss": 0.3489, "rewards/accuracies": 1.0, "rewards/chosen": -0.7755767703056335, "rewards/margins": 0.9385033249855042, "rewards/rejected": -1.7140800952911377, "step": 3864 }, { "epoch": 0.86, "learning_rate": 9.887459601394881e-06, "logits/chosen": -1.1460068225860596, "logits/rejected": -1.341188669204712, "logps/chosen": -243.14047241210938, "logps/rejected": -67.83958435058594, "loss": 0.1364, "rewards/accuracies": 1.0, "rewards/chosen": 1.3321411609649658, "rewards/margins": 4.516887664794922, "rewards/rejected": -3.184746265411377, "step": 3865 }, { "epoch": 0.86, "learning_rate": 9.887081154817015e-06, "logits/chosen": -0.9159684777259827, "logits/rejected": -0.9735379815101624, "logps/chosen": -85.81011199951172, "logps/rejected": -86.53578186035156, "loss": 0.3546, "rewards/accuracies": 1.0, "rewards/chosen": -1.662940263748169, "rewards/margins": 4.118708610534668, "rewards/rejected": -5.781649112701416, "step": 3866 }, { "epoch": 0.86, "learning_rate": 9.88670208026097e-06, "logits/chosen": -0.9912461042404175, "logits/rejected": -0.9336175918579102, "logps/chosen": -105.905517578125, "logps/rejected": -267.67010498046875, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": 1.0566933155059814, "rewards/margins": 6.304781913757324, "rewards/rejected": -5.248088359832764, "step": 3867 }, { "epoch": 0.86, "learning_rate": 9.886322377775455e-06, "logits/chosen": -1.2102137804031372, "logits/rejected": -1.1499251127243042, "logps/chosen": -96.46772766113281, "logps/rejected": -177.5230712890625, "loss": 0.4522, "rewards/accuracies": 0.0, "rewards/chosen": -0.14323273301124573, "rewards/margins": -0.3855224847793579, "rewards/rejected": 0.242289736866951, "step": 3868 }, { "epoch": 0.86, "learning_rate": 9.885942047409262e-06, "logits/chosen": -1.3625823259353638, "logits/rejected": -1.3617788553237915, "logps/chosen": -67.27536010742188, "logps/rejected": -148.47882080078125, "loss": 1.1231, "rewards/accuracies": 1.0, "rewards/chosen": -0.06886138767004013, "rewards/margins": 4.067633628845215, "rewards/rejected": -4.136495113372803, "step": 3869 }, { "epoch": 0.86, "learning_rate": 9.885561089211259e-06, "logits/chosen": -1.1420459747314453, "logits/rejected": -1.1177973747253418, "logps/chosen": -92.340087890625, "logps/rejected": -160.4754180908203, "loss": 0.2267, "rewards/accuracies": 1.0, "rewards/chosen": 0.6801597476005554, "rewards/margins": 2.064406633377075, "rewards/rejected": -1.384246826171875, "step": 3870 }, { "epoch": 0.86, "learning_rate": 9.885179503230403e-06, "logits/chosen": -1.3196632862091064, "logits/rejected": -1.4338791370391846, "logps/chosen": -170.0657958984375, "logps/rejected": -49.69096374511719, "loss": 0.4862, "rewards/accuracies": 0.0, "rewards/chosen": -0.11339416354894638, "rewards/margins": -0.3687705993652344, "rewards/rejected": 0.2553764283657074, "step": 3871 }, { "epoch": 0.86, "learning_rate": 9.884797289515723e-06, "logits/chosen": -1.427344799041748, "logits/rejected": -1.3890470266342163, "logps/chosen": -211.63528442382812, "logps/rejected": -281.101806640625, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 3.0983550548553467, "rewards/margins": 6.246068000793457, "rewards/rejected": -3.1477127075195312, "step": 3872 }, { "epoch": 0.86, "learning_rate": 9.884414448116335e-06, "logits/chosen": -1.2856221199035645, "logits/rejected": -1.2643932104110718, "logps/chosen": -114.62968444824219, "logps/rejected": -161.01522827148438, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 0.6610275506973267, "rewards/margins": 4.579579830169678, "rewards/rejected": -3.9185523986816406, "step": 3873 }, { "epoch": 0.86, "learning_rate": 9.88403097908143e-06, "logits/chosen": -0.8747868537902832, "logits/rejected": -0.8640111684799194, "logps/chosen": -137.25279235839844, "logps/rejected": -175.0819091796875, "loss": 0.1905, "rewards/accuracies": 1.0, "rewards/chosen": 0.6264801025390625, "rewards/margins": 0.8601104617118835, "rewards/rejected": -0.23363037407398224, "step": 3874 }, { "epoch": 0.86, "learning_rate": 9.883646882460287e-06, "logits/chosen": -1.4252662658691406, "logits/rejected": -1.3904210329055786, "logps/chosen": -114.04711151123047, "logps/rejected": -133.68511962890625, "loss": 0.1236, "rewards/accuracies": 1.0, "rewards/chosen": -0.5236396789550781, "rewards/margins": 1.384626030921936, "rewards/rejected": -1.9082657098770142, "step": 3875 }, { "epoch": 0.86, "learning_rate": 9.883262158302259e-06, "logits/chosen": -0.8544196486473083, "logits/rejected": -0.8544196486473083, "logps/chosen": -110.74919891357422, "logps/rejected": -110.74919891357422, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -1.3930275440216064, "rewards/margins": 0.0, "rewards/rejected": -1.3930275440216064, "step": 3876 }, { "epoch": 0.86, "learning_rate": 9.882876806656783e-06, "logits/chosen": -1.3658934831619263, "logits/rejected": -0.8316420316696167, "logps/chosen": -143.67227172851562, "logps/rejected": -372.458984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.2409164905548096, "rewards/margins": 23.171571731567383, "rewards/rejected": -25.41248893737793, "step": 3877 }, { "epoch": 0.86, "learning_rate": 9.882490827573375e-06, "logits/chosen": -1.1518652439117432, "logits/rejected": -1.1587022542953491, "logps/chosen": -89.06365203857422, "logps/rejected": -75.8248519897461, "loss": 0.5374, "rewards/accuracies": 1.0, "rewards/chosen": -1.1791877746582031, "rewards/margins": 4.020694255828857, "rewards/rejected": -5.1998820304870605, "step": 3878 }, { "epoch": 0.86, "learning_rate": 9.882104221101634e-06, "logits/chosen": -1.412191390991211, "logits/rejected": -1.4009767770767212, "logps/chosen": -101.13168334960938, "logps/rejected": -120.74131774902344, "loss": 0.2193, "rewards/accuracies": 1.0, "rewards/chosen": -0.2950241267681122, "rewards/margins": 0.880242109298706, "rewards/rejected": -1.1752662658691406, "step": 3879 }, { "epoch": 0.86, "learning_rate": 9.881716987291235e-06, "logits/chosen": -0.8327972292900085, "logits/rejected": -0.8976604342460632, "logps/chosen": -230.9424591064453, "logps/rejected": -133.6654815673828, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 1.3111008405685425, "rewards/margins": 5.497308731079102, "rewards/rejected": -4.1862077713012695, "step": 3880 }, { "epoch": 0.86, "learning_rate": 9.88132912619194e-06, "logits/chosen": -1.3536185026168823, "logits/rejected": -1.3536185026168823, "logps/chosen": -172.97528076171875, "logps/rejected": -172.97528076171875, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.480954170227051, "rewards/margins": 0.0, "rewards/rejected": -4.480954170227051, "step": 3881 }, { "epoch": 0.86, "learning_rate": 9.880940637853585e-06, "logits/chosen": -1.171146273612976, "logits/rejected": -1.171146273612976, "logps/chosen": -169.04193115234375, "logps/rejected": -169.04193115234375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.475781440734863, "rewards/margins": 0.0, "rewards/rejected": -5.475781440734863, "step": 3882 }, { "epoch": 0.86, "learning_rate": 9.880551522326093e-06, "logits/chosen": -1.1457524299621582, "logits/rejected": -1.121645450592041, "logps/chosen": -92.919189453125, "logps/rejected": -187.8372039794922, "loss": 0.7097, "rewards/accuracies": 1.0, "rewards/chosen": -0.007244873326271772, "rewards/margins": 0.06056976690888405, "rewards/rejected": -0.06781464070081711, "step": 3883 }, { "epoch": 0.86, "learning_rate": 9.880161779659463e-06, "logits/chosen": -1.1664707660675049, "logits/rejected": -1.4676258563995361, "logps/chosen": -224.34835815429688, "logps/rejected": -103.97267150878906, "loss": 0.1568, "rewards/accuracies": 1.0, "rewards/chosen": -0.3514816462993622, "rewards/margins": 6.4519124031066895, "rewards/rejected": -6.803393840789795, "step": 3884 }, { "epoch": 0.86, "learning_rate": 9.879771409903775e-06, "logits/chosen": -1.3053961992263794, "logits/rejected": -1.4535564184188843, "logps/chosen": -159.97869873046875, "logps/rejected": -149.13401794433594, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": 0.6092270016670227, "rewards/margins": 5.526491165161133, "rewards/rejected": -4.917263984680176, "step": 3885 }, { "epoch": 0.86, "learning_rate": 9.879380413109193e-06, "logits/chosen": -1.490006446838379, "logits/rejected": -1.4563955068588257, "logps/chosen": -75.7843246459961, "logps/rejected": -153.33570861816406, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": -2.6456456184387207, "rewards/margins": 3.8049182891845703, "rewards/rejected": -6.450563907623291, "step": 3886 }, { "epoch": 0.86, "learning_rate": 9.878988789325955e-06, "logits/chosen": -1.4332619905471802, "logits/rejected": -1.542277216911316, "logps/chosen": -204.62164306640625, "logps/rejected": -150.99392700195312, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 2.5125367641448975, "rewards/margins": 7.672621726989746, "rewards/rejected": -5.1600847244262695, "step": 3887 }, { "epoch": 0.86, "learning_rate": 9.878596538604388e-06, "logits/chosen": -1.32844877243042, "logits/rejected": -1.2612167596817017, "logps/chosen": -82.23495483398438, "logps/rejected": -195.525390625, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -0.2381736785173416, "rewards/margins": 3.2635841369628906, "rewards/rejected": -3.501757860183716, "step": 3888 }, { "epoch": 0.86, "learning_rate": 9.878203660994894e-06, "logits/chosen": -1.0946221351623535, "logits/rejected": -1.014190673828125, "logps/chosen": -138.67457580566406, "logps/rejected": -317.97674560546875, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -0.02470703236758709, "rewards/margins": 13.984875679016113, "rewards/rejected": -14.00958251953125, "step": 3889 }, { "epoch": 0.86, "learning_rate": 9.877810156547956e-06, "logits/chosen": -1.1413798332214355, "logits/rejected": -1.1413798332214355, "logps/chosen": -195.96206665039062, "logps/rejected": -195.96206665039062, "loss": 0.7512, "rewards/accuracies": 0.0, "rewards/chosen": -5.2149553298950195, "rewards/margins": 0.0, "rewards/rejected": -5.2149553298950195, "step": 3890 }, { "epoch": 0.86, "learning_rate": 9.877416025314139e-06, "logits/chosen": -1.0389219522476196, "logits/rejected": -0.9913609623908997, "logps/chosen": -182.59146118164062, "logps/rejected": -242.86868286132812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 1.1538788080215454, "rewards/margins": 12.27513313293457, "rewards/rejected": -11.121253967285156, "step": 3891 }, { "epoch": 0.86, "learning_rate": 9.877021267344087e-06, "logits/chosen": -1.218216896057129, "logits/rejected": -1.218216896057129, "logps/chosen": -279.84649658203125, "logps/rejected": -279.84649658203125, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -1.4111053943634033, "rewards/margins": 0.0, "rewards/rejected": -1.4111053943634033, "step": 3892 }, { "epoch": 0.86, "learning_rate": 9.876625882688526e-06, "logits/chosen": -0.9322617053985596, "logits/rejected": -0.4790053963661194, "logps/chosen": -216.50466918945312, "logps/rejected": -563.5626220703125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.41827699542045593, "rewards/margins": 45.66299057006836, "rewards/rejected": -45.244712829589844, "step": 3893 }, { "epoch": 0.86, "learning_rate": 9.876229871398263e-06, "logits/chosen": -1.2244991064071655, "logits/rejected": -1.198566198348999, "logps/chosen": -223.20132446289062, "logps/rejected": -188.66481018066406, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": 0.6865783929824829, "rewards/margins": 2.7539567947387695, "rewards/rejected": -2.067378282546997, "step": 3894 }, { "epoch": 0.86, "learning_rate": 9.875833233524183e-06, "logits/chosen": -1.2318272590637207, "logits/rejected": -1.2457077503204346, "logps/chosen": -104.1661148071289, "logps/rejected": -127.76008605957031, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": 0.2483467161655426, "rewards/margins": 6.047454833984375, "rewards/rejected": -5.799108028411865, "step": 3895 }, { "epoch": 0.86, "learning_rate": 9.875435969117254e-06, "logits/chosen": -0.9294084906578064, "logits/rejected": -0.8662672638893127, "logps/chosen": -60.97484588623047, "logps/rejected": -148.93304443359375, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": 0.1882476806640625, "rewards/margins": 3.848555088043213, "rewards/rejected": -3.6603074073791504, "step": 3896 }, { "epoch": 0.86, "learning_rate": 9.875038078228522e-06, "logits/chosen": -1.1963640451431274, "logits/rejected": -1.1818081140518188, "logps/chosen": -271.5181884765625, "logps/rejected": -295.4068603515625, "loss": 0.2273, "rewards/accuracies": 1.0, "rewards/chosen": -2.4413392543792725, "rewards/margins": 0.5523223876953125, "rewards/rejected": -2.993661642074585, "step": 3897 }, { "epoch": 0.86, "learning_rate": 9.874639560909118e-06, "logits/chosen": -1.342803955078125, "logits/rejected": -1.282545804977417, "logps/chosen": -113.56137084960938, "logps/rejected": -243.56341552734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.4811569154262543, "rewards/margins": 6.915977478027344, "rewards/rejected": -6.434820652008057, "step": 3898 }, { "epoch": 0.86, "learning_rate": 9.87424041721025e-06, "logits/chosen": -1.0906331539154053, "logits/rejected": -0.6107569336891174, "logps/chosen": -218.864990234375, "logps/rejected": -657.8923950195312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.644580066204071, "rewards/margins": 46.550811767578125, "rewards/rejected": -47.19539260864258, "step": 3899 }, { "epoch": 0.86, "learning_rate": 9.873840647183204e-06, "logits/chosen": -1.3662917613983154, "logits/rejected": -1.4005861282348633, "logps/chosen": -167.34291076660156, "logps/rejected": -65.49754333496094, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": 0.6904007196426392, "rewards/margins": 4.857569694519043, "rewards/rejected": -4.167169094085693, "step": 3900 }, { "epoch": 0.86, "learning_rate": 9.87344025087935e-06, "logits/chosen": -1.5902884006500244, "logits/rejected": -1.4896886348724365, "logps/chosen": -95.0378189086914, "logps/rejected": -232.74560546875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.2485625743865967, "rewards/margins": 5.508369445800781, "rewards/rejected": -8.756932258605957, "step": 3901 }, { "epoch": 0.86, "learning_rate": 9.87303922835014e-06, "logits/chosen": -1.2942068576812744, "logits/rejected": -1.3530534505844116, "logps/chosen": -236.27352905273438, "logps/rejected": -184.39710998535156, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.0150421857833862, "rewards/margins": 4.8011932373046875, "rewards/rejected": -5.816235542297363, "step": 3902 }, { "epoch": 0.86, "learning_rate": 9.872637579647105e-06, "logits/chosen": -1.4021986722946167, "logits/rejected": -1.4160833358764648, "logps/chosen": -97.0872573852539, "logps/rejected": -106.65250396728516, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -1.4274482727050781, "rewards/margins": 3.6987290382385254, "rewards/rejected": -5.1261773109436035, "step": 3903 }, { "epoch": 0.86, "learning_rate": 9.872235304821853e-06, "logits/chosen": -1.0036590099334717, "logits/rejected": -0.8686898350715637, "logps/chosen": -113.27272033691406, "logps/rejected": -180.31927490234375, "loss": 0.4418, "rewards/accuracies": 0.0, "rewards/chosen": -5.088475704193115, "rewards/margins": -0.34511613845825195, "rewards/rejected": -4.743359565734863, "step": 3904 }, { "epoch": 0.86, "learning_rate": 9.871832403926077e-06, "logits/chosen": -1.418121099472046, "logits/rejected": -1.418121099472046, "logps/chosen": -87.29067993164062, "logps/rejected": -87.29067993164062, "loss": 0.5142, "rewards/accuracies": 0.0, "rewards/chosen": -0.7627243399620056, "rewards/margins": 0.0, "rewards/rejected": -0.7627243399620056, "step": 3905 }, { "epoch": 0.86, "learning_rate": 9.871428877011549e-06, "logits/chosen": -1.5096040964126587, "logits/rejected": -1.5203996896743774, "logps/chosen": -104.83984375, "logps/rejected": -158.383056640625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.238337755203247, "rewards/margins": 7.045241355895996, "rewards/rejected": -8.283578872680664, "step": 3906 }, { "epoch": 0.86, "learning_rate": 9.87102472413012e-06, "logits/chosen": -0.8863471150398254, "logits/rejected": -0.8473560810089111, "logps/chosen": -209.4044647216797, "logps/rejected": -387.76141357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.455397129058838, "rewards/margins": 11.11595344543457, "rewards/rejected": -8.660555839538574, "step": 3907 }, { "epoch": 0.86, "learning_rate": 9.870619945333727e-06, "logits/chosen": -1.474152684211731, "logits/rejected": -1.411015272140503, "logps/chosen": -123.59100341796875, "logps/rejected": -219.53880310058594, "loss": 0.653, "rewards/accuracies": 0.0, "rewards/chosen": -5.783118724822998, "rewards/margins": -0.9666652679443359, "rewards/rejected": -4.816453456878662, "step": 3908 }, { "epoch": 0.87, "learning_rate": 9.870214540674377e-06, "logits/chosen": -0.8362870812416077, "logits/rejected": -0.8641862869262695, "logps/chosen": -90.05043029785156, "logps/rejected": -96.75790405273438, "loss": 0.2071, "rewards/accuracies": 1.0, "rewards/chosen": -1.3846282958984375, "rewards/margins": 3.6486973762512207, "rewards/rejected": -5.033325672149658, "step": 3909 }, { "epoch": 0.87, "learning_rate": 9.869808510204165e-06, "logits/chosen": -1.3750224113464355, "logits/rejected": -1.2865113019943237, "logps/chosen": -130.44317626953125, "logps/rejected": -219.36041259765625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.36443328857421875, "rewards/margins": 5.117059230804443, "rewards/rejected": -5.481492519378662, "step": 3910 }, { "epoch": 0.87, "learning_rate": 9.869401853975268e-06, "logits/chosen": -1.2737239599227905, "logits/rejected": -1.2749180793762207, "logps/chosen": -86.92662048339844, "logps/rejected": -143.1328125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.03221283107995987, "rewards/margins": 5.507748603820801, "rewards/rejected": -5.539961338043213, "step": 3911 }, { "epoch": 0.87, "learning_rate": 9.868994572039939e-06, "logits/chosen": -1.1036348342895508, "logits/rejected": -1.2383559942245483, "logps/chosen": -205.5600128173828, "logps/rejected": -216.92074584960938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.8069869875907898, "rewards/margins": 9.351661682128906, "rewards/rejected": -8.54467487335205, "step": 3912 }, { "epoch": 0.87, "learning_rate": 9.86858666445051e-06, "logits/chosen": -1.3117791414260864, "logits/rejected": -1.342928171157837, "logps/chosen": -94.47232055664062, "logps/rejected": -114.91629028320312, "loss": 0.476, "rewards/accuracies": 1.0, "rewards/chosen": -0.3696250915527344, "rewards/margins": 3.8026232719421387, "rewards/rejected": -4.172248363494873, "step": 3913 }, { "epoch": 0.87, "learning_rate": 9.8681781312594e-06, "logits/chosen": -1.220800757408142, "logits/rejected": -1.2813634872436523, "logps/chosen": -120.90690612792969, "logps/rejected": -193.6513214111328, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.8246612548828125, "rewards/margins": 12.033281326293945, "rewards/rejected": -12.857942581176758, "step": 3914 }, { "epoch": 0.87, "learning_rate": 9.867768972519101e-06, "logits/chosen": -1.3977599143981934, "logits/rejected": -1.3972152471542358, "logps/chosen": -190.68154907226562, "logps/rejected": -349.8013000488281, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 2.642263889312744, "rewards/margins": 11.83564567565918, "rewards/rejected": -9.193381309509277, "step": 3915 }, { "epoch": 0.87, "learning_rate": 9.867359188282193e-06, "logits/chosen": -1.0907994508743286, "logits/rejected": -1.0907994508743286, "logps/chosen": -147.66299438476562, "logps/rejected": -147.66299438476562, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.357495069503784, "rewards/margins": 0.0, "rewards/rejected": -2.357495069503784, "step": 3916 }, { "epoch": 0.87, "learning_rate": 9.86694877860133e-06, "logits/chosen": -1.409332275390625, "logits/rejected": -1.4434009790420532, "logps/chosen": -160.78512573242188, "logps/rejected": -144.90994262695312, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": -1.9348785877227783, "rewards/margins": 2.91086745262146, "rewards/rejected": -4.845746040344238, "step": 3917 }, { "epoch": 0.87, "learning_rate": 9.866537743529247e-06, "logits/chosen": -1.3524866104125977, "logits/rejected": -1.3748070001602173, "logps/chosen": -115.42160034179688, "logps/rejected": -91.39266967773438, "loss": 0.448, "rewards/accuracies": 1.0, "rewards/chosen": 0.15957413613796234, "rewards/margins": 1.4916954040527344, "rewards/rejected": -1.3321212530136108, "step": 3918 }, { "epoch": 0.87, "learning_rate": 9.866126083118765e-06, "logits/chosen": -1.1536378860473633, "logits/rejected": -1.2189977169036865, "logps/chosen": -184.86634826660156, "logps/rejected": -176.64215087890625, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": 0.9998337030410767, "rewards/margins": 7.479516506195068, "rewards/rejected": -6.479682922363281, "step": 3919 }, { "epoch": 0.87, "learning_rate": 9.865713797422778e-06, "logits/chosen": -1.2025078535079956, "logits/rejected": -1.1558115482330322, "logps/chosen": -83.08853912353516, "logps/rejected": -170.63333129882812, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.8402275443077087, "rewards/margins": 6.097846031188965, "rewards/rejected": -6.938073635101318, "step": 3920 }, { "epoch": 0.87, "learning_rate": 9.865300886494264e-06, "logits/chosen": -1.7721378803253174, "logits/rejected": -1.7562540769577026, "logps/chosen": -120.60360717773438, "logps/rejected": -117.62228393554688, "loss": 0.1102, "rewards/accuracies": 1.0, "rewards/chosen": -1.4914398193359375, "rewards/margins": 1.7439064979553223, "rewards/rejected": -3.2353463172912598, "step": 3921 }, { "epoch": 0.87, "learning_rate": 9.864887350386284e-06, "logits/chosen": -1.5541741847991943, "logits/rejected": -1.376197099685669, "logps/chosen": -114.17634582519531, "logps/rejected": -253.63446044921875, "loss": 0.0932, "rewards/accuracies": 1.0, "rewards/chosen": -2.847372531890869, "rewards/margins": 1.5851960182189941, "rewards/rejected": -4.432568550109863, "step": 3922 }, { "epoch": 0.87, "learning_rate": 9.864473189151972e-06, "logits/chosen": -1.6317249536514282, "logits/rejected": -1.6317249536514282, "logps/chosen": -96.88633728027344, "logps/rejected": -96.88633728027344, "loss": 0.3469, "rewards/accuracies": 0.0, "rewards/chosen": -3.80124831199646, "rewards/margins": 0.0, "rewards/rejected": -3.80124831199646, "step": 3923 }, { "epoch": 0.87, "learning_rate": 9.864058402844553e-06, "logits/chosen": -1.1229826211929321, "logits/rejected": -1.1188441514968872, "logps/chosen": -175.72674560546875, "logps/rejected": -273.0927734375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 1.4000107049942017, "rewards/margins": 5.5781450271606445, "rewards/rejected": -4.178134441375732, "step": 3924 }, { "epoch": 0.87, "learning_rate": 9.863642991517317e-06, "logits/chosen": -1.2450768947601318, "logits/rejected": -1.29301917552948, "logps/chosen": -290.0719909667969, "logps/rejected": -193.8286590576172, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 2.8705291748046875, "rewards/margins": 6.816832065582275, "rewards/rejected": -3.946302890777588, "step": 3925 }, { "epoch": 0.87, "learning_rate": 9.863226955223653e-06, "logits/chosen": -1.3868567943572998, "logits/rejected": -1.3759821653366089, "logps/chosen": -95.76229095458984, "logps/rejected": -190.81210327148438, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.5166336297988892, "rewards/margins": 7.181251525878906, "rewards/rejected": -6.664618015289307, "step": 3926 }, { "epoch": 0.87, "learning_rate": 9.862810294017014e-06, "logits/chosen": -1.3417589664459229, "logits/rejected": -1.363402247428894, "logps/chosen": -115.36885833740234, "logps/rejected": -150.95559692382812, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": -2.0992438793182373, "rewards/margins": 2.92927622795105, "rewards/rejected": -5.028520107269287, "step": 3927 }, { "epoch": 0.87, "learning_rate": 9.86239300795094e-06, "logits/chosen": -1.2160383462905884, "logits/rejected": -1.063228726387024, "logps/chosen": -159.54693603515625, "logps/rejected": -166.86813354492188, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": 3.8584868907928467, "rewards/margins": 3.875410318374634, "rewards/rejected": -0.01692352257668972, "step": 3928 }, { "epoch": 0.87, "learning_rate": 9.861975097079057e-06, "logits/chosen": -1.2817376852035522, "logits/rejected": -1.3539819717407227, "logps/chosen": -199.26708984375, "logps/rejected": -78.30328369140625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.499197393655777, "rewards/margins": 5.555060386657715, "rewards/rejected": -5.055862903594971, "step": 3929 }, { "epoch": 0.87, "learning_rate": 9.861556561455061e-06, "logits/chosen": -1.1675885915756226, "logits/rejected": -1.149095058441162, "logps/chosen": -149.64617919921875, "logps/rejected": -179.15414428710938, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -2.4336915016174316, "rewards/margins": 3.8867812156677246, "rewards/rejected": -6.320472717285156, "step": 3930 }, { "epoch": 0.87, "learning_rate": 9.861137401132733e-06, "logits/chosen": -0.9722816348075867, "logits/rejected": -0.9401031136512756, "logps/chosen": -69.95008850097656, "logps/rejected": -136.1838836669922, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 0.07972107082605362, "rewards/margins": 4.045872688293457, "rewards/rejected": -3.966151475906372, "step": 3931 }, { "epoch": 0.87, "learning_rate": 9.860717616165934e-06, "logits/chosen": -1.2076568603515625, "logits/rejected": -1.3244822025299072, "logps/chosen": -208.53692626953125, "logps/rejected": -191.25347900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.1735504865646362, "rewards/margins": 12.915166854858398, "rewards/rejected": -11.741616249084473, "step": 3932 }, { "epoch": 0.87, "learning_rate": 9.860297206608606e-06, "logits/chosen": -1.2957972288131714, "logits/rejected": -1.2867932319641113, "logps/chosen": -83.91433715820312, "logps/rejected": -148.6277313232422, "loss": 0.0315, "rewards/accuracies": 1.0, "rewards/chosen": 0.3003067076206207, "rewards/margins": 2.7482919692993164, "rewards/rejected": -2.4479851722717285, "step": 3933 }, { "epoch": 0.87, "learning_rate": 9.859876172514773e-06, "logits/chosen": -1.0570379495620728, "logits/rejected": -1.044998049736023, "logps/chosen": -122.19586944580078, "logps/rejected": -167.23959350585938, "loss": 0.1324, "rewards/accuracies": 1.0, "rewards/chosen": -3.366283416748047, "rewards/margins": 1.1939568519592285, "rewards/rejected": -4.560240268707275, "step": 3934 }, { "epoch": 0.87, "learning_rate": 9.859454513938534e-06, "logits/chosen": -1.4096063375473022, "logits/rejected": -1.424904704093933, "logps/chosen": -190.85162353515625, "logps/rejected": -192.70956420898438, "loss": 0.0835, "rewards/accuracies": 1.0, "rewards/chosen": -0.9742431640625, "rewards/margins": 4.590442180633545, "rewards/rejected": -5.564685344696045, "step": 3935 }, { "epoch": 0.87, "learning_rate": 9.859032230934071e-06, "logits/chosen": -1.25599205493927, "logits/rejected": -1.1252647638320923, "logps/chosen": -81.53263854980469, "logps/rejected": -268.6262512207031, "loss": 1.118, "rewards/accuracies": 0.0, "rewards/chosen": -1.625161051750183, "rewards/margins": -2.0791680812835693, "rewards/rejected": 0.45400696992874146, "step": 3936 }, { "epoch": 0.87, "learning_rate": 9.858609323555646e-06, "logits/chosen": -0.8071272969245911, "logits/rejected": -0.8071272969245911, "logps/chosen": -48.81219482421875, "logps/rejected": -48.81219482421875, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.257927656173706, "rewards/margins": 0.0, "rewards/rejected": -3.257927656173706, "step": 3937 }, { "epoch": 0.87, "learning_rate": 9.858185791857604e-06, "logits/chosen": -1.4208475351333618, "logits/rejected": -1.3301609754562378, "logps/chosen": -122.4369888305664, "logps/rejected": -167.34194946289062, "loss": 0.3844, "rewards/accuracies": 0.0, "rewards/chosen": -2.024336338043213, "rewards/margins": -0.12114107608795166, "rewards/rejected": -1.9031952619552612, "step": 3938 }, { "epoch": 0.87, "learning_rate": 9.857761635894367e-06, "logits/chosen": -1.4502533674240112, "logits/rejected": -1.443249225616455, "logps/chosen": -120.50584411621094, "logps/rejected": -106.96001434326172, "loss": 0.1365, "rewards/accuracies": 1.0, "rewards/chosen": 0.17616501450538635, "rewards/margins": 7.9997453689575195, "rewards/rejected": -7.823580265045166, "step": 3939 }, { "epoch": 0.87, "learning_rate": 9.857336855720439e-06, "logits/chosen": -1.5827704668045044, "logits/rejected": -1.54851496219635, "logps/chosen": -130.13262939453125, "logps/rejected": -147.63499450683594, "loss": 0.1421, "rewards/accuracies": 1.0, "rewards/chosen": -3.5024964809417725, "rewards/margins": 1.1128098964691162, "rewards/rejected": -4.615306377410889, "step": 3940 }, { "epoch": 0.87, "learning_rate": 9.856911451390399e-06, "logits/chosen": -0.915321409702301, "logits/rejected": -0.89314204454422, "logps/chosen": -243.2014923095703, "logps/rejected": -256.9557800292969, "loss": 0.5861, "rewards/accuracies": 1.0, "rewards/chosen": 2.618443250656128, "rewards/margins": 7.093907356262207, "rewards/rejected": -4.4754638671875, "step": 3941 }, { "epoch": 0.87, "learning_rate": 9.856485422958913e-06, "logits/chosen": -1.1678465604782104, "logits/rejected": -1.0851513147354126, "logps/chosen": -254.78184509277344, "logps/rejected": -332.91119384765625, "loss": 0.2327, "rewards/accuracies": 1.0, "rewards/chosen": -1.340901255607605, "rewards/margins": 0.5232894420623779, "rewards/rejected": -1.864190697669983, "step": 3942 }, { "epoch": 0.87, "learning_rate": 9.856058770480726e-06, "logits/chosen": -1.1252599954605103, "logits/rejected": -1.0522956848144531, "logps/chosen": -99.79069519042969, "logps/rejected": -298.8196716308594, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.6604965329170227, "rewards/margins": 5.300010681152344, "rewards/rejected": -5.960507392883301, "step": 3943 }, { "epoch": 0.87, "learning_rate": 9.855631494010661e-06, "logits/chosen": -1.0746170282363892, "logits/rejected": -1.0546784400939941, "logps/chosen": -95.16390991210938, "logps/rejected": -162.77886962890625, "loss": 3.0578, "rewards/accuracies": 0.0, "rewards/chosen": -1.0906174182891846, "rewards/margins": -0.3032364249229431, "rewards/rejected": -0.7873809933662415, "step": 3944 }, { "epoch": 0.87, "learning_rate": 9.855203593603622e-06, "logits/chosen": -0.8155975341796875, "logits/rejected": -0.7710121870040894, "logps/chosen": -141.99188232421875, "logps/rejected": -247.28170776367188, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 1.560540795326233, "rewards/margins": 5.620770454406738, "rewards/rejected": -4.060229778289795, "step": 3945 }, { "epoch": 0.87, "learning_rate": 9.85477506931459e-06, "logits/chosen": -0.9919545650482178, "logits/rejected": -0.9919545650482178, "logps/chosen": -45.78419494628906, "logps/rejected": -45.78419494628906, "loss": 2.8708, "rewards/accuracies": 0.0, "rewards/chosen": -0.4730842709541321, "rewards/margins": 0.0, "rewards/rejected": -0.4730842709541321, "step": 3946 }, { "epoch": 0.87, "learning_rate": 9.854345921198637e-06, "logits/chosen": -1.211289644241333, "logits/rejected": -1.184023380279541, "logps/chosen": -149.15386962890625, "logps/rejected": -206.12075805664062, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.4083847105503082, "rewards/margins": 5.601251602172852, "rewards/rejected": -5.192866802215576, "step": 3947 }, { "epoch": 0.87, "learning_rate": 9.853916149310898e-06, "logits/chosen": -1.0882656574249268, "logits/rejected": -1.0494661331176758, "logps/chosen": -196.5049285888672, "logps/rejected": -310.64654541015625, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": 3.226405382156372, "rewards/margins": 17.763105392456055, "rewards/rejected": -14.536700248718262, "step": 3948 }, { "epoch": 0.87, "learning_rate": 9.853485753706603e-06, "logits/chosen": -1.2204562425613403, "logits/rejected": -1.2204562425613403, "logps/chosen": -91.9022445678711, "logps/rejected": -91.9022445678711, "loss": 0.3561, "rewards/accuracies": 0.0, "rewards/chosen": -3.380788803100586, "rewards/margins": 0.0, "rewards/rejected": -3.380788803100586, "step": 3949 }, { "epoch": 0.87, "learning_rate": 9.853054734441059e-06, "logits/chosen": -1.299335241317749, "logits/rejected": -1.2743066549301147, "logps/chosen": -84.80265808105469, "logps/rejected": -178.3971405029297, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.0880584716796875, "rewards/margins": 6.322578430175781, "rewards/rejected": -6.410636901855469, "step": 3950 }, { "epoch": 0.87, "learning_rate": 9.852623091569646e-06, "logits/chosen": -0.9802312254905701, "logits/rejected": -0.9871848225593567, "logps/chosen": -87.71560668945312, "logps/rejected": -95.45903778076172, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": 1.3118103742599487, "rewards/margins": 5.210179805755615, "rewards/rejected": -3.898369550704956, "step": 3951 }, { "epoch": 0.87, "learning_rate": 9.852190825147831e-06, "logits/chosen": -1.4411729574203491, "logits/rejected": -1.4606339931488037, "logps/chosen": -140.84442138671875, "logps/rejected": -82.52497100830078, "loss": 0.2824, "rewards/accuracies": 1.0, "rewards/chosen": -5.13812255859375, "rewards/margins": 0.2796182632446289, "rewards/rejected": -5.417740821838379, "step": 3952 }, { "epoch": 0.87, "learning_rate": 9.85175793523116e-06, "logits/chosen": -1.4607203006744385, "logits/rejected": -1.5751926898956299, "logps/chosen": -224.583740234375, "logps/rejected": -183.03346252441406, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": 1.319494605064392, "rewards/margins": 8.400927543640137, "rewards/rejected": -7.081433296203613, "step": 3953 }, { "epoch": 0.88, "learning_rate": 9.851324421875256e-06, "logits/chosen": -1.1459051370620728, "logits/rejected": -1.1417732238769531, "logps/chosen": -66.87483215332031, "logps/rejected": -136.27284240722656, "loss": 0.5467, "rewards/accuracies": 1.0, "rewards/chosen": 0.02073516882956028, "rewards/margins": 2.674675941467285, "rewards/rejected": -2.6539406776428223, "step": 3954 }, { "epoch": 0.88, "learning_rate": 9.850890285135829e-06, "logits/chosen": -1.4392495155334473, "logits/rejected": -1.3786228895187378, "logps/chosen": -127.55736541748047, "logps/rejected": -189.61830139160156, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -1.0418328046798706, "rewards/margins": 4.009696006774902, "rewards/rejected": -5.0515289306640625, "step": 3955 }, { "epoch": 0.88, "learning_rate": 9.850455525068658e-06, "logits/chosen": -1.2088571786880493, "logits/rejected": -1.1860238313674927, "logps/chosen": -121.62416076660156, "logps/rejected": -164.31613159179688, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.971699595451355, "rewards/margins": 4.535288333892822, "rewards/rejected": -6.506988048553467, "step": 3956 }, { "epoch": 0.88, "learning_rate": 9.850020141729615e-06, "logits/chosen": -1.2706823348999023, "logits/rejected": -1.3284724950790405, "logps/chosen": -167.7821502685547, "logps/rejected": -197.9242706298828, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.3910049498081207, "rewards/margins": 4.021978855133057, "rewards/rejected": -4.4129838943481445, "step": 3957 }, { "epoch": 0.88, "learning_rate": 9.849584135174642e-06, "logits/chosen": -1.0796077251434326, "logits/rejected": -1.030981183052063, "logps/chosen": -300.4200439453125, "logps/rejected": -450.7518310546875, "loss": 0.8321, "rewards/accuracies": 0.0, "rewards/chosen": -1.05609130859375, "rewards/margins": -1.4525543451309204, "rewards/rejected": 0.396463006734848, "step": 3958 }, { "epoch": 0.88, "learning_rate": 9.849147505459766e-06, "logits/chosen": -0.9288139939308167, "logits/rejected": -1.0452579259872437, "logps/chosen": -168.54257202148438, "logps/rejected": -100.5746078491211, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.2233612537384033, "rewards/margins": 4.835614204406738, "rewards/rejected": -6.0589752197265625, "step": 3959 }, { "epoch": 0.88, "learning_rate": 9.848710252641092e-06, "logits/chosen": -1.3491501808166504, "logits/rejected": -1.4222583770751953, "logps/chosen": -186.34439086914062, "logps/rejected": -185.8951416015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 2.0040252208709717, "rewards/margins": 7.961056709289551, "rewards/rejected": -5.95703125, "step": 3960 }, { "epoch": 0.88, "learning_rate": 9.848272376774807e-06, "logits/chosen": -1.17791748046875, "logits/rejected": -1.17791748046875, "logps/chosen": -153.819091796875, "logps/rejected": -153.819091796875, "loss": 0.347, "rewards/accuracies": 0.0, "rewards/chosen": -3.7537155151367188, "rewards/margins": 0.0, "rewards/rejected": -3.7537155151367188, "step": 3961 }, { "epoch": 0.88, "learning_rate": 9.847833877917177e-06, "logits/chosen": -1.3625320196151733, "logits/rejected": -1.3070307970046997, "logps/chosen": -80.7929458618164, "logps/rejected": -232.48416137695312, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -0.030927276238799095, "rewards/margins": 8.78984260559082, "rewards/rejected": -8.820770263671875, "step": 3962 }, { "epoch": 0.88, "learning_rate": 9.847394756124547e-06, "logits/chosen": -1.273258924484253, "logits/rejected": -1.2857251167297363, "logps/chosen": -206.57943725585938, "logps/rejected": -223.4242706298828, "loss": 0.2976, "rewards/accuracies": 1.0, "rewards/chosen": 2.6716156005859375, "rewards/margins": 7.246025085449219, "rewards/rejected": -4.574409484863281, "step": 3963 }, { "epoch": 0.88, "learning_rate": 9.846955011453343e-06, "logits/chosen": -1.42500638961792, "logits/rejected": -1.4083863496780396, "logps/chosen": -143.23446655273438, "logps/rejected": -209.74127197265625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.5385063886642456, "rewards/margins": 5.417838096618652, "rewards/rejected": -6.9563446044921875, "step": 3964 }, { "epoch": 0.88, "learning_rate": 9.846514643960072e-06, "logits/chosen": -1.054903268814087, "logits/rejected": -1.087480068206787, "logps/chosen": -114.884033203125, "logps/rejected": -109.1562728881836, "loss": 0.2783, "rewards/accuracies": 1.0, "rewards/chosen": -1.1069427728652954, "rewards/margins": 0.30605244636535645, "rewards/rejected": -1.4129952192306519, "step": 3965 }, { "epoch": 0.88, "learning_rate": 9.846073653701321e-06, "logits/chosen": -1.27851402759552, "logits/rejected": -1.2829844951629639, "logps/chosen": -94.39111328125, "logps/rejected": -98.74464416503906, "loss": 0.4392, "rewards/accuracies": 0.0, "rewards/chosen": -2.00288462638855, "rewards/margins": -0.3415931463241577, "rewards/rejected": -1.661291480064392, "step": 3966 }, { "epoch": 0.88, "learning_rate": 9.845632040733754e-06, "logits/chosen": -1.5266191959381104, "logits/rejected": -1.5060648918151855, "logps/chosen": -87.46295928955078, "logps/rejected": -153.9251251220703, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -0.2931671142578125, "rewards/margins": 4.009583950042725, "rewards/rejected": -4.302751064300537, "step": 3967 }, { "epoch": 0.88, "learning_rate": 9.845189805114119e-06, "logits/chosen": -0.7550899982452393, "logits/rejected": -0.5703393816947937, "logps/chosen": -265.6285095214844, "logps/rejected": -480.9284362792969, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -3.464094638824463, "rewards/margins": 4.2529401779174805, "rewards/rejected": -7.717034816741943, "step": 3968 }, { "epoch": 0.88, "learning_rate": 9.844746946899241e-06, "logits/chosen": -1.185540795326233, "logits/rejected": -1.1927461624145508, "logps/chosen": -168.76602172851562, "logps/rejected": -116.50556945800781, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.02533874474465847, "rewards/margins": 6.949591159820557, "rewards/rejected": -6.9749298095703125, "step": 3969 }, { "epoch": 0.88, "learning_rate": 9.844303466146027e-06, "logits/chosen": -1.1413339376449585, "logits/rejected": -1.0935492515563965, "logps/chosen": -101.67552947998047, "logps/rejected": -189.29397583007812, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.733014702796936, "rewards/margins": 4.187746524810791, "rewards/rejected": -5.9207611083984375, "step": 3970 }, { "epoch": 0.88, "learning_rate": 9.843859362911463e-06, "logits/chosen": -1.4979661703109741, "logits/rejected": -1.431306004524231, "logps/chosen": -123.49658203125, "logps/rejected": -175.67486572265625, "loss": 0.1324, "rewards/accuracies": 1.0, "rewards/chosen": -2.3088271617889404, "rewards/margins": 2.634233236312866, "rewards/rejected": -4.943060398101807, "step": 3971 }, { "epoch": 0.88, "learning_rate": 9.843414637252615e-06, "logits/chosen": -1.3238142728805542, "logits/rejected": -1.2641366720199585, "logps/chosen": -182.96670532226562, "logps/rejected": -331.4869384765625, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -2.639779806137085, "rewards/margins": 3.454702615737915, "rewards/rejected": -6.094482421875, "step": 3972 }, { "epoch": 0.88, "learning_rate": 9.842969289226629e-06, "logits/chosen": -1.19719398021698, "logits/rejected": -1.186950922012329, "logps/chosen": -172.4158172607422, "logps/rejected": -221.3155975341797, "loss": 0.2832, "rewards/accuracies": 1.0, "rewards/chosen": -1.1327866315841675, "rewards/margins": 7.9184980392456055, "rewards/rejected": -9.051284790039062, "step": 3973 }, { "epoch": 0.88, "learning_rate": 9.842523318890733e-06, "logits/chosen": -1.3837614059448242, "logits/rejected": -1.3136682510375977, "logps/chosen": -261.3541564941406, "logps/rejected": -338.17657470703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.9811341762542725, "rewards/margins": 8.008618354797363, "rewards/rejected": -10.989752769470215, "step": 3974 }, { "epoch": 0.88, "learning_rate": 9.84207672630223e-06, "logits/chosen": -1.5260077714920044, "logits/rejected": -1.5192385911941528, "logps/chosen": -113.64337158203125, "logps/rejected": -103.49131774902344, "loss": 0.2739, "rewards/accuracies": 1.0, "rewards/chosen": -1.9571762084960938, "rewards/margins": 1.2506310939788818, "rewards/rejected": -3.2078073024749756, "step": 3975 }, { "epoch": 0.88, "learning_rate": 9.84162951151851e-06, "logits/chosen": -1.2925643920898438, "logits/rejected": -1.3864871263504028, "logps/chosen": -283.56744384765625, "logps/rejected": -196.42079162597656, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.9762725830078125, "rewards/margins": 6.915113925933838, "rewards/rejected": -5.938841342926025, "step": 3976 }, { "epoch": 0.88, "learning_rate": 9.841181674597034e-06, "logits/chosen": -0.9905571937561035, "logits/rejected": -0.9905571937561035, "logps/chosen": -146.29727172851562, "logps/rejected": -146.29727172851562, "loss": 0.3692, "rewards/accuracies": 0.0, "rewards/chosen": -5.9388427734375, "rewards/margins": 0.0, "rewards/rejected": -5.9388427734375, "step": 3977 }, { "epoch": 0.88, "learning_rate": 9.840733215595351e-06, "logits/chosen": -1.0285954475402832, "logits/rejected": -1.0008001327514648, "logps/chosen": -207.36196899414062, "logps/rejected": -214.59072875976562, "loss": 0.3503, "rewards/accuracies": 1.0, "rewards/chosen": 1.302252173423767, "rewards/margins": 4.906922817230225, "rewards/rejected": -3.604670763015747, "step": 3978 }, { "epoch": 0.88, "learning_rate": 9.840284134571088e-06, "logits/chosen": -1.1210230588912964, "logits/rejected": -1.144948959350586, "logps/chosen": -110.10574340820312, "logps/rejected": -99.48838806152344, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -2.641998291015625, "rewards/margins": 4.390132427215576, "rewards/rejected": -7.032130718231201, "step": 3979 }, { "epoch": 0.88, "learning_rate": 9.83983443158195e-06, "logits/chosen": -1.3095602989196777, "logits/rejected": -0.8113369345664978, "logps/chosen": -139.49652099609375, "logps/rejected": -390.40069580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.696022868156433, "rewards/margins": 21.609703063964844, "rewards/rejected": -23.30572509765625, "step": 3980 }, { "epoch": 0.88, "learning_rate": 9.839384106685721e-06, "logits/chosen": -1.132257103919983, "logits/rejected": -1.138430118560791, "logps/chosen": -134.72116088867188, "logps/rejected": -50.847251892089844, "loss": 0.4099, "rewards/accuracies": 1.0, "rewards/chosen": -1.493133544921875, "rewards/margins": 2.0019805431365967, "rewards/rejected": -3.4951140880584717, "step": 3981 }, { "epoch": 0.88, "learning_rate": 9.838933159940266e-06, "logits/chosen": -1.2450133562088013, "logits/rejected": -1.2450133562088013, "logps/chosen": -168.13758850097656, "logps/rejected": -168.13758850097656, "loss": 0.3534, "rewards/accuracies": 0.0, "rewards/chosen": -3.2206344604492188, "rewards/margins": 0.0, "rewards/rejected": -3.2206344604492188, "step": 3982 }, { "epoch": 0.88, "learning_rate": 9.838481591403536e-06, "logits/chosen": -1.251147747039795, "logits/rejected": -1.1914944648742676, "logps/chosen": -96.31950378417969, "logps/rejected": -154.44473266601562, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": -1.1929504871368408, "rewards/margins": 2.536954402923584, "rewards/rejected": -3.729904890060425, "step": 3983 }, { "epoch": 0.88, "learning_rate": 9.83802940113355e-06, "logits/chosen": -1.1501011848449707, "logits/rejected": -1.115752100944519, "logps/chosen": -201.22119140625, "logps/rejected": -245.98245239257812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 1.1440277099609375, "rewards/margins": 9.0074462890625, "rewards/rejected": -7.8634185791015625, "step": 3984 }, { "epoch": 0.88, "learning_rate": 9.837576589188418e-06, "logits/chosen": -1.4156291484832764, "logits/rejected": -1.6033008098602295, "logps/chosen": -202.32240295410156, "logps/rejected": -103.966064453125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.05247955396771431, "rewards/margins": 5.979443073272705, "rewards/rejected": -6.031922817230225, "step": 3985 }, { "epoch": 0.88, "learning_rate": 9.837123155626323e-06, "logits/chosen": -1.7289892435073853, "logits/rejected": -1.7260420322418213, "logps/chosen": -95.08746337890625, "logps/rejected": -182.41482543945312, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.6358444094657898, "rewards/margins": 4.915562629699707, "rewards/rejected": -5.5514068603515625, "step": 3986 }, { "epoch": 0.88, "learning_rate": 9.836669100505532e-06, "logits/chosen": -0.9751315116882324, "logits/rejected": -0.9167175889015198, "logps/chosen": -170.03045654296875, "logps/rejected": -201.2752685546875, "loss": 0.0303, "rewards/accuracies": 1.0, "rewards/chosen": 1.0981369018554688, "rewards/margins": 2.7828049659729004, "rewards/rejected": -1.684667944908142, "step": 3987 }, { "epoch": 0.88, "learning_rate": 9.836214423884387e-06, "logits/chosen": -1.1207218170166016, "logits/rejected": -1.1075246334075928, "logps/chosen": -109.15707397460938, "logps/rejected": -169.45672607421875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.6596328616142273, "rewards/margins": 5.895262241363525, "rewards/rejected": -6.554894924163818, "step": 3988 }, { "epoch": 0.88, "learning_rate": 9.835759125821314e-06, "logits/chosen": -1.1506526470184326, "logits/rejected": -1.0616611242294312, "logps/chosen": -216.913330078125, "logps/rejected": -312.96112060546875, "loss": 0.1196, "rewards/accuracies": 1.0, "rewards/chosen": -0.24937744438648224, "rewards/margins": 6.307081699371338, "rewards/rejected": -6.556458950042725, "step": 3989 }, { "epoch": 0.88, "learning_rate": 9.83530320637482e-06, "logits/chosen": -1.2679098844528198, "logits/rejected": -1.3180184364318848, "logps/chosen": -258.7572937011719, "logps/rejected": -92.88327026367188, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.03299255296587944, "rewards/margins": 5.235525131225586, "rewards/rejected": -5.202532768249512, "step": 3990 }, { "epoch": 0.88, "learning_rate": 9.834846665603486e-06, "logits/chosen": -1.4901719093322754, "logits/rejected": -1.5381433963775635, "logps/chosen": -121.13783264160156, "logps/rejected": -114.1746826171875, "loss": 0.4108, "rewards/accuracies": 1.0, "rewards/chosen": -1.823034644126892, "rewards/margins": 1.9879745244979858, "rewards/rejected": -3.811009168624878, "step": 3991 }, { "epoch": 0.88, "learning_rate": 9.834389503565978e-06, "logits/chosen": -1.8575551509857178, "logits/rejected": -1.8895701169967651, "logps/chosen": -111.84707641601562, "logps/rejected": -140.66323852539062, "loss": 0.0851, "rewards/accuracies": 1.0, "rewards/chosen": -1.430079698562622, "rewards/margins": 1.9599831104278564, "rewards/rejected": -3.3900628089904785, "step": 3992 }, { "epoch": 0.88, "learning_rate": 9.833931720321042e-06, "logits/chosen": -1.3373522758483887, "logits/rejected": -1.3221049308776855, "logps/chosen": -102.83307647705078, "logps/rejected": -134.99234008789062, "loss": 0.0489, "rewards/accuracies": 1.0, "rewards/chosen": -1.6560989618301392, "rewards/margins": 2.502234935760498, "rewards/rejected": -4.158333778381348, "step": 3993 }, { "epoch": 0.88, "learning_rate": 9.833473315927498e-06, "logits/chosen": -1.196204662322998, "logits/rejected": -1.244227409362793, "logps/chosen": -96.06538391113281, "logps/rejected": -149.5004119873047, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.9870582818984985, "rewards/margins": 4.952169418334961, "rewards/rejected": -5.93922758102417, "step": 3994 }, { "epoch": 0.88, "learning_rate": 9.833014290444254e-06, "logits/chosen": -1.2991032600402832, "logits/rejected": -1.3428418636322021, "logps/chosen": -113.17599487304688, "logps/rejected": -146.15573120117188, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -1.1453605890274048, "rewards/margins": 4.052750587463379, "rewards/rejected": -5.198111057281494, "step": 3995 }, { "epoch": 0.88, "learning_rate": 9.832554643930292e-06, "logits/chosen": -1.384867787361145, "logits/rejected": -1.405269980430603, "logps/chosen": -212.705810546875, "logps/rejected": -246.37550354003906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.5362305045127869, "rewards/margins": 13.964097023010254, "rewards/rejected": -14.500327110290527, "step": 3996 }, { "epoch": 0.88, "learning_rate": 9.832094376444675e-06, "logits/chosen": -1.2213294506072998, "logits/rejected": -1.1886287927627563, "logps/chosen": -169.3084716796875, "logps/rejected": -209.9842529296875, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -1.1059616804122925, "rewards/margins": 3.671259880065918, "rewards/rejected": -4.7772216796875, "step": 3997 }, { "epoch": 0.88, "learning_rate": 9.831633488046547e-06, "logits/chosen": -1.4175080060958862, "logits/rejected": -1.3724563121795654, "logps/chosen": -152.17164611816406, "logps/rejected": -177.44464111328125, "loss": 0.0482, "rewards/accuracies": 1.0, "rewards/chosen": 0.4748367369174957, "rewards/margins": 6.369589328765869, "rewards/rejected": -5.894752502441406, "step": 3998 }, { "epoch": 0.89, "learning_rate": 9.83117197879513e-06, "logits/chosen": -1.2309552431106567, "logits/rejected": -1.1420249938964844, "logps/chosen": -202.84713745117188, "logps/rejected": -320.6040344238281, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.9076324701309204, "rewards/margins": 5.978506565093994, "rewards/rejected": -7.886138916015625, "step": 3999 }, { "epoch": 0.89, "learning_rate": 9.830709848749727e-06, "logits/chosen": -1.4125438928604126, "logits/rejected": -1.4580076932907104, "logps/chosen": -125.78942108154297, "logps/rejected": -98.71791076660156, "loss": 0.0748, "rewards/accuracies": 1.0, "rewards/chosen": 0.006367492955178022, "rewards/margins": 4.032791614532471, "rewards/rejected": -4.026423931121826, "step": 4000 }, { "epoch": 0.89, "learning_rate": 9.830247097969723e-06, "logits/chosen": -1.032030463218689, "logits/rejected": -1.0190446376800537, "logps/chosen": -89.89618682861328, "logps/rejected": -114.77766418457031, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -0.16608352959156036, "rewards/margins": 2.9240050315856934, "rewards/rejected": -3.0900886058807373, "step": 4001 }, { "epoch": 0.89, "learning_rate": 9.829783726514578e-06, "logits/chosen": -1.1066402196884155, "logits/rejected": -1.0408982038497925, "logps/chosen": -178.59210205078125, "logps/rejected": -207.74520874023438, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 1.6995849609375, "rewards/margins": 6.085641384124756, "rewards/rejected": -4.386056423187256, "step": 4002 }, { "epoch": 0.89, "learning_rate": 9.829319734443833e-06, "logits/chosen": -2.0288662910461426, "logits/rejected": -2.0585715770721436, "logps/chosen": -90.64138793945312, "logps/rejected": -80.41879272460938, "loss": 0.8778, "rewards/accuracies": 1.0, "rewards/chosen": -3.8270676136016846, "rewards/margins": 1.0140583515167236, "rewards/rejected": -4.841125965118408, "step": 4003 }, { "epoch": 0.89, "learning_rate": 9.828855121817114e-06, "logits/chosen": -1.3479399681091309, "logits/rejected": -1.3875248432159424, "logps/chosen": -159.94923400878906, "logps/rejected": -95.03057861328125, "loss": 0.6665, "rewards/accuracies": 0.0, "rewards/chosen": -3.3211960792541504, "rewards/margins": -1.0250320434570312, "rewards/rejected": -2.296164035797119, "step": 4004 }, { "epoch": 0.89, "learning_rate": 9.82838988869412e-06, "logits/chosen": -1.4397844076156616, "logits/rejected": -1.432200312614441, "logps/chosen": -97.57501220703125, "logps/rejected": -111.0639419555664, "loss": 0.1135, "rewards/accuracies": 1.0, "rewards/chosen": -2.128892660140991, "rewards/margins": 1.3671064376831055, "rewards/rejected": -3.4959990978240967, "step": 4005 }, { "epoch": 0.89, "learning_rate": 9.827924035134629e-06, "logits/chosen": -1.198353886604309, "logits/rejected": -1.094010591506958, "logps/chosen": -187.93316650390625, "logps/rejected": -431.29681396484375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 1.9231079816818237, "rewards/margins": 14.893689155578613, "rewards/rejected": -12.9705810546875, "step": 4006 }, { "epoch": 0.89, "learning_rate": 9.827457561198507e-06, "logits/chosen": -1.6858327388763428, "logits/rejected": -1.6503856182098389, "logps/chosen": -103.51313781738281, "logps/rejected": -121.77456665039062, "loss": 0.0663, "rewards/accuracies": 1.0, "rewards/chosen": -3.4806602001190186, "rewards/margins": 1.9537761211395264, "rewards/rejected": -5.434436321258545, "step": 4007 }, { "epoch": 0.89, "learning_rate": 9.826990466945695e-06, "logits/chosen": -1.2479338645935059, "logits/rejected": -1.2254942655563354, "logps/chosen": -69.29700469970703, "logps/rejected": -97.62629699707031, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": 0.3087562620639801, "rewards/margins": 2.9141626358032227, "rewards/rejected": -2.6054062843322754, "step": 4008 }, { "epoch": 0.89, "learning_rate": 9.826522752436211e-06, "logits/chosen": -1.783122181892395, "logits/rejected": -1.3305814266204834, "logps/chosen": -120.93762969970703, "logps/rejected": -630.57861328125, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": 0.5029869079589844, "rewards/margins": 53.20365905761719, "rewards/rejected": -52.7006721496582, "step": 4009 }, { "epoch": 0.89, "learning_rate": 9.826054417730156e-06, "logits/chosen": -1.360862374305725, "logits/rejected": -1.3525609970092773, "logps/chosen": -72.87794494628906, "logps/rejected": -103.96279907226562, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.2871200740337372, "rewards/margins": 5.052132606506348, "rewards/rejected": -4.765012741088867, "step": 4010 }, { "epoch": 0.89, "learning_rate": 9.825585462887709e-06, "logits/chosen": -1.3518174886703491, "logits/rejected": -1.4514275789260864, "logps/chosen": -156.92730712890625, "logps/rejected": -262.43426513671875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9310409426689148, "rewards/margins": 5.639163494110107, "rewards/rejected": -6.570204257965088, "step": 4011 }, { "epoch": 0.89, "learning_rate": 9.825115887969131e-06, "logits/chosen": -1.697064757347107, "logits/rejected": -1.7479097843170166, "logps/chosen": -83.63402557373047, "logps/rejected": -119.73348999023438, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": -2.035677671432495, "rewards/margins": 2.537897825241089, "rewards/rejected": -4.573575496673584, "step": 4012 }, { "epoch": 0.89, "learning_rate": 9.82464569303476e-06, "logits/chosen": -1.306225299835205, "logits/rejected": -1.3305875062942505, "logps/chosen": -118.06537628173828, "logps/rejected": -130.07260131835938, "loss": 0.1632, "rewards/accuracies": 1.0, "rewards/chosen": -1.353979468345642, "rewards/margins": 0.9538270235061646, "rewards/rejected": -2.3078064918518066, "step": 4013 }, { "epoch": 0.89, "learning_rate": 9.824174878145017e-06, "logits/chosen": -1.3148126602172852, "logits/rejected": -1.402069091796875, "logps/chosen": -155.90249633789062, "logps/rejected": -107.00970458984375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.7214950323104858, "rewards/margins": 5.4081711769104, "rewards/rejected": -7.129666328430176, "step": 4014 }, { "epoch": 0.89, "learning_rate": 9.823703443360398e-06, "logits/chosen": -1.6533159017562866, "logits/rejected": -1.0040886402130127, "logps/chosen": -106.48631286621094, "logps/rejected": -481.31048583984375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 1.062231421470642, "rewards/margins": 37.98066329956055, "rewards/rejected": -36.91843032836914, "step": 4015 }, { "epoch": 0.89, "learning_rate": 9.823231388741483e-06, "logits/chosen": -1.4539964199066162, "logits/rejected": -1.4622141122817993, "logps/chosen": -220.3800048828125, "logps/rejected": -182.03834533691406, "loss": 0.5433, "rewards/accuracies": 0.0, "rewards/chosen": -3.1463119983673096, "rewards/margins": -0.6291289329528809, "rewards/rejected": -2.5171830654144287, "step": 4016 }, { "epoch": 0.89, "learning_rate": 9.822758714348928e-06, "logits/chosen": -1.1552553176879883, "logits/rejected": -1.170257329940796, "logps/chosen": -67.94065856933594, "logps/rejected": -150.863037109375, "loss": 0.1005, "rewards/accuracies": 1.0, "rewards/chosen": 0.6302536129951477, "rewards/margins": 2.019487142562866, "rewards/rejected": -1.3892334699630737, "step": 4017 }, { "epoch": 0.89, "learning_rate": 9.822285420243474e-06, "logits/chosen": -1.3016918897628784, "logits/rejected": -1.2660512924194336, "logps/chosen": -77.26032257080078, "logps/rejected": -220.24557495117188, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.1347000151872635, "rewards/margins": 6.566910266876221, "rewards/rejected": -6.701610088348389, "step": 4018 }, { "epoch": 0.89, "learning_rate": 9.821811506485934e-06, "logits/chosen": -1.4656506776809692, "logits/rejected": -1.4634743928909302, "logps/chosen": -135.94357299804688, "logps/rejected": -138.3580780029297, "loss": 0.1938, "rewards/accuracies": 1.0, "rewards/chosen": -0.9636619687080383, "rewards/margins": 0.9173012375831604, "rewards/rejected": -1.8809632062911987, "step": 4019 }, { "epoch": 0.89, "learning_rate": 9.821336973137207e-06, "logits/chosen": -1.0305836200714111, "logits/rejected": -1.0896005630493164, "logps/chosen": -234.17242431640625, "logps/rejected": -148.4579315185547, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 1.679742455482483, "rewards/margins": 5.892605781555176, "rewards/rejected": -4.212863445281982, "step": 4020 }, { "epoch": 0.89, "learning_rate": 9.820861820258269e-06, "logits/chosen": -1.3588582277297974, "logits/rejected": -1.2156851291656494, "logps/chosen": -175.74703979492188, "logps/rejected": -305.90643310546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 3.575627088546753, "rewards/margins": 11.818477630615234, "rewards/rejected": -8.242850303649902, "step": 4021 }, { "epoch": 0.89, "learning_rate": 9.820386047910177e-06, "logits/chosen": -1.0614911317825317, "logits/rejected": -1.0071945190429688, "logps/chosen": -182.43328857421875, "logps/rejected": -209.44406127929688, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": 0.905529797077179, "rewards/margins": 4.984399795532227, "rewards/rejected": -4.078869819641113, "step": 4022 }, { "epoch": 0.89, "learning_rate": 9.819909656154066e-06, "logits/chosen": -1.4030073881149292, "logits/rejected": -1.4000225067138672, "logps/chosen": -120.44990539550781, "logps/rejected": -174.8624267578125, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.8255584836006165, "rewards/margins": 4.359867095947266, "rewards/rejected": -5.185425758361816, "step": 4023 }, { "epoch": 0.89, "learning_rate": 9.81943264505115e-06, "logits/chosen": -1.3011120557785034, "logits/rejected": -1.3762590885162354, "logps/chosen": -82.67977905273438, "logps/rejected": -68.81802368164062, "loss": 0.0876, "rewards/accuracies": 1.0, "rewards/chosen": -0.5987739562988281, "rewards/margins": 3.1470143795013428, "rewards/rejected": -3.745788335800171, "step": 4024 }, { "epoch": 0.89, "learning_rate": 9.818955014662725e-06, "logits/chosen": -1.012351393699646, "logits/rejected": -1.0343695878982544, "logps/chosen": -65.99093627929688, "logps/rejected": -165.00210571289062, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.3122138977050781, "rewards/margins": 4.42665958404541, "rewards/rejected": -4.738873481750488, "step": 4025 }, { "epoch": 0.89, "learning_rate": 9.818476765050167e-06, "logits/chosen": -1.2751140594482422, "logits/rejected": -1.2164695262908936, "logps/chosen": -65.58570861816406, "logps/rejected": -160.67164611816406, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -2.282299518585205, "rewards/margins": 2.8215246200561523, "rewards/rejected": -5.103824138641357, "step": 4026 }, { "epoch": 0.89, "learning_rate": 9.817997896274925e-06, "logits/chosen": -1.4333665370941162, "logits/rejected": -1.439339280128479, "logps/chosen": -52.54059600830078, "logps/rejected": -56.4692497253418, "loss": 0.7871, "rewards/accuracies": 0.0, "rewards/chosen": -1.0317310094833374, "rewards/margins": -1.3417266607284546, "rewards/rejected": 0.3099956512451172, "step": 4027 }, { "epoch": 0.89, "learning_rate": 9.817518408398536e-06, "logits/chosen": -1.6597026586532593, "logits/rejected": -1.6683928966522217, "logps/chosen": -129.960205078125, "logps/rejected": -127.18325805664062, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.14954224228858948, "rewards/margins": 4.250274658203125, "rewards/rejected": -4.399816989898682, "step": 4028 }, { "epoch": 0.89, "learning_rate": 9.817038301482612e-06, "logits/chosen": -1.2070053815841675, "logits/rejected": -1.2760483026504517, "logps/chosen": -125.03999328613281, "logps/rejected": -93.618408203125, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -3.791414737701416, "rewards/margins": 3.09897518157959, "rewards/rejected": -6.890389919281006, "step": 4029 }, { "epoch": 0.89, "learning_rate": 9.81655757558885e-06, "logits/chosen": -1.0679422616958618, "logits/rejected": -1.1047825813293457, "logps/chosen": -204.49227905273438, "logps/rejected": -120.923583984375, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -0.00048065185546875, "rewards/margins": 6.080392360687256, "rewards/rejected": -6.080873012542725, "step": 4030 }, { "epoch": 0.89, "learning_rate": 9.816076230779014e-06, "logits/chosen": -1.2696813344955444, "logits/rejected": -1.2957957983016968, "logps/chosen": -257.1028137207031, "logps/rejected": -190.01779174804688, "loss": 0.1363, "rewards/accuracies": 1.0, "rewards/chosen": -0.7933578491210938, "rewards/margins": 3.1296463012695312, "rewards/rejected": -3.923004150390625, "step": 4031 }, { "epoch": 0.89, "learning_rate": 9.815594267114962e-06, "logits/chosen": -1.274235725402832, "logits/rejected": -1.2974275350570679, "logps/chosen": -110.78582000732422, "logps/rejected": -142.87765502929688, "loss": 0.9037, "rewards/accuracies": 1.0, "rewards/chosen": 0.4801429808139801, "rewards/margins": 1.0939308404922485, "rewards/rejected": -0.613787829875946, "step": 4032 }, { "epoch": 0.89, "learning_rate": 9.815111684658622e-06, "logits/chosen": -1.197615385055542, "logits/rejected": -1.210880160331726, "logps/chosen": -133.5304718017578, "logps/rejected": -143.2232208251953, "loss": 0.0349, "rewards/accuracies": 1.0, "rewards/chosen": -1.086700439453125, "rewards/margins": 2.7980620861053467, "rewards/rejected": -3.8847625255584717, "step": 4033 }, { "epoch": 0.89, "learning_rate": 9.814628483472006e-06, "logits/chosen": -1.4646987915039062, "logits/rejected": -1.4646987915039062, "logps/chosen": -166.88128662109375, "logps/rejected": -166.88128662109375, "loss": 0.4379, "rewards/accuracies": 0.0, "rewards/chosen": -9.015067100524902, "rewards/margins": 0.0, "rewards/rejected": -9.015067100524902, "step": 4034 }, { "epoch": 0.89, "learning_rate": 9.814144663617204e-06, "logits/chosen": -1.4860196113586426, "logits/rejected": -1.569077491760254, "logps/chosen": -262.92388916015625, "logps/rejected": -200.20001220703125, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": 2.319894552230835, "rewards/margins": 7.925641059875488, "rewards/rejected": -5.605746746063232, "step": 4035 }, { "epoch": 0.89, "learning_rate": 9.813660225156385e-06, "logits/chosen": -1.1135038137435913, "logits/rejected": -1.1209759712219238, "logps/chosen": -104.68424987792969, "logps/rejected": -189.66807556152344, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.23092499375343323, "rewards/margins": 8.251298904418945, "rewards/rejected": -8.482223510742188, "step": 4036 }, { "epoch": 0.89, "learning_rate": 9.813175168151801e-06, "logits/chosen": -1.2238963842391968, "logits/rejected": -1.1515992879867554, "logps/chosen": -138.55728149414062, "logps/rejected": -222.59820556640625, "loss": 0.0489, "rewards/accuracies": 1.0, "rewards/chosen": -0.08223877102136612, "rewards/margins": 2.4789278507232666, "rewards/rejected": -2.561166524887085, "step": 4037 }, { "epoch": 0.89, "learning_rate": 9.812689492665777e-06, "logits/chosen": -1.4979908466339111, "logits/rejected": -1.4620251655578613, "logps/chosen": -160.12486267089844, "logps/rejected": -244.90359497070312, "loss": 0.0888, "rewards/accuracies": 1.0, "rewards/chosen": 0.9323089718818665, "rewards/margins": 1.6553635597229004, "rewards/rejected": -0.7230545282363892, "step": 4038 }, { "epoch": 0.89, "learning_rate": 9.812203198760722e-06, "logits/chosen": -1.5342212915420532, "logits/rejected": -1.5651897192001343, "logps/chosen": -166.47193908691406, "logps/rejected": -77.04585266113281, "loss": 0.2852, "rewards/accuracies": 1.0, "rewards/chosen": -4.370896339416504, "rewards/margins": 1.0527939796447754, "rewards/rejected": -5.423690319061279, "step": 4039 }, { "epoch": 0.89, "learning_rate": 9.811716286499125e-06, "logits/chosen": -1.563957691192627, "logits/rejected": -1.376362681388855, "logps/chosen": -102.91645050048828, "logps/rejected": -229.49424743652344, "loss": 2.2795, "rewards/accuracies": 0.0, "rewards/chosen": -1.610681176185608, "rewards/margins": -4.547706604003906, "rewards/rejected": 2.937025547027588, "step": 4040 }, { "epoch": 0.89, "learning_rate": 9.811228755943551e-06, "logits/chosen": -1.3448327779769897, "logits/rejected": -1.401189923286438, "logps/chosen": -103.10139465332031, "logps/rejected": -122.1581039428711, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": -0.23811569809913635, "rewards/margins": 2.233738660812378, "rewards/rejected": -2.4718544483184814, "step": 4041 }, { "epoch": 0.89, "learning_rate": 9.810740607156647e-06, "logits/chosen": -1.3156102895736694, "logits/rejected": -1.4700770378112793, "logps/chosen": -219.75778198242188, "logps/rejected": -160.60281372070312, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": 1.358052134513855, "rewards/margins": 7.272488117218018, "rewards/rejected": -5.914435863494873, "step": 4042 }, { "epoch": 0.89, "learning_rate": 9.810251840201143e-06, "logits/chosen": -1.7349549531936646, "logits/rejected": -1.8527804613113403, "logps/chosen": -136.0471954345703, "logps/rejected": -108.56397247314453, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.3432663679122925, "rewards/margins": 5.795687675476074, "rewards/rejected": -7.138954162597656, "step": 4043 }, { "epoch": 0.9, "learning_rate": 9.80976245513984e-06, "logits/chosen": -1.289985179901123, "logits/rejected": -1.3203089237213135, "logps/chosen": -103.80596160888672, "logps/rejected": -105.65612030029297, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 0.2191169708967209, "rewards/margins": 7.950226783752441, "rewards/rejected": -7.731109619140625, "step": 4044 }, { "epoch": 0.9, "learning_rate": 9.809272452035622e-06, "logits/chosen": -1.3174294233322144, "logits/rejected": -1.3189291954040527, "logps/chosen": -106.70895385742188, "logps/rejected": -129.6063995361328, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": -1.2933235168457031, "rewards/margins": 3.916062355041504, "rewards/rejected": -5.209385871887207, "step": 4045 }, { "epoch": 0.9, "learning_rate": 9.808781830951457e-06, "logits/chosen": -0.9671342968940735, "logits/rejected": -0.9762773513793945, "logps/chosen": -243.79458618164062, "logps/rejected": -219.7732696533203, "loss": 0.0726, "rewards/accuracies": 1.0, "rewards/chosen": 1.308172583580017, "rewards/margins": 1.8657469749450684, "rewards/rejected": -0.557574450969696, "step": 4046 }, { "epoch": 0.9, "learning_rate": 9.808290591950386e-06, "logits/chosen": -1.3123985528945923, "logits/rejected": -1.255688190460205, "logps/chosen": -80.76355743408203, "logps/rejected": -203.2790069580078, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.8141472339630127, "rewards/margins": 4.243160247802734, "rewards/rejected": -6.057307720184326, "step": 4047 }, { "epoch": 0.9, "learning_rate": 9.807798735095533e-06, "logits/chosen": -1.617466926574707, "logits/rejected": -1.5102795362472534, "logps/chosen": -162.42916870117188, "logps/rejected": -326.5412902832031, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.020880103111267, "rewards/margins": 3.8213376998901367, "rewards/rejected": -4.842217922210693, "step": 4048 }, { "epoch": 0.9, "learning_rate": 9.807306260450098e-06, "logits/chosen": -1.1916985511779785, "logits/rejected": -1.2299489974975586, "logps/chosen": -198.35194396972656, "logps/rejected": -195.03585815429688, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.9221725463867188, "rewards/margins": 4.208011150360107, "rewards/rejected": -6.130183696746826, "step": 4049 }, { "epoch": 0.9, "learning_rate": 9.806813168077367e-06, "logits/chosen": -1.404030442237854, "logits/rejected": -1.4207862615585327, "logps/chosen": -104.39848327636719, "logps/rejected": -97.08761596679688, "loss": 0.2125, "rewards/accuracies": 1.0, "rewards/chosen": -0.12816773355007172, "rewards/margins": 0.7617095708847046, "rewards/rejected": -0.8898773193359375, "step": 4050 }, { "epoch": 0.9, "learning_rate": 9.806319458040701e-06, "logits/chosen": -1.3874577283859253, "logits/rejected": -1.3270560503005981, "logps/chosen": -107.81678771972656, "logps/rejected": -162.11883544921875, "loss": 0.0416, "rewards/accuracies": 1.0, "rewards/chosen": -3.575608015060425, "rewards/margins": 2.6649367809295654, "rewards/rejected": -6.24054479598999, "step": 4051 }, { "epoch": 0.9, "learning_rate": 9.805825130403536e-06, "logits/chosen": -1.5542349815368652, "logits/rejected": -1.5425087213516235, "logps/chosen": -111.17803192138672, "logps/rejected": -183.1456298828125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.2258758544921875, "rewards/margins": 6.305474758148193, "rewards/rejected": -6.079598903656006, "step": 4052 }, { "epoch": 0.9, "learning_rate": 9.805330185229397e-06, "logits/chosen": -1.5679064989089966, "logits/rejected": -1.496606469154358, "logps/chosen": -186.0060272216797, "logps/rejected": -233.59568786621094, "loss": 1.4637, "rewards/accuracies": 0.0, "rewards/chosen": -2.283146619796753, "rewards/margins": -2.853228807449341, "rewards/rejected": 0.5700821280479431, "step": 4053 }, { "epoch": 0.9, "learning_rate": 9.804834622581879e-06, "logits/chosen": -1.105299711227417, "logits/rejected": -1.1043453216552734, "logps/chosen": -78.74010467529297, "logps/rejected": -152.0776824951172, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/chosen": -1.0230019092559814, "rewards/margins": 3.5890328884124756, "rewards/rejected": -4.612034797668457, "step": 4054 }, { "epoch": 0.9, "learning_rate": 9.804338442524661e-06, "logits/chosen": -1.5109604597091675, "logits/rejected": -1.5516064167022705, "logps/chosen": -134.45883178710938, "logps/rejected": -85.13594055175781, "loss": 0.1054, "rewards/accuracies": 1.0, "rewards/chosen": -0.2978317439556122, "rewards/margins": 1.4609603881835938, "rewards/rejected": -1.7587921619415283, "step": 4055 }, { "epoch": 0.9, "learning_rate": 9.803841645121505e-06, "logits/chosen": -1.338013768196106, "logits/rejected": -1.4239375591278076, "logps/chosen": -231.45501708984375, "logps/rejected": -122.76959991455078, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.9253524541854858, "rewards/margins": 9.125570297241211, "rewards/rejected": -7.2002177238464355, "step": 4056 }, { "epoch": 0.9, "learning_rate": 9.803344230436245e-06, "logits/chosen": -1.310790777206421, "logits/rejected": -1.310790777206421, "logps/chosen": -104.01846313476562, "logps/rejected": -104.01846313476562, "loss": 0.4321, "rewards/accuracies": 0.0, "rewards/chosen": -6.245726585388184, "rewards/margins": 0.0, "rewards/rejected": -6.245726585388184, "step": 4057 }, { "epoch": 0.9, "learning_rate": 9.802846198532798e-06, "logits/chosen": -1.1288816928863525, "logits/rejected": -1.1399708986282349, "logps/chosen": -127.65275573730469, "logps/rejected": -130.35064697265625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.0512237548828125, "rewards/margins": 4.7066240310668945, "rewards/rejected": -4.757847785949707, "step": 4058 }, { "epoch": 0.9, "learning_rate": 9.80234754947516e-06, "logits/chosen": -1.4592710733413696, "logits/rejected": -1.389512300491333, "logps/chosen": -124.18988800048828, "logps/rejected": -244.4924774169922, "loss": 0.1038, "rewards/accuracies": 1.0, "rewards/chosen": 0.4967384338378906, "rewards/margins": 2.3422188758850098, "rewards/rejected": -1.8454803228378296, "step": 4059 }, { "epoch": 0.9, "learning_rate": 9.801848283327406e-06, "logits/chosen": -1.3802180290222168, "logits/rejected": -0.8312481045722961, "logps/chosen": -114.28529357910156, "logps/rejected": -738.47998046875, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -1.53423011302948, "rewards/margins": 47.520591735839844, "rewards/rejected": -49.0548210144043, "step": 4060 }, { "epoch": 0.9, "learning_rate": 9.801348400153692e-06, "logits/chosen": -1.0446968078613281, "logits/rejected": -1.0458887815475464, "logps/chosen": -179.400390625, "logps/rejected": -211.809326171875, "loss": 1.4194, "rewards/accuracies": 0.0, "rewards/chosen": -0.21692657470703125, "rewards/margins": -2.7265243530273438, "rewards/rejected": 2.5095977783203125, "step": 4061 }, { "epoch": 0.9, "learning_rate": 9.800847900018251e-06, "logits/chosen": -1.5257214307785034, "logits/rejected": -1.5016990900039673, "logps/chosen": -150.46620178222656, "logps/rejected": -268.7657775878906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.700382947921753, "rewards/margins": 7.4002275466918945, "rewards/rejected": -11.100610733032227, "step": 4062 }, { "epoch": 0.9, "learning_rate": 9.800346782985395e-06, "logits/chosen": -1.3745911121368408, "logits/rejected": -1.3953756093978882, "logps/chosen": -116.44123840332031, "logps/rejected": -71.81558990478516, "loss": 0.1171, "rewards/accuracies": 1.0, "rewards/chosen": -4.105095863342285, "rewards/margins": 1.3392643928527832, "rewards/rejected": -5.444360256195068, "step": 4063 }, { "epoch": 0.9, "learning_rate": 9.799845049119517e-06, "logits/chosen": -1.2223423719406128, "logits/rejected": -1.1858935356140137, "logps/chosen": -192.4262237548828, "logps/rejected": -291.30242919921875, "loss": 0.4628, "rewards/accuracies": 1.0, "rewards/chosen": -4.946671962738037, "rewards/margins": 1.5769577026367188, "rewards/rejected": -6.523629665374756, "step": 4064 }, { "epoch": 0.9, "learning_rate": 9.79934269848509e-06, "logits/chosen": -1.5266541242599487, "logits/rejected": -1.475130558013916, "logps/chosen": -72.05604553222656, "logps/rejected": -124.52339172363281, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": 0.5728119015693665, "rewards/margins": 5.059915542602539, "rewards/rejected": -4.487103462219238, "step": 4065 }, { "epoch": 0.9, "learning_rate": 9.798839731146662e-06, "logits/chosen": -1.0406235456466675, "logits/rejected": -0.9661087393760681, "logps/chosen": -129.8794708251953, "logps/rejected": -155.3831024169922, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.7563568353652954, "rewards/margins": 3.7552132606506348, "rewards/rejected": -4.511569976806641, "step": 4066 }, { "epoch": 0.9, "learning_rate": 9.798336147168865e-06, "logits/chosen": -1.2503246068954468, "logits/rejected": -1.2503246068954468, "logps/chosen": -239.49623107910156, "logps/rejected": -239.49623107910156, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.079524517059326, "rewards/margins": 0.0, "rewards/rejected": -4.079524517059326, "step": 4067 }, { "epoch": 0.9, "learning_rate": 9.797831946616408e-06, "logits/chosen": -1.1952037811279297, "logits/rejected": -1.1421467065811157, "logps/chosen": -236.21971130371094, "logps/rejected": -262.7481689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.3145675659179688, "rewards/margins": 12.001166343688965, "rewards/rejected": -10.686598777770996, "step": 4068 }, { "epoch": 0.9, "learning_rate": 9.797327129554081e-06, "logits/chosen": -1.2356209754943848, "logits/rejected": -1.2329355478286743, "logps/chosen": -122.89949035644531, "logps/rejected": -126.10537719726562, "loss": 1.5965, "rewards/accuracies": 1.0, "rewards/chosen": -1.1983795166015625, "rewards/margins": 1.7842209339141846, "rewards/rejected": -2.982600450515747, "step": 4069 }, { "epoch": 0.9, "learning_rate": 9.796821696046748e-06, "logits/chosen": -1.0886712074279785, "logits/rejected": -1.081363320350647, "logps/chosen": -92.08460235595703, "logps/rejected": -110.6437759399414, "loss": 0.0691, "rewards/accuracies": 1.0, "rewards/chosen": -1.4604469537734985, "rewards/margins": 2.7451744079589844, "rewards/rejected": -4.205621242523193, "step": 4070 }, { "epoch": 0.9, "learning_rate": 9.79631564615936e-06, "logits/chosen": -1.47166907787323, "logits/rejected": -1.4778270721435547, "logps/chosen": -125.93677520751953, "logps/rejected": -190.78781127929688, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.3477577269077301, "rewards/margins": 5.91655969619751, "rewards/rejected": -6.264317512512207, "step": 4071 }, { "epoch": 0.9, "learning_rate": 9.79580897995694e-06, "logits/chosen": -1.6045701503753662, "logits/rejected": -1.7145429849624634, "logps/chosen": -113.42999267578125, "logps/rejected": -78.40570831298828, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.410696417093277, "rewards/margins": 6.056137561798096, "rewards/rejected": -5.645441055297852, "step": 4072 }, { "epoch": 0.9, "learning_rate": 9.795301697504595e-06, "logits/chosen": -0.9459453821182251, "logits/rejected": -0.9779538512229919, "logps/chosen": -221.81997680664062, "logps/rejected": -138.49351501464844, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -1.8382415771484375, "rewards/margins": 3.1376214027404785, "rewards/rejected": -4.975862979888916, "step": 4073 }, { "epoch": 0.9, "learning_rate": 9.794793798867512e-06, "logits/chosen": -1.2462801933288574, "logits/rejected": -1.2052253484725952, "logps/chosen": -124.24132537841797, "logps/rejected": -265.22998046875, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -1.6964317560195923, "rewards/margins": 4.081446170806885, "rewards/rejected": -5.7778778076171875, "step": 4074 }, { "epoch": 0.9, "learning_rate": 9.794285284110949e-06, "logits/chosen": -0.9647781252861023, "logits/rejected": -0.9662731289863586, "logps/chosen": -77.80131530761719, "logps/rejected": -93.74063110351562, "loss": 0.1381, "rewards/accuracies": 1.0, "rewards/chosen": -2.698558807373047, "rewards/margins": 1.1837735176086426, "rewards/rejected": -3.8823323249816895, "step": 4075 }, { "epoch": 0.9, "learning_rate": 9.793776153300253e-06, "logits/chosen": -1.1457792520523071, "logits/rejected": -1.1484616994857788, "logps/chosen": -96.2401351928711, "logps/rejected": -103.84515380859375, "loss": 0.1527, "rewards/accuracies": 1.0, "rewards/chosen": -0.5255928039550781, "rewards/margins": 1.3691376447677612, "rewards/rejected": -1.8947304487228394, "step": 4076 }, { "epoch": 0.9, "learning_rate": 9.793266406500847e-06, "logits/chosen": -1.524013638496399, "logits/rejected": -1.5919042825698853, "logps/chosen": -180.57781982421875, "logps/rejected": -239.35687255859375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.6076614260673523, "rewards/margins": 8.262396812438965, "rewards/rejected": -7.654735088348389, "step": 4077 }, { "epoch": 0.9, "learning_rate": 9.792756043778229e-06, "logits/chosen": -1.5517501831054688, "logits/rejected": -1.5655561685562134, "logps/chosen": -96.15904235839844, "logps/rejected": -189.0894775390625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 1.0863227844238281, "rewards/margins": 5.838077545166016, "rewards/rejected": -4.7517547607421875, "step": 4078 }, { "epoch": 0.9, "learning_rate": 9.79224506519798e-06, "logits/chosen": -1.0188740491867065, "logits/rejected": -0.9697564244270325, "logps/chosen": -122.61178588867188, "logps/rejected": -204.50228881835938, "loss": 0.1436, "rewards/accuracies": 1.0, "rewards/chosen": -4.029768466949463, "rewards/margins": 1.137420654296875, "rewards/rejected": -5.167189121246338, "step": 4079 }, { "epoch": 0.9, "learning_rate": 9.791733470825763e-06, "logits/chosen": -1.2664881944656372, "logits/rejected": -1.2188652753829956, "logps/chosen": -63.50836944580078, "logps/rejected": -111.62556457519531, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": 0.20179367065429688, "rewards/margins": 3.461850881576538, "rewards/rejected": -3.260057210922241, "step": 4080 }, { "epoch": 0.9, "learning_rate": 9.791221260727313e-06, "logits/chosen": -1.1869289875030518, "logits/rejected": -1.1662076711654663, "logps/chosen": -89.55203247070312, "logps/rejected": -89.64682006835938, "loss": 1.4934, "rewards/accuracies": 0.0, "rewards/chosen": -3.2685298919677734, "rewards/margins": -0.0670173168182373, "rewards/rejected": -3.201512575149536, "step": 4081 }, { "epoch": 0.9, "learning_rate": 9.790708434968448e-06, "logits/chosen": -1.2447203397750854, "logits/rejected": -1.238301157951355, "logps/chosen": -105.61356353759766, "logps/rejected": -147.3721923828125, "loss": 0.2969, "rewards/accuracies": 1.0, "rewards/chosen": -1.3766144514083862, "rewards/margins": 2.976299285888672, "rewards/rejected": -4.352913856506348, "step": 4082 }, { "epoch": 0.9, "learning_rate": 9.790194993615065e-06, "logits/chosen": -1.6019577980041504, "logits/rejected": -1.5864802598953247, "logps/chosen": -154.62643432617188, "logps/rejected": -140.00302124023438, "loss": 0.1587, "rewards/accuracies": 1.0, "rewards/chosen": -3.358259677886963, "rewards/margins": 1.4101972579956055, "rewards/rejected": -4.768456935882568, "step": 4083 }, { "epoch": 0.9, "learning_rate": 9.78968093673314e-06, "logits/chosen": -1.5299640893936157, "logits/rejected": -1.6386735439300537, "logps/chosen": -185.01336669921875, "logps/rejected": -117.79313659667969, "loss": 0.1022, "rewards/accuracies": 1.0, "rewards/chosen": 2.1938629150390625, "rewards/margins": 1.5170745849609375, "rewards/rejected": 0.676788330078125, "step": 4084 }, { "epoch": 0.9, "learning_rate": 9.789166264388732e-06, "logits/chosen": -1.1908766031265259, "logits/rejected": -1.1754459142684937, "logps/chosen": -198.6179962158203, "logps/rejected": -352.6158447265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 4.118019104003906, "rewards/margins": 13.178367614746094, "rewards/rejected": -9.060348510742188, "step": 4085 }, { "epoch": 0.9, "learning_rate": 9.78865097664797e-06, "logits/chosen": -1.3829329013824463, "logits/rejected": -1.2669713497161865, "logps/chosen": -148.0043182373047, "logps/rejected": -287.90533447265625, "loss": 0.132, "rewards/accuracies": 1.0, "rewards/chosen": -0.02853546105325222, "rewards/margins": 1.1971298456192017, "rewards/rejected": -1.2256653308868408, "step": 4086 }, { "epoch": 0.9, "learning_rate": 9.788135073577069e-06, "logits/chosen": -1.2957789897918701, "logits/rejected": -1.2917170524597168, "logps/chosen": -131.95401000976562, "logps/rejected": -204.18267822265625, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": 0.5135132074356079, "rewards/margins": 6.403019905090332, "rewards/rejected": -5.889506816864014, "step": 4087 }, { "epoch": 0.9, "learning_rate": 9.787618555242321e-06, "logits/chosen": -1.1290704011917114, "logits/rejected": -1.1156530380249023, "logps/chosen": -195.26165771484375, "logps/rejected": -189.35447692871094, "loss": 0.1483, "rewards/accuracies": 1.0, "rewards/chosen": 2.851614475250244, "rewards/margins": 10.78795051574707, "rewards/rejected": -7.936336040496826, "step": 4088 }, { "epoch": 0.91, "learning_rate": 9.787101421710099e-06, "logits/chosen": -1.316963791847229, "logits/rejected": -1.2564648389816284, "logps/chosen": -169.38189697265625, "logps/rejected": -260.8314208984375, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": 0.496551513671875, "rewards/margins": 3.1564621925354004, "rewards/rejected": -2.6599106788635254, "step": 4089 }, { "epoch": 0.91, "learning_rate": 9.786583673046851e-06, "logits/chosen": -1.2160987854003906, "logits/rejected": -1.412745714187622, "logps/chosen": -246.87472534179688, "logps/rejected": -62.31531524658203, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -0.508453369140625, "rewards/margins": 3.3290927410125732, "rewards/rejected": -3.8375461101531982, "step": 4090 }, { "epoch": 0.91, "learning_rate": 9.786065309319107e-06, "logits/chosen": -1.3545504808425903, "logits/rejected": -1.3857524394989014, "logps/chosen": -64.00272369384766, "logps/rejected": -90.94326782226562, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": 1.3182533979415894, "rewards/margins": 3.3557000160217285, "rewards/rejected": -2.0374467372894287, "step": 4091 }, { "epoch": 0.91, "learning_rate": 9.785546330593479e-06, "logits/chosen": -1.2722253799438477, "logits/rejected": -1.2777119874954224, "logps/chosen": -86.61876678466797, "logps/rejected": -98.66857147216797, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/chosen": 0.8754249811172485, "rewards/margins": 2.07558536529541, "rewards/rejected": -1.200160264968872, "step": 4092 }, { "epoch": 0.91, "learning_rate": 9.78502673693665e-06, "logits/chosen": -1.3063822984695435, "logits/rejected": -1.3074740171432495, "logps/chosen": -105.18634033203125, "logps/rejected": -244.13731384277344, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.897834062576294, "rewards/margins": 10.209939002990723, "rewards/rejected": -12.107772827148438, "step": 4093 }, { "epoch": 0.91, "learning_rate": 9.784506528415388e-06, "logits/chosen": -1.2584556341171265, "logits/rejected": -1.2976529598236084, "logps/chosen": -97.01747131347656, "logps/rejected": -128.43710327148438, "loss": 0.4439, "rewards/accuracies": 1.0, "rewards/chosen": -0.28570252656936646, "rewards/margins": 4.253782749176025, "rewards/rejected": -4.539485454559326, "step": 4094 }, { "epoch": 0.91, "learning_rate": 9.78398570509654e-06, "logits/chosen": -1.0368626117706299, "logits/rejected": -0.9766259789466858, "logps/chosen": -126.37552642822266, "logps/rejected": -93.13569641113281, "loss": 0.1271, "rewards/accuracies": 1.0, "rewards/chosen": -0.6684989929199219, "rewards/margins": 1.3493735790252686, "rewards/rejected": -2.0178725719451904, "step": 4095 }, { "epoch": 0.91, "learning_rate": 9.783464267047027e-06, "logits/chosen": -1.5604183673858643, "logits/rejected": -1.5604183673858643, "logps/chosen": -91.72818756103516, "logps/rejected": -91.72818756103516, "loss": 1.1354, "rewards/accuracies": 0.0, "rewards/chosen": -3.8068206310272217, "rewards/margins": 0.0, "rewards/rejected": -3.8068206310272217, "step": 4096 }, { "epoch": 0.91, "learning_rate": 9.782942214333855e-06, "logits/chosen": -1.373311996459961, "logits/rejected": -1.373311996459961, "logps/chosen": -162.98202514648438, "logps/rejected": -162.98202514648438, "loss": 0.3485, "rewards/accuracies": 0.0, "rewards/chosen": -1.1579757928848267, "rewards/margins": 0.0, "rewards/rejected": -1.1579757928848267, "step": 4097 }, { "epoch": 0.91, "learning_rate": 9.782419547024108e-06, "logits/chosen": -1.5833516120910645, "logits/rejected": -1.6436198949813843, "logps/chosen": -105.39942932128906, "logps/rejected": -154.89508056640625, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.744702160358429, "rewards/margins": 4.052346706390381, "rewards/rejected": -4.797049045562744, "step": 4098 }, { "epoch": 0.91, "learning_rate": 9.781896265184944e-06, "logits/chosen": -1.2635537385940552, "logits/rejected": -1.173920750617981, "logps/chosen": -105.07157897949219, "logps/rejected": -198.77090454101562, "loss": 0.1835, "rewards/accuracies": 1.0, "rewards/chosen": 0.09167633205652237, "rewards/margins": 0.8136093616485596, "rewards/rejected": -0.7219330072402954, "step": 4099 }, { "epoch": 0.91, "learning_rate": 9.781372368883607e-06, "logits/chosen": -1.4141298532485962, "logits/rejected": -1.3911235332489014, "logps/chosen": -79.78118133544922, "logps/rejected": -105.1443862915039, "loss": 0.1254, "rewards/accuracies": 1.0, "rewards/chosen": -0.40483322739601135, "rewards/margins": 1.266687035560608, "rewards/rejected": -1.6715202331542969, "step": 4100 }, { "epoch": 0.91, "learning_rate": 9.780847858187414e-06, "logits/chosen": -1.4652490615844727, "logits/rejected": -1.4423075914382935, "logps/chosen": -150.35284423828125, "logps/rejected": -281.11273193359375, "loss": 0.9934, "rewards/accuracies": 1.0, "rewards/chosen": -1.5845062732696533, "rewards/margins": 6.125635147094727, "rewards/rejected": -7.710141181945801, "step": 4101 }, { "epoch": 0.91, "learning_rate": 9.780322733163766e-06, "logits/chosen": -0.9282065033912659, "logits/rejected": -1.0486719608306885, "logps/chosen": -231.6378936767578, "logps/rejected": -98.04911804199219, "loss": 0.0813, "rewards/accuracies": 1.0, "rewards/chosen": -0.17749939858913422, "rewards/margins": 1.7342712879180908, "rewards/rejected": -1.9117707014083862, "step": 4102 }, { "epoch": 0.91, "learning_rate": 9.779796993880135e-06, "logits/chosen": -1.3279117345809937, "logits/rejected": -1.3188711404800415, "logps/chosen": -204.25296020507812, "logps/rejected": -205.88475036621094, "loss": 0.0466, "rewards/accuracies": 1.0, "rewards/chosen": 0.8305267691612244, "rewards/margins": 2.3261306285858154, "rewards/rejected": -1.4956039190292358, "step": 4103 }, { "epoch": 0.91, "learning_rate": 9.779270640404082e-06, "logits/chosen": -1.5639228820800781, "logits/rejected": -1.5330034494400024, "logps/chosen": -170.19615173339844, "logps/rejected": -262.2122802734375, "loss": 0.1495, "rewards/accuracies": 1.0, "rewards/chosen": -0.5043106079101562, "rewards/margins": 1.1605576276779175, "rewards/rejected": -1.6648682355880737, "step": 4104 }, { "epoch": 0.91, "learning_rate": 9.778743672803241e-06, "logits/chosen": -1.1757625341415405, "logits/rejected": -1.1757625341415405, "logps/chosen": -251.3217010498047, "logps/rejected": -251.3217010498047, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.4593095779418945, "rewards/margins": 0.0, "rewards/rejected": -5.4593095779418945, "step": 4105 }, { "epoch": 0.91, "learning_rate": 9.778216091145325e-06, "logits/chosen": -1.6409555673599243, "logits/rejected": -1.6409555673599243, "logps/chosen": -180.1927490234375, "logps/rejected": -180.1927490234375, "loss": 0.3512, "rewards/accuracies": 0.0, "rewards/chosen": -8.312112808227539, "rewards/margins": 0.0, "rewards/rejected": -8.312112808227539, "step": 4106 }, { "epoch": 0.91, "learning_rate": 9.777687895498128e-06, "logits/chosen": -1.4470033645629883, "logits/rejected": -0.8006899356842041, "logps/chosen": -93.50173950195312, "logps/rejected": -546.1937255859375, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": 2.0062386989593506, "rewards/margins": 40.80638885498047, "rewards/rejected": -38.80015182495117, "step": 4107 }, { "epoch": 0.91, "learning_rate": 9.777159085929524e-06, "logits/chosen": -1.5299087762832642, "logits/rejected": -1.4701251983642578, "logps/chosen": -130.30865478515625, "logps/rejected": -183.81103515625, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": 0.18172608315944672, "rewards/margins": 3.0011229515075684, "rewards/rejected": -2.81939697265625, "step": 4108 }, { "epoch": 0.91, "learning_rate": 9.776629662507458e-06, "logits/chosen": -1.078148603439331, "logits/rejected": -0.840258002281189, "logps/chosen": -200.51657104492188, "logps/rejected": -453.80426025390625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": 1.5714142322540283, "rewards/margins": 33.39656066894531, "rewards/rejected": -31.82514762878418, "step": 4109 }, { "epoch": 0.91, "learning_rate": 9.776099625299966e-06, "logits/chosen": -1.5068174600601196, "logits/rejected": -1.5059599876403809, "logps/chosen": -59.00249481201172, "logps/rejected": -148.19091796875, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.0908997058868408, "rewards/margins": 4.9728803634643555, "rewards/rejected": -6.063780307769775, "step": 4110 }, { "epoch": 0.91, "learning_rate": 9.775568974375151e-06, "logits/chosen": -1.3770458698272705, "logits/rejected": -1.3926995992660522, "logps/chosen": -112.1514892578125, "logps/rejected": -93.45040893554688, "loss": 0.1449, "rewards/accuracies": 1.0, "rewards/chosen": -3.9637763500213623, "rewards/margins": 1.4591896533966064, "rewards/rejected": -5.422966003417969, "step": 4111 }, { "epoch": 0.91, "learning_rate": 9.775037709801206e-06, "logits/chosen": -1.3975450992584229, "logits/rejected": -1.4298723936080933, "logps/chosen": -73.35226440429688, "logps/rejected": -68.22813415527344, "loss": 0.3283, "rewards/accuracies": 1.0, "rewards/chosen": -0.4987014830112457, "rewards/margins": 2.0922446250915527, "rewards/rejected": -2.5909461975097656, "step": 4112 }, { "epoch": 0.91, "learning_rate": 9.774505831646392e-06, "logits/chosen": -1.1068155765533447, "logits/rejected": -1.0395843982696533, "logps/chosen": -124.96629333496094, "logps/rejected": -148.65716552734375, "loss": 0.0526, "rewards/accuracies": 1.0, "rewards/chosen": 0.835418701171875, "rewards/margins": 5.165463447570801, "rewards/rejected": -4.330044746398926, "step": 4113 }, { "epoch": 0.91, "learning_rate": 9.773973339979056e-06, "logits/chosen": -1.144493579864502, "logits/rejected": -1.2014700174331665, "logps/chosen": -188.61691284179688, "logps/rejected": -117.75119018554688, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.10914917290210724, "rewards/margins": 8.415117263793945, "rewards/rejected": -8.305968284606934, "step": 4114 }, { "epoch": 0.91, "learning_rate": 9.773440234867623e-06, "logits/chosen": -1.4483754634857178, "logits/rejected": -1.4880328178405762, "logps/chosen": -133.20068359375, "logps/rejected": -143.75051879882812, "loss": 0.1998, "rewards/accuracies": 1.0, "rewards/chosen": -1.209490180015564, "rewards/margins": 0.7213478088378906, "rewards/rejected": -1.9308379888534546, "step": 4115 }, { "epoch": 0.91, "learning_rate": 9.772906516380594e-06, "logits/chosen": -1.1868501901626587, "logits/rejected": -1.1868501901626587, "logps/chosen": -118.13845825195312, "logps/rejected": -118.13845825195312, "loss": 0.3496, "rewards/accuracies": 0.0, "rewards/chosen": -1.4452179670333862, "rewards/margins": 0.0, "rewards/rejected": -1.4452179670333862, "step": 4116 }, { "epoch": 0.91, "learning_rate": 9.772372184586551e-06, "logits/chosen": -1.2960751056671143, "logits/rejected": -1.2960751056671143, "logps/chosen": -159.00526428222656, "logps/rejected": -159.00526428222656, "loss": 0.4302, "rewards/accuracies": 0.0, "rewards/chosen": -3.716696262359619, "rewards/margins": 0.0, "rewards/rejected": -3.716696262359619, "step": 4117 }, { "epoch": 0.91, "learning_rate": 9.771837239554156e-06, "logits/chosen": -1.3987675905227661, "logits/rejected": -1.365380048751831, "logps/chosen": -64.28401184082031, "logps/rejected": -222.7130889892578, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -0.9760867953300476, "rewards/margins": 4.8398756980896, "rewards/rejected": -5.815962314605713, "step": 4118 }, { "epoch": 0.91, "learning_rate": 9.771301681352148e-06, "logits/chosen": -1.2400579452514648, "logits/rejected": -1.1429649591445923, "logps/chosen": -113.45247650146484, "logps/rejected": -253.558349609375, "loss": 0.4481, "rewards/accuracies": 1.0, "rewards/chosen": -0.723383367061615, "rewards/margins": 0.10741192102432251, "rewards/rejected": -0.8307952880859375, "step": 4119 }, { "epoch": 0.91, "learning_rate": 9.770765510049342e-06, "logits/chosen": -1.3568705320358276, "logits/rejected": -1.3541876077651978, "logps/chosen": -167.80105590820312, "logps/rejected": -211.8009490966797, "loss": 0.1493, "rewards/accuracies": 1.0, "rewards/chosen": 0.10860290378332138, "rewards/margins": 3.364586114883423, "rewards/rejected": -3.2559831142425537, "step": 4120 }, { "epoch": 0.91, "learning_rate": 9.770228725714637e-06, "logits/chosen": -1.4111131429672241, "logits/rejected": -1.4255739450454712, "logps/chosen": -128.24676513671875, "logps/rejected": -123.25086975097656, "loss": 0.2169, "rewards/accuracies": 1.0, "rewards/chosen": -1.6926612854003906, "rewards/margins": 0.627448320388794, "rewards/rejected": -2.3201096057891846, "step": 4121 }, { "epoch": 0.91, "learning_rate": 9.769691328417008e-06, "logits/chosen": -1.1844900846481323, "logits/rejected": -1.167154312133789, "logps/chosen": -122.43599700927734, "logps/rejected": -111.28275299072266, "loss": 0.0745, "rewards/accuracies": 1.0, "rewards/chosen": -0.13367462158203125, "rewards/margins": 2.2797577381134033, "rewards/rejected": -2.4134323596954346, "step": 4122 }, { "epoch": 0.91, "learning_rate": 9.769153318225509e-06, "logits/chosen": -1.2953089475631714, "logits/rejected": -1.316144347190857, "logps/chosen": -64.86416625976562, "logps/rejected": -92.12983703613281, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -2.2375991344451904, "rewards/margins": 4.448798179626465, "rewards/rejected": -6.686397075653076, "step": 4123 }, { "epoch": 0.91, "learning_rate": 9.768614695209273e-06, "logits/chosen": -1.363171100616455, "logits/rejected": -1.3754509687423706, "logps/chosen": -115.26200103759766, "logps/rejected": -86.06702423095703, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -3.117586612701416, "rewards/margins": 2.972099781036377, "rewards/rejected": -6.089686393737793, "step": 4124 }, { "epoch": 0.91, "learning_rate": 9.768075459437513e-06, "logits/chosen": -1.3482416868209839, "logits/rejected": -1.2621930837631226, "logps/chosen": -123.39654541015625, "logps/rejected": -250.23829650878906, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 3.9852569103240967, "rewards/margins": 9.136674880981445, "rewards/rejected": -5.1514177322387695, "step": 4125 }, { "epoch": 0.91, "learning_rate": 9.76753561097952e-06, "logits/chosen": -1.414419174194336, "logits/rejected": -1.3651726245880127, "logps/chosen": -88.3733139038086, "logps/rejected": -201.16908264160156, "loss": 0.058, "rewards/accuracies": 1.0, "rewards/chosen": -0.4418182373046875, "rewards/margins": 2.7662904262542725, "rewards/rejected": -3.20810866355896, "step": 4126 }, { "epoch": 0.91, "learning_rate": 9.766995149904658e-06, "logits/chosen": -1.3143213987350464, "logits/rejected": -1.3143213987350464, "logps/chosen": -133.3391876220703, "logps/rejected": -133.3391876220703, "loss": 0.3524, "rewards/accuracies": 0.0, "rewards/chosen": -2.567098379135132, "rewards/margins": 0.0, "rewards/rejected": -2.567098379135132, "step": 4127 }, { "epoch": 0.91, "learning_rate": 9.766454076282382e-06, "logits/chosen": -1.2799497842788696, "logits/rejected": -1.2799497842788696, "logps/chosen": -103.47190856933594, "logps/rejected": -103.47190856933594, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.505505561828613, "rewards/margins": 0.0, "rewards/rejected": -5.505505561828613, "step": 4128 }, { "epoch": 0.91, "learning_rate": 9.765912390182216e-06, "logits/chosen": -1.5002787113189697, "logits/rejected": -1.4921807050704956, "logps/chosen": -91.67192077636719, "logps/rejected": -126.05886840820312, "loss": 0.1044, "rewards/accuracies": 1.0, "rewards/chosen": -1.6274040937423706, "rewards/margins": 1.486489176750183, "rewards/rejected": -3.1138932704925537, "step": 4129 }, { "epoch": 0.91, "learning_rate": 9.765370091673762e-06, "logits/chosen": -1.4844752550125122, "logits/rejected": -1.4096254110336304, "logps/chosen": -153.11956787109375, "logps/rejected": -193.47930908203125, "loss": 0.4635, "rewards/accuracies": 0.0, "rewards/chosen": -3.5184381008148193, "rewards/margins": -0.40753722190856934, "rewards/rejected": -3.11090087890625, "step": 4130 }, { "epoch": 0.91, "learning_rate": 9.764827180826708e-06, "logits/chosen": -1.0971424579620361, "logits/rejected": -1.0886298418045044, "logps/chosen": -53.26399230957031, "logps/rejected": -105.09532928466797, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": -0.056397248059511185, "rewards/margins": 2.4762916564941406, "rewards/rejected": -2.532688856124878, "step": 4131 }, { "epoch": 0.91, "learning_rate": 9.764283657710815e-06, "logits/chosen": -1.3086727857589722, "logits/rejected": -1.3332207202911377, "logps/chosen": -141.21551513671875, "logps/rejected": -166.94512939453125, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -0.7374939322471619, "rewards/margins": 6.390809535980225, "rewards/rejected": -7.128303527832031, "step": 4132 }, { "epoch": 0.91, "learning_rate": 9.763739522395926e-06, "logits/chosen": -1.2502299547195435, "logits/rejected": -1.273516297340393, "logps/chosen": -168.5052490234375, "logps/rejected": -234.91946411132812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 1.009667992591858, "rewards/margins": 7.981501579284668, "rewards/rejected": -6.9718337059021, "step": 4133 }, { "epoch": 0.92, "learning_rate": 9.76319477495196e-06, "logits/chosen": -1.6222283840179443, "logits/rejected": -1.6622405052185059, "logps/chosen": -100.58963012695312, "logps/rejected": -98.56477355957031, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -0.9407440423965454, "rewards/margins": 6.189180374145508, "rewards/rejected": -7.129924297332764, "step": 4134 }, { "epoch": 0.92, "learning_rate": 9.762649415448916e-06, "logits/chosen": -1.2347214221954346, "logits/rejected": -1.2042957544326782, "logps/chosen": -86.81077575683594, "logps/rejected": -93.41300201416016, "loss": 0.3983, "rewards/accuracies": 0.0, "rewards/chosen": 0.5797409415245056, "rewards/margins": -0.09646373987197876, "rewards/rejected": 0.6762046813964844, "step": 4135 }, { "epoch": 0.92, "learning_rate": 9.76210344395687e-06, "logits/chosen": -1.2290698289871216, "logits/rejected": -1.1700936555862427, "logps/chosen": -96.93171691894531, "logps/rejected": -115.65493774414062, "loss": 0.0484, "rewards/accuracies": 1.0, "rewards/chosen": -0.606030285358429, "rewards/margins": 2.2901506423950195, "rewards/rejected": -2.8961808681488037, "step": 4136 }, { "epoch": 0.92, "learning_rate": 9.76155686054598e-06, "logits/chosen": -1.2745721340179443, "logits/rejected": -1.2745721340179443, "logps/chosen": -115.68952941894531, "logps/rejected": -115.68952941894531, "loss": 0.347, "rewards/accuracies": 0.0, "rewards/chosen": -0.7855850458145142, "rewards/margins": 0.0, "rewards/rejected": -0.7855850458145142, "step": 4137 }, { "epoch": 0.92, "learning_rate": 9.76100966528648e-06, "logits/chosen": -0.997338056564331, "logits/rejected": -0.9281706809997559, "logps/chosen": -105.98262023925781, "logps/rejected": -185.72711181640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3034194707870483, "rewards/margins": 6.426989555358887, "rewards/rejected": -7.730409145355225, "step": 4138 }, { "epoch": 0.92, "learning_rate": 9.760461858248684e-06, "logits/chosen": -1.5060234069824219, "logits/rejected": -1.4936823844909668, "logps/chosen": -89.79639434814453, "logps/rejected": -156.6829376220703, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -1.2223358154296875, "rewards/margins": 3.0004806518554688, "rewards/rejected": -4.222816467285156, "step": 4139 }, { "epoch": 0.92, "learning_rate": 9.759913439502982e-06, "logits/chosen": -1.3202747106552124, "logits/rejected": -0.9704796671867371, "logps/chosen": -138.04473876953125, "logps/rejected": -287.3147277832031, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.3553581237792969, "rewards/margins": 13.231097221374512, "rewards/rejected": -14.586455345153809, "step": 4140 }, { "epoch": 0.92, "learning_rate": 9.759364409119844e-06, "logits/chosen": -1.088106632232666, "logits/rejected": -1.065779447555542, "logps/chosen": -102.64923858642578, "logps/rejected": -137.25341796875, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": 0.2241065949201584, "rewards/margins": 3.368321180343628, "rewards/rejected": -3.144214630126953, "step": 4141 }, { "epoch": 0.92, "learning_rate": 9.758814767169825e-06, "logits/chosen": -1.31036376953125, "logits/rejected": -1.2682271003723145, "logps/chosen": -163.0577392578125, "logps/rejected": -188.10476684570312, "loss": 0.94, "rewards/accuracies": 0.0, "rewards/chosen": -1.6366058588027954, "rewards/margins": -1.676716685295105, "rewards/rejected": 0.04011077806353569, "step": 4142 }, { "epoch": 0.92, "learning_rate": 9.758264513723544e-06, "logits/chosen": -1.627840518951416, "logits/rejected": -1.6174805164337158, "logps/chosen": -105.757080078125, "logps/rejected": -185.63511657714844, "loss": 0.2266, "rewards/accuracies": 1.0, "rewards/chosen": 0.6650673151016235, "rewards/margins": 9.091482162475586, "rewards/rejected": -8.426414489746094, "step": 4143 }, { "epoch": 0.92, "learning_rate": 9.757713648851714e-06, "logits/chosen": -1.219488263130188, "logits/rejected": -1.2341805696487427, "logps/chosen": -53.03541946411133, "logps/rejected": -58.303340911865234, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -1.1522144079208374, "rewards/margins": 3.7044153213500977, "rewards/rejected": -4.856629848480225, "step": 4144 }, { "epoch": 0.92, "learning_rate": 9.757162172625116e-06, "logits/chosen": -1.1034976243972778, "logits/rejected": -1.0938653945922852, "logps/chosen": -97.40372467041016, "logps/rejected": -103.12721252441406, "loss": 1.2036, "rewards/accuracies": 0.0, "rewards/chosen": -2.881742238998413, "rewards/margins": -2.267045021057129, "rewards/rejected": -0.614697277545929, "step": 4145 }, { "epoch": 0.92, "learning_rate": 9.756610085114615e-06, "logits/chosen": -1.7514379024505615, "logits/rejected": -1.724909782409668, "logps/chosen": -86.78868103027344, "logps/rejected": -85.00691223144531, "loss": 0.1555, "rewards/accuracies": 1.0, "rewards/chosen": -0.11577453464269638, "rewards/margins": 1.072975993156433, "rewards/rejected": -1.1887505054473877, "step": 4146 }, { "epoch": 0.92, "learning_rate": 9.756057386391154e-06, "logits/chosen": -1.1917510032653809, "logits/rejected": -1.2196699380874634, "logps/chosen": -102.56588745117188, "logps/rejected": -124.67105102539062, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -0.4621826112270355, "rewards/margins": 3.2631311416625977, "rewards/rejected": -3.725313663482666, "step": 4147 }, { "epoch": 0.92, "learning_rate": 9.75550407652575e-06, "logits/chosen": -1.5930572748184204, "logits/rejected": -1.5441361665725708, "logps/chosen": -95.37309265136719, "logps/rejected": -233.06466674804688, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": 0.742462158203125, "rewards/margins": 3.6250336170196533, "rewards/rejected": -2.8825714588165283, "step": 4148 }, { "epoch": 0.92, "learning_rate": 9.754950155589504e-06, "logits/chosen": -1.2327663898468018, "logits/rejected": -0.810804009437561, "logps/chosen": -88.92398071289062, "logps/rejected": -690.5513916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.08016663044691086, "rewards/margins": 10.5565767288208, "rewards/rejected": -10.476409912109375, "step": 4149 }, { "epoch": 0.92, "learning_rate": 9.754395623653595e-06, "logits/chosen": -1.3563274145126343, "logits/rejected": -1.4254602193832397, "logps/chosen": -93.88665771484375, "logps/rejected": -125.23139953613281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.002760315081104636, "rewards/margins": 8.15501594543457, "rewards/rejected": -8.15225601196289, "step": 4150 }, { "epoch": 0.92, "learning_rate": 9.753840480789278e-06, "logits/chosen": -1.3061044216156006, "logits/rejected": -1.2914955615997314, "logps/chosen": -82.54788208007812, "logps/rejected": -107.04739379882812, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": -0.6374565362930298, "rewards/margins": 2.559009552001953, "rewards/rejected": -3.1964662075042725, "step": 4151 }, { "epoch": 0.92, "learning_rate": 9.753284727067886e-06, "logits/chosen": -1.159706711769104, "logits/rejected": -1.2106961011886597, "logps/chosen": -198.9308319091797, "logps/rejected": -208.64498901367188, "loss": 0.0829, "rewards/accuracies": 1.0, "rewards/chosen": 0.3121963441371918, "rewards/margins": 1.949787974357605, "rewards/rejected": -1.6375916004180908, "step": 4152 }, { "epoch": 0.92, "learning_rate": 9.752728362560834e-06, "logits/chosen": -1.5592041015625, "logits/rejected": -0.9102333784103394, "logps/chosen": -144.91409301757812, "logps/rejected": -816.5753173828125, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": 0.956652820110321, "rewards/margins": 57.503170013427734, "rewards/rejected": -56.54651641845703, "step": 4153 }, { "epoch": 0.92, "learning_rate": 9.752171387339612e-06, "logits/chosen": -0.8673402070999146, "logits/rejected": -0.8204055428504944, "logps/chosen": -159.4215087890625, "logps/rejected": -250.80564880371094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.4199188947677612, "rewards/margins": 8.560626029968262, "rewards/rejected": -7.140707492828369, "step": 4154 }, { "epoch": 0.92, "learning_rate": 9.75161380147579e-06, "logits/chosen": -1.3130676746368408, "logits/rejected": -1.307948112487793, "logps/chosen": -132.13931274414062, "logps/rejected": -166.84768676757812, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -0.9581695795059204, "rewards/margins": 3.1726412773132324, "rewards/rejected": -4.130810737609863, "step": 4155 }, { "epoch": 0.92, "learning_rate": 9.751055605041017e-06, "logits/chosen": -1.4531879425048828, "logits/rejected": -1.4849320650100708, "logps/chosen": -106.93954467773438, "logps/rejected": -70.96487426757812, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": 0.571911633014679, "rewards/margins": 5.296597003936768, "rewards/rejected": -4.724685192108154, "step": 4156 }, { "epoch": 0.92, "learning_rate": 9.750496798107021e-06, "logits/chosen": -1.124955654144287, "logits/rejected": -1.1660876274108887, "logps/chosen": -192.86927795410156, "logps/rejected": -153.5623321533203, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.1749618500471115, "rewards/margins": 5.605224609375, "rewards/rejected": -5.780186653137207, "step": 4157 }, { "epoch": 0.92, "learning_rate": 9.749937380745607e-06, "logits/chosen": -1.0848983526229858, "logits/rejected": -1.1354323625564575, "logps/chosen": -39.92424392700195, "logps/rejected": -54.07903289794922, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.7092182040214539, "rewards/margins": 3.6862552165985107, "rewards/rejected": -4.395473480224609, "step": 4158 }, { "epoch": 0.92, "learning_rate": 9.749377353028657e-06, "logits/chosen": -0.774705708026886, "logits/rejected": -0.20814861357212067, "logps/chosen": -112.35220336914062, "logps/rejected": -499.99951171875, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -3.5999772548675537, "rewards/margins": 39.080535888671875, "rewards/rejected": -42.680511474609375, "step": 4159 }, { "epoch": 0.92, "learning_rate": 9.748816715028135e-06, "logits/chosen": -1.2076665163040161, "logits/rejected": -0.8172128796577454, "logps/chosen": -107.79521179199219, "logps/rejected": -377.5332336425781, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": -2.2800827026367188, "rewards/margins": 25.61659812927246, "rewards/rejected": -27.89668083190918, "step": 4160 }, { "epoch": 0.92, "learning_rate": 9.748255466816081e-06, "logits/chosen": -0.9930289387702942, "logits/rejected": -1.073014497756958, "logps/chosen": -221.17222595214844, "logps/rejected": -173.05401611328125, "loss": 0.0845, "rewards/accuracies": 1.0, "rewards/chosen": 2.1202590465545654, "rewards/margins": 1.7108107805252075, "rewards/rejected": 0.4094482362270355, "step": 4161 }, { "epoch": 0.92, "learning_rate": 9.747693608464614e-06, "logits/chosen": -1.0403523445129395, "logits/rejected": -1.0185731649398804, "logps/chosen": -70.1102294921875, "logps/rejected": -172.96925354003906, "loss": 0.1338, "rewards/accuracies": 1.0, "rewards/chosen": -1.9814350605010986, "rewards/margins": 4.715268135070801, "rewards/rejected": -6.6967034339904785, "step": 4162 }, { "epoch": 0.92, "learning_rate": 9.74713114004593e-06, "logits/chosen": -0.9745740294456482, "logits/rejected": -1.033285140991211, "logps/chosen": -153.4780731201172, "logps/rejected": -203.53648376464844, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.2362351417541504, "rewards/margins": 5.801289081573486, "rewards/rejected": -8.037524223327637, "step": 4163 }, { "epoch": 0.92, "learning_rate": 9.746568061632308e-06, "logits/chosen": -1.2125952243804932, "logits/rejected": -1.3036819696426392, "logps/chosen": -147.41690063476562, "logps/rejected": -110.34539031982422, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 0.6902313232421875, "rewards/margins": 5.0351786613464355, "rewards/rejected": -4.344947338104248, "step": 4164 }, { "epoch": 0.92, "learning_rate": 9.746004373296099e-06, "logits/chosen": -1.178739070892334, "logits/rejected": -1.2636113166809082, "logps/chosen": -210.53036499023438, "logps/rejected": -147.0605926513672, "loss": 0.1346, "rewards/accuracies": 1.0, "rewards/chosen": -1.2558517456054688, "rewards/margins": 2.822640895843506, "rewards/rejected": -4.078492641448975, "step": 4165 }, { "epoch": 0.92, "learning_rate": 9.745440075109738e-06, "logits/chosen": -1.5205504894256592, "logits/rejected": -1.6123905181884766, "logps/chosen": -88.50381469726562, "logps/rejected": -111.44552612304688, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.6142196655273438, "rewards/margins": 7.071721076965332, "rewards/rejected": -7.685940742492676, "step": 4166 }, { "epoch": 0.92, "learning_rate": 9.744875167145735e-06, "logits/chosen": -0.8098389506340027, "logits/rejected": -0.8098389506340027, "logps/chosen": -57.334312438964844, "logps/rejected": -57.334312438964844, "loss": 0.3476, "rewards/accuracies": 0.0, "rewards/chosen": 0.5054107904434204, "rewards/margins": 0.0, "rewards/rejected": 0.5054107904434204, "step": 4167 }, { "epoch": 0.92, "learning_rate": 9.74430964947668e-06, "logits/chosen": -1.4452546834945679, "logits/rejected": -1.5208598375320435, "logps/chosen": -179.87167358398438, "logps/rejected": -218.6461944580078, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.5528671145439148, "rewards/margins": 6.306097507476807, "rewards/rejected": -5.753230571746826, "step": 4168 }, { "epoch": 0.92, "learning_rate": 9.74374352217524e-06, "logits/chosen": -1.3090500831604004, "logits/rejected": -1.34498929977417, "logps/chosen": -67.36112976074219, "logps/rejected": -55.11944580078125, "loss": 0.098, "rewards/accuracies": 1.0, "rewards/chosen": 0.5647552609443665, "rewards/margins": 1.5305161476135254, "rewards/rejected": -0.9657608270645142, "step": 4169 }, { "epoch": 0.92, "learning_rate": 9.743176785314159e-06, "logits/chosen": -1.4060574769973755, "logits/rejected": -1.4425365924835205, "logps/chosen": -129.89083862304688, "logps/rejected": -113.26055908203125, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -0.96856689453125, "rewards/margins": 3.843555450439453, "rewards/rejected": -4.812122344970703, "step": 4170 }, { "epoch": 0.92, "learning_rate": 9.742609438966265e-06, "logits/chosen": -1.2790030241012573, "logits/rejected": -1.2639459371566772, "logps/chosen": -121.53935241699219, "logps/rejected": -167.36453247070312, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.1989486664533615, "rewards/margins": 10.389303207397461, "rewards/rejected": -10.588252067565918, "step": 4171 }, { "epoch": 0.92, "learning_rate": 9.74204148320446e-06, "logits/chosen": -1.2683343887329102, "logits/rejected": -1.1985493898391724, "logps/chosen": -162.30294799804688, "logps/rejected": -183.9844970703125, "loss": 0.8333, "rewards/accuracies": 0.0, "rewards/chosen": -1.2623428106307983, "rewards/margins": -1.4566971063613892, "rewards/rejected": 0.19435425102710724, "step": 4172 }, { "epoch": 0.92, "learning_rate": 9.741472918101722e-06, "logits/chosen": -1.3279037475585938, "logits/rejected": -1.3279037475585938, "logps/chosen": -307.43914794921875, "logps/rejected": -307.43914794921875, "loss": 0.3523, "rewards/accuracies": 0.0, "rewards/chosen": -12.418960571289062, "rewards/margins": 0.0, "rewards/rejected": -12.418960571289062, "step": 4173 }, { "epoch": 0.92, "learning_rate": 9.740903743731113e-06, "logits/chosen": -1.0059778690338135, "logits/rejected": -0.9374960660934448, "logps/chosen": -78.28380584716797, "logps/rejected": -138.72291564941406, "loss": 0.6972, "rewards/accuracies": 1.0, "rewards/chosen": -0.5135360956192017, "rewards/margins": 4.094985485076904, "rewards/rejected": -4.608521461486816, "step": 4174 }, { "epoch": 0.92, "learning_rate": 9.74033396016577e-06, "logits/chosen": -0.8700963854789734, "logits/rejected": -0.8727036714553833, "logps/chosen": -224.50088500976562, "logps/rejected": -137.52178955078125, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 0.7217682003974915, "rewards/margins": 3.7232749462127686, "rewards/rejected": -3.001506805419922, "step": 4175 }, { "epoch": 0.92, "learning_rate": 9.739763567478908e-06, "logits/chosen": -1.0764044523239136, "logits/rejected": -1.0431801080703735, "logps/chosen": -168.1302032470703, "logps/rejected": -172.79306030273438, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": 2.025456190109253, "rewards/margins": 4.342776298522949, "rewards/rejected": -2.3173203468322754, "step": 4176 }, { "epoch": 0.92, "learning_rate": 9.739192565743822e-06, "logits/chosen": -1.1449041366577148, "logits/rejected": -1.1149686574935913, "logps/chosen": -89.85794830322266, "logps/rejected": -198.8674774169922, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -1.045344591140747, "rewards/margins": 8.750919342041016, "rewards/rejected": -9.796263694763184, "step": 4177 }, { "epoch": 0.92, "learning_rate": 9.738620955033883e-06, "logits/chosen": -0.7655892372131348, "logits/rejected": -0.782647967338562, "logps/chosen": -123.12982177734375, "logps/rejected": -107.92967987060547, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 0.10762634128332138, "rewards/margins": 5.327729225158691, "rewards/rejected": -5.220102787017822, "step": 4178 }, { "epoch": 0.92, "learning_rate": 9.738048735422545e-06, "logits/chosen": -1.2141326665878296, "logits/rejected": -1.2141326665878296, "logps/chosen": -75.55140686035156, "logps/rejected": -75.55140686035156, "loss": 0.3869, "rewards/accuracies": 0.0, "rewards/chosen": -1.6948117017745972, "rewards/margins": 0.0, "rewards/rejected": -1.6948117017745972, "step": 4179 }, { "epoch": 0.93, "learning_rate": 9.737475906983333e-06, "logits/chosen": -1.2246240377426147, "logits/rejected": -0.7723124623298645, "logps/chosen": -76.42890167236328, "logps/rejected": -753.000732421875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 1.3282890319824219, "rewards/margins": 58.8799934387207, "rewards/rejected": -57.55170440673828, "step": 4180 }, { "epoch": 0.93, "learning_rate": 9.736902469789855e-06, "logits/chosen": -1.373883843421936, "logits/rejected": -1.367807388305664, "logps/chosen": -179.46450805664062, "logps/rejected": -150.11524963378906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 3.23870849609375, "rewards/margins": 7.504042148590088, "rewards/rejected": -4.265333652496338, "step": 4181 }, { "epoch": 0.93, "learning_rate": 9.736328423915797e-06, "logits/chosen": -1.1333231925964355, "logits/rejected": -1.1736794710159302, "logps/chosen": -148.71505737304688, "logps/rejected": -225.11770629882812, "loss": 0.1537, "rewards/accuracies": 1.0, "rewards/chosen": -1.5795563459396362, "rewards/margins": 7.352068901062012, "rewards/rejected": -8.931625366210938, "step": 4182 }, { "epoch": 0.93, "learning_rate": 9.735753769434923e-06, "logits/chosen": -1.3840806484222412, "logits/rejected": -1.4186440706253052, "logps/chosen": -87.70752716064453, "logps/rejected": -78.04969024658203, "loss": 0.1068, "rewards/accuracies": 1.0, "rewards/chosen": -3.9999454021453857, "rewards/margins": 1.5380184650421143, "rewards/rejected": -5.5379638671875, "step": 4183 }, { "epoch": 0.93, "learning_rate": 9.735178506421075e-06, "logits/chosen": -1.475484848022461, "logits/rejected": -1.4505596160888672, "logps/chosen": -90.84344482421875, "logps/rejected": -105.21204376220703, "loss": 0.0359, "rewards/accuracies": 1.0, "rewards/chosen": -0.10871277004480362, "rewards/margins": 2.954876184463501, "rewards/rejected": -3.063588857650757, "step": 4184 }, { "epoch": 0.93, "learning_rate": 9.73460263494817e-06, "logits/chosen": -1.3601197004318237, "logits/rejected": -1.414348840713501, "logps/chosen": -162.1859588623047, "logps/rejected": -182.01849365234375, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": 1.0936279296875, "rewards/margins": 8.710737228393555, "rewards/rejected": -7.6171088218688965, "step": 4185 }, { "epoch": 0.93, "learning_rate": 9.734026155090208e-06, "logits/chosen": -1.4119467735290527, "logits/rejected": -1.5868397951126099, "logps/chosen": -220.7060546875, "logps/rejected": -65.91409301757812, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.40292665362358093, "rewards/margins": 3.7637479305267334, "rewards/rejected": -4.166674613952637, "step": 4186 }, { "epoch": 0.93, "learning_rate": 9.733449066921268e-06, "logits/chosen": -1.3387559652328491, "logits/rejected": -1.3387559652328491, "logps/chosen": -132.93470764160156, "logps/rejected": -132.93470764160156, "loss": 0.3741, "rewards/accuracies": 0.0, "rewards/chosen": -5.292588233947754, "rewards/margins": 0.0, "rewards/rejected": -5.292588233947754, "step": 4187 }, { "epoch": 0.93, "learning_rate": 9.7328713705155e-06, "logits/chosen": -1.347895622253418, "logits/rejected": -1.3929095268249512, "logps/chosen": -84.95046997070312, "logps/rejected": -78.2449722290039, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -0.3595077693462372, "rewards/margins": 3.663353681564331, "rewards/rejected": -4.022861480712891, "step": 4188 }, { "epoch": 0.93, "learning_rate": 9.732293065947138e-06, "logits/chosen": -1.6549327373504639, "logits/rejected": -1.5472439527511597, "logps/chosen": -106.10453796386719, "logps/rejected": -171.4357452392578, "loss": 0.1589, "rewards/accuracies": 1.0, "rewards/chosen": -0.43605804443359375, "rewards/margins": 1.0061920881271362, "rewards/rejected": -1.44225013256073, "step": 4189 }, { "epoch": 0.93, "learning_rate": 9.731714153290492e-06, "logits/chosen": -1.191800832748413, "logits/rejected": -1.227964997291565, "logps/chosen": -67.72434997558594, "logps/rejected": -80.27928161621094, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.9234833121299744, "rewards/margins": 6.528148651123047, "rewards/rejected": -5.604665279388428, "step": 4190 }, { "epoch": 0.93, "learning_rate": 9.731134632619954e-06, "logits/chosen": -1.2199814319610596, "logits/rejected": -1.2493906021118164, "logps/chosen": -70.66033935546875, "logps/rejected": -71.69612884521484, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 1.2682021856307983, "rewards/margins": 5.765054225921631, "rewards/rejected": -4.496851921081543, "step": 4191 }, { "epoch": 0.93, "learning_rate": 9.73055450400999e-06, "logits/chosen": -1.1736284494400024, "logits/rejected": -1.0617713928222656, "logps/chosen": -105.9615707397461, "logps/rejected": -209.71864318847656, "loss": 0.7338, "rewards/accuracies": 0.0, "rewards/chosen": -2.8864693641662598, "rewards/margins": -1.2054665088653564, "rewards/rejected": -1.6810028553009033, "step": 4192 }, { "epoch": 0.93, "learning_rate": 9.729973767535142e-06, "logits/chosen": -1.4863284826278687, "logits/rejected": -1.4414002895355225, "logps/chosen": -80.74828338623047, "logps/rejected": -160.19615173339844, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.5172691345214844, "rewards/margins": 5.991031646728516, "rewards/rejected": -6.50830078125, "step": 4193 }, { "epoch": 0.93, "learning_rate": 9.729392423270036e-06, "logits/chosen": -0.8183813095092773, "logits/rejected": -0.7771865725517273, "logps/chosen": -120.67695617675781, "logps/rejected": -263.62652587890625, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 0.3223884701728821, "rewards/margins": 5.7350945472717285, "rewards/rejected": -5.412705898284912, "step": 4194 }, { "epoch": 0.93, "learning_rate": 9.728810471289374e-06, "logits/chosen": -1.4233191013336182, "logits/rejected": -1.4294192790985107, "logps/chosen": -116.4406509399414, "logps/rejected": -105.02207946777344, "loss": 1.1242, "rewards/accuracies": 0.0, "rewards/chosen": -2.7491378784179688, "rewards/margins": -2.1331048011779785, "rewards/rejected": -0.6160331964492798, "step": 4195 }, { "epoch": 0.93, "learning_rate": 9.728227911667934e-06, "logits/chosen": -1.0569019317626953, "logits/rejected": -1.037013053894043, "logps/chosen": -98.497314453125, "logps/rejected": -139.03421020507812, "loss": 1.0795, "rewards/accuracies": 0.0, "rewards/chosen": -4.455952167510986, "rewards/margins": -1.2034099102020264, "rewards/rejected": -3.25254225730896, "step": 4196 }, { "epoch": 0.93, "learning_rate": 9.727644744480571e-06, "logits/chosen": -1.0575170516967773, "logits/rejected": -0.9608524441719055, "logps/chosen": -77.95662689208984, "logps/rejected": -187.31695556640625, "loss": 0.0793, "rewards/accuracies": 1.0, "rewards/chosen": 0.47923049330711365, "rewards/margins": 4.688617706298828, "rewards/rejected": -4.209387302398682, "step": 4197 }, { "epoch": 0.93, "learning_rate": 9.727060969802226e-06, "logits/chosen": -1.1471203565597534, "logits/rejected": -1.0951697826385498, "logps/chosen": -152.678466796875, "logps/rejected": -150.70928955078125, "loss": 0.3034, "rewards/accuracies": 1.0, "rewards/chosen": -1.7598907947540283, "rewards/margins": 0.1812591552734375, "rewards/rejected": -1.9411499500274658, "step": 4198 }, { "epoch": 0.93, "learning_rate": 9.726476587707908e-06, "logits/chosen": -1.5063527822494507, "logits/rejected": -1.5335266590118408, "logps/chosen": -108.22587585449219, "logps/rejected": -130.85574340820312, "loss": 0.0536, "rewards/accuracies": 1.0, "rewards/chosen": -1.4088776111602783, "rewards/margins": 2.4322478771209717, "rewards/rejected": -3.84112548828125, "step": 4199 }, { "epoch": 0.93, "learning_rate": 9.725891598272711e-06, "logits/chosen": -1.3007822036743164, "logits/rejected": -1.2970956563949585, "logps/chosen": -65.28562927246094, "logps/rejected": -105.72054290771484, "loss": 0.3126, "rewards/accuracies": 1.0, "rewards/chosen": -0.8788135647773743, "rewards/margins": 0.997244656085968, "rewards/rejected": -1.8760582208633423, "step": 4200 }, { "epoch": 0.93, "learning_rate": 9.725306001571806e-06, "logits/chosen": -0.8911104202270508, "logits/rejected": -0.8720291256904602, "logps/chosen": -116.80013275146484, "logps/rejected": -171.31472778320312, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.0356025695800781, "rewards/margins": 6.510275363922119, "rewards/rejected": -7.545877933502197, "step": 4201 }, { "epoch": 0.93, "learning_rate": 9.72471979768044e-06, "logits/chosen": -0.9492663741111755, "logits/rejected": -0.9930667877197266, "logps/chosen": -194.3434600830078, "logps/rejected": -93.69202423095703, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.1304733753204346, "rewards/margins": 8.459424018859863, "rewards/rejected": -6.32895040512085, "step": 4202 }, { "epoch": 0.93, "learning_rate": 9.724132986673935e-06, "logits/chosen": -1.1540141105651855, "logits/rejected": -1.0155024528503418, "logps/chosen": -175.73855590820312, "logps/rejected": -328.53759765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.350921630859375, "rewards/margins": 7.5394439697265625, "rewards/rejected": -9.890365600585938, "step": 4203 }, { "epoch": 0.93, "learning_rate": 9.723545568627699e-06, "logits/chosen": -1.0524218082427979, "logits/rejected": -0.9612070918083191, "logps/chosen": -173.22329711914062, "logps/rejected": -301.1304931640625, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": 0.5711746215820312, "rewards/margins": 6.544063091278076, "rewards/rejected": -5.972888469696045, "step": 4204 }, { "epoch": 0.93, "learning_rate": 9.722957543617211e-06, "logits/chosen": -1.1548055410385132, "logits/rejected": -1.1625988483428955, "logps/chosen": -87.39877319335938, "logps/rejected": -165.40762329101562, "loss": 0.2227, "rewards/accuracies": 1.0, "rewards/chosen": -1.0947860479354858, "rewards/margins": 2.42592191696167, "rewards/rejected": -3.520707845687866, "step": 4205 }, { "epoch": 0.93, "learning_rate": 9.722368911718034e-06, "logits/chosen": -1.3015103340148926, "logits/rejected": -1.3605624437332153, "logps/chosen": -141.09515380859375, "logps/rejected": -81.2153549194336, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.22122345864772797, "rewards/margins": 4.251338958740234, "rewards/rejected": -4.472562313079834, "step": 4206 }, { "epoch": 0.93, "learning_rate": 9.721779673005805e-06, "logits/chosen": -1.241525650024414, "logits/rejected": -1.1963392496109009, "logps/chosen": -77.00581359863281, "logps/rejected": -150.97122192382812, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.19664306938648224, "rewards/margins": 5.443521499633789, "rewards/rejected": -5.640164375305176, "step": 4207 }, { "epoch": 0.93, "learning_rate": 9.721189827556237e-06, "logits/chosen": -1.0975421667099, "logits/rejected": -1.0975421667099, "logps/chosen": -104.18373107910156, "logps/rejected": -104.18373107910156, "loss": 0.3609, "rewards/accuracies": 0.0, "rewards/chosen": -0.9118431210517883, "rewards/margins": 0.0, "rewards/rejected": -0.9118431210517883, "step": 4208 }, { "epoch": 0.93, "learning_rate": 9.720599375445125e-06, "logits/chosen": -1.109223484992981, "logits/rejected": -1.109223484992981, "logps/chosen": -169.45062255859375, "logps/rejected": -169.45062255859375, "loss": 0.3513, "rewards/accuracies": 0.0, "rewards/chosen": -0.7405762076377869, "rewards/margins": 0.0, "rewards/rejected": -0.7405762076377869, "step": 4209 }, { "epoch": 0.93, "learning_rate": 9.720008316748344e-06, "logits/chosen": -1.1201337575912476, "logits/rejected": -1.1531329154968262, "logps/chosen": -201.47689819335938, "logps/rejected": -149.3406219482422, "loss": 0.0907, "rewards/accuracies": 1.0, "rewards/chosen": -4.359954833984375, "rewards/margins": 1.7063689231872559, "rewards/rejected": -6.066323757171631, "step": 4210 }, { "epoch": 0.93, "learning_rate": 9.719416651541839e-06, "logits/chosen": -1.0783149003982544, "logits/rejected": -1.106431484222412, "logps/chosen": -161.73744201660156, "logps/rejected": -115.90567016601562, "loss": 0.9151, "rewards/accuracies": 0.0, "rewards/chosen": -2.0296952724456787, "rewards/margins": -1.6554169654846191, "rewards/rejected": -0.3742782771587372, "step": 4211 }, { "epoch": 0.93, "learning_rate": 9.718824379901639e-06, "logits/chosen": -1.2740055322647095, "logits/rejected": -1.2563711404800415, "logps/chosen": -72.14602661132812, "logps/rejected": -110.51040649414062, "loss": 0.1234, "rewards/accuracies": 1.0, "rewards/chosen": -0.5303589105606079, "rewards/margins": 1.2876739501953125, "rewards/rejected": -1.8180328607559204, "step": 4212 }, { "epoch": 0.93, "learning_rate": 9.718231501903851e-06, "logits/chosen": -1.2031707763671875, "logits/rejected": -1.0836995840072632, "logps/chosen": -121.9546127319336, "logps/rejected": -280.09539794921875, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -0.38837891817092896, "rewards/margins": 3.638348340988159, "rewards/rejected": -4.026727199554443, "step": 4213 }, { "epoch": 0.93, "learning_rate": 9.717638017624657e-06, "logits/chosen": -1.1342848539352417, "logits/rejected": -1.1240038871765137, "logps/chosen": -110.63223266601562, "logps/rejected": -107.10809326171875, "loss": 0.4076, "rewards/accuracies": 1.0, "rewards/chosen": -1.4662026166915894, "rewards/margins": 1.1833137273788452, "rewards/rejected": -2.6495163440704346, "step": 4214 }, { "epoch": 0.93, "learning_rate": 9.717043927140319e-06, "logits/chosen": -1.1225461959838867, "logits/rejected": -1.1279774904251099, "logps/chosen": -135.33274841308594, "logps/rejected": -133.32215881347656, "loss": 0.0631, "rewards/accuracies": 1.0, "rewards/chosen": -2.1529595851898193, "rewards/margins": 2.6241681575775146, "rewards/rejected": -4.777127742767334, "step": 4215 }, { "epoch": 0.93, "learning_rate": 9.716449230527175e-06, "logits/chosen": -1.091374397277832, "logits/rejected": -1.1156082153320312, "logps/chosen": -120.03040313720703, "logps/rejected": -209.02955627441406, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": 0.7243423461914062, "rewards/margins": 6.8537278175354, "rewards/rejected": -6.129385471343994, "step": 4216 }, { "epoch": 0.93, "learning_rate": 9.715853927861643e-06, "logits/chosen": -0.908743143081665, "logits/rejected": -1.0125616788864136, "logps/chosen": -324.8968200683594, "logps/rejected": -133.51690673828125, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": -6.391971111297607, "rewards/margins": 2.453277111053467, "rewards/rejected": -8.845248222351074, "step": 4217 }, { "epoch": 0.93, "learning_rate": 9.71525801922022e-06, "logits/chosen": -1.0492539405822754, "logits/rejected": -1.0639580488204956, "logps/chosen": -105.0128173828125, "logps/rejected": -149.90879821777344, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -0.574720025062561, "rewards/margins": 4.5329155921936035, "rewards/rejected": -5.107635498046875, "step": 4218 }, { "epoch": 0.93, "learning_rate": 9.714661504679474e-06, "logits/chosen": -1.472439169883728, "logits/rejected": -1.5119917392730713, "logps/chosen": -203.74288940429688, "logps/rejected": -260.40966796875, "loss": 0.0698, "rewards/accuracies": 1.0, "rewards/chosen": 0.1274673491716385, "rewards/margins": 6.039930820465088, "rewards/rejected": -5.912463665008545, "step": 4219 }, { "epoch": 0.93, "learning_rate": 9.71406438431606e-06, "logits/chosen": -1.1940125226974487, "logits/rejected": -0.9064086079597473, "logps/chosen": -94.53459930419922, "logps/rejected": -799.640380859375, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": -0.2787528932094574, "rewards/margins": 64.61871337890625, "rewards/rejected": -64.89746856689453, "step": 4220 }, { "epoch": 0.93, "learning_rate": 9.713466658206703e-06, "logits/chosen": -1.2903293371200562, "logits/rejected": -1.2411308288574219, "logps/chosen": -141.94659423828125, "logps/rejected": -241.879150390625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.0749528408050537, "rewards/margins": 5.835000991821289, "rewards/rejected": -7.909953594207764, "step": 4221 }, { "epoch": 0.93, "learning_rate": 9.712868326428213e-06, "logits/chosen": -1.311671495437622, "logits/rejected": -1.3775007724761963, "logps/chosen": -231.20132446289062, "logps/rejected": -189.0948486328125, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -1.0562164783477783, "rewards/margins": 3.6946685314178467, "rewards/rejected": -4.750885009765625, "step": 4222 }, { "epoch": 0.93, "learning_rate": 9.712269389057471e-06, "logits/chosen": -1.2094838619232178, "logits/rejected": -1.1955313682556152, "logps/chosen": -169.19454956054688, "logps/rejected": -169.02236938476562, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.0064025879837572575, "rewards/margins": 5.281487464904785, "rewards/rejected": -5.2878899574279785, "step": 4223 }, { "epoch": 0.93, "learning_rate": 9.711669846171443e-06, "logits/chosen": -0.9410141110420227, "logits/rejected": -0.9410141110420227, "logps/chosen": -113.3674087524414, "logps/rejected": -113.3674087524414, "loss": 0.3473, "rewards/accuracies": 0.0, "rewards/chosen": -1.3780983686447144, "rewards/margins": 0.0, "rewards/rejected": -1.3780983686447144, "step": 4224 }, { "epoch": 0.94, "learning_rate": 9.711069697847165e-06, "logits/chosen": -1.3702458143234253, "logits/rejected": -1.3437811136245728, "logps/chosen": -78.44441223144531, "logps/rejected": -134.54086303710938, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": 1.3572555780410767, "rewards/margins": 11.054935455322266, "rewards/rejected": -9.69767951965332, "step": 4225 }, { "epoch": 0.94, "learning_rate": 9.710468944161755e-06, "logits/chosen": -0.9368904232978821, "logits/rejected": -0.961297333240509, "logps/chosen": -90.95585632324219, "logps/rejected": -107.85973358154297, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -1.041754126548767, "rewards/margins": 3.9776611328125, "rewards/rejected": -5.019415378570557, "step": 4226 }, { "epoch": 0.94, "learning_rate": 9.70986758519241e-06, "logits/chosen": -1.2650158405303955, "logits/rejected": -1.2681280374526978, "logps/chosen": -127.21139526367188, "logps/rejected": -165.28738403320312, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -1.5175354480743408, "rewards/margins": 3.3138906955718994, "rewards/rejected": -4.83142614364624, "step": 4227 }, { "epoch": 0.94, "learning_rate": 9.709265621016401e-06, "logits/chosen": -1.2673990726470947, "logits/rejected": -1.2348546981811523, "logps/chosen": -92.62570190429688, "logps/rejected": -198.40969848632812, "loss": 0.1301, "rewards/accuracies": 1.0, "rewards/chosen": -6.064857482910156, "rewards/margins": 1.2129745483398438, "rewards/rejected": -7.27783203125, "step": 4228 }, { "epoch": 0.94, "learning_rate": 9.708663051711083e-06, "logits/chosen": -1.636142373085022, "logits/rejected": -1.002407193183899, "logps/chosen": -156.94461059570312, "logps/rejected": -677.6751708984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.322972059249878, "rewards/margins": 39.328453063964844, "rewards/rejected": -42.651424407958984, "step": 4229 }, { "epoch": 0.94, "learning_rate": 9.708059877353881e-06, "logits/chosen": -1.0102514028549194, "logits/rejected": -1.0102514028549194, "logps/chosen": -417.3385925292969, "logps/rejected": -417.3385925292969, "loss": 0.8543, "rewards/accuracies": 0.0, "rewards/chosen": -8.741373062133789, "rewards/margins": 0.0, "rewards/rejected": -8.741373062133789, "step": 4230 }, { "epoch": 0.94, "learning_rate": 9.707456098022303e-06, "logits/chosen": -1.1335701942443848, "logits/rejected": -1.1461721658706665, "logps/chosen": -158.08973693847656, "logps/rejected": -189.86074829101562, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.3635696470737457, "rewards/margins": 4.710784912109375, "rewards/rejected": -5.074354648590088, "step": 4231 }, { "epoch": 0.94, "learning_rate": 9.706851713793932e-06, "logits/chosen": -1.2776738405227661, "logits/rejected": -1.2776738405227661, "logps/chosen": -191.40054321289062, "logps/rejected": -191.40054321289062, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.4440765380859375, "rewards/margins": 0.0, "rewards/rejected": -2.4440765380859375, "step": 4232 }, { "epoch": 0.94, "learning_rate": 9.706246724746433e-06, "logits/chosen": -1.186357021331787, "logits/rejected": -1.1556833982467651, "logps/chosen": -119.602783203125, "logps/rejected": -109.66944885253906, "loss": 0.2037, "rewards/accuracies": 1.0, "rewards/chosen": -0.8903946280479431, "rewards/margins": 0.6950134634971619, "rewards/rejected": -1.585408091545105, "step": 4233 }, { "epoch": 0.94, "learning_rate": 9.705641130957541e-06, "logits/chosen": -1.4359354972839355, "logits/rejected": -1.420717477798462, "logps/chosen": -202.99742126464844, "logps/rejected": -228.2428436279297, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.23773956298828125, "rewards/margins": 8.968928337097168, "rewards/rejected": -8.731188774108887, "step": 4234 }, { "epoch": 0.94, "learning_rate": 9.705034932505076e-06, "logits/chosen": -1.2150990962982178, "logits/rejected": -1.1272765398025513, "logps/chosen": -155.081298828125, "logps/rejected": -148.97677612304688, "loss": 0.482, "rewards/accuracies": 0.0, "rewards/chosen": -3.985002279281616, "rewards/margins": -0.47583484649658203, "rewards/rejected": -3.509167432785034, "step": 4235 }, { "epoch": 0.94, "learning_rate": 9.704428129466934e-06, "logits/chosen": -0.8812150359153748, "logits/rejected": -0.8973373174667358, "logps/chosen": -75.18730163574219, "logps/rejected": -114.95364379882812, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.4379265010356903, "rewards/margins": 4.262974262237549, "rewards/rejected": -4.700900554656982, "step": 4236 }, { "epoch": 0.94, "learning_rate": 9.703820721921085e-06, "logits/chosen": -1.2357439994812012, "logits/rejected": -1.2156167030334473, "logps/chosen": -82.70855712890625, "logps/rejected": -122.34580993652344, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": 0.201945498585701, "rewards/margins": 2.6831681728363037, "rewards/rejected": -2.481222629547119, "step": 4237 }, { "epoch": 0.94, "learning_rate": 9.703212709945583e-06, "logits/chosen": -1.4352989196777344, "logits/rejected": -1.438781499862671, "logps/chosen": -167.92593383789062, "logps/rejected": -121.42731475830078, "loss": 0.1135, "rewards/accuracies": 1.0, "rewards/chosen": -1.6749314069747925, "rewards/margins": 1.8603309392929077, "rewards/rejected": -3.5352623462677, "step": 4238 }, { "epoch": 0.94, "learning_rate": 9.70260409361855e-06, "logits/chosen": -1.0922883749008179, "logits/rejected": -1.0314785242080688, "logps/chosen": -186.325439453125, "logps/rejected": -349.456298828125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.08908843994140625, "rewards/margins": 17.153779983520508, "rewards/rejected": -17.0646915435791, "step": 4239 }, { "epoch": 0.94, "learning_rate": 9.701994873018198e-06, "logits/chosen": -0.9170006513595581, "logits/rejected": -0.8133838176727295, "logps/chosen": -135.44761657714844, "logps/rejected": -299.6941833496094, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": 2.726985216140747, "rewards/margins": 6.708940505981445, "rewards/rejected": -3.981955051422119, "step": 4240 }, { "epoch": 0.94, "learning_rate": 9.70138504822281e-06, "logits/chosen": -1.28099524974823, "logits/rejected": -1.28099524974823, "logps/chosen": -175.14266967773438, "logps/rejected": -175.14266967773438, "loss": 0.3514, "rewards/accuracies": 0.0, "rewards/chosen": -5.538970947265625, "rewards/margins": 0.0, "rewards/rejected": -5.538970947265625, "step": 4241 }, { "epoch": 0.94, "learning_rate": 9.700774619310744e-06, "logits/chosen": -1.4341721534729004, "logits/rejected": -1.4805549383163452, "logps/chosen": -95.2664794921875, "logps/rejected": -127.10124206542969, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": -0.9163574576377869, "rewards/margins": 3.7339391708374023, "rewards/rejected": -4.650296688079834, "step": 4242 }, { "epoch": 0.94, "learning_rate": 9.700163586360438e-06, "logits/chosen": -1.3326555490493774, "logits/rejected": -1.2652182579040527, "logps/chosen": -89.06497192382812, "logps/rejected": -204.80392456054688, "loss": 0.1595, "rewards/accuracies": 1.0, "rewards/chosen": 0.29789429903030396, "rewards/margins": 6.936029434204102, "rewards/rejected": -6.638134956359863, "step": 4243 }, { "epoch": 0.94, "learning_rate": 9.699551949450412e-06, "logits/chosen": -1.2887523174285889, "logits/rejected": -1.3850672245025635, "logps/chosen": -194.60751342773438, "logps/rejected": -137.76454162597656, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.11406097561120987, "rewards/margins": 6.447779178619385, "rewards/rejected": -6.333718299865723, "step": 4244 }, { "epoch": 0.94, "learning_rate": 9.698939708659258e-06, "logits/chosen": -1.3878093957901, "logits/rejected": -1.4488595724105835, "logps/chosen": -122.91488647460938, "logps/rejected": -73.84909057617188, "loss": 0.0899, "rewards/accuracies": 1.0, "rewards/chosen": 0.37811279296875, "rewards/margins": 1.6633259057998657, "rewards/rejected": -1.2852131128311157, "step": 4245 }, { "epoch": 0.94, "learning_rate": 9.698326864065646e-06, "logits/chosen": -1.1946686506271362, "logits/rejected": -1.2286462783813477, "logps/chosen": -105.1189193725586, "logps/rejected": -121.88758850097656, "loss": 0.2984, "rewards/accuracies": 1.0, "rewards/chosen": -1.5761741399765015, "rewards/margins": 0.4918595552444458, "rewards/rejected": -2.0680336952209473, "step": 4246 }, { "epoch": 0.94, "learning_rate": 9.697713415748327e-06, "logits/chosen": -1.0338926315307617, "logits/rejected": -1.0263726711273193, "logps/chosen": -90.54608154296875, "logps/rejected": -102.72233581542969, "loss": 0.2641, "rewards/accuracies": 1.0, "rewards/chosen": -2.015718936920166, "rewards/margins": 0.48961710929870605, "rewards/rejected": -2.505336046218872, "step": 4247 }, { "epoch": 0.94, "learning_rate": 9.697099363786127e-06, "logits/chosen": -1.5228585004806519, "logits/rejected": -1.5232656002044678, "logps/chosen": -182.01190185546875, "logps/rejected": -307.15020751953125, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 0.15401916205883026, "rewards/margins": 10.980480194091797, "rewards/rejected": -10.826460838317871, "step": 4248 }, { "epoch": 0.94, "learning_rate": 9.69648470825795e-06, "logits/chosen": -1.3182834386825562, "logits/rejected": -1.557342767715454, "logps/chosen": -178.7352752685547, "logps/rejected": -86.27771759033203, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.05608062818646431, "rewards/margins": 5.527841567993164, "rewards/rejected": -5.583922386169434, "step": 4249 }, { "epoch": 0.94, "learning_rate": 9.695869449242779e-06, "logits/chosen": -1.5971860885620117, "logits/rejected": -1.6345469951629639, "logps/chosen": -170.73251342773438, "logps/rejected": -183.3438720703125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 2.2077529430389404, "rewards/margins": 5.353388786315918, "rewards/rejected": -3.1456360816955566, "step": 4250 }, { "epoch": 0.94, "learning_rate": 9.695253586819672e-06, "logits/chosen": -0.7482976913452148, "logits/rejected": -0.7281373143196106, "logps/chosen": -145.81568908691406, "logps/rejected": -156.38522338867188, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -3.144639730453491, "rewards/margins": 2.789799451828003, "rewards/rejected": -5.934439182281494, "step": 4251 }, { "epoch": 0.94, "learning_rate": 9.694637121067764e-06, "logits/chosen": -1.2778656482696533, "logits/rejected": -1.2563977241516113, "logps/chosen": -125.58846282958984, "logps/rejected": -185.51800537109375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.7541458010673523, "rewards/margins": 5.057775497436523, "rewards/rejected": -5.811921119689941, "step": 4252 }, { "epoch": 0.94, "learning_rate": 9.694020052066275e-06, "logits/chosen": -1.0719109773635864, "logits/rejected": -1.036738395690918, "logps/chosen": -65.63716125488281, "logps/rejected": -109.0247802734375, "loss": 0.266, "rewards/accuracies": 1.0, "rewards/chosen": -3.1418607234954834, "rewards/margins": 0.4078967571258545, "rewards/rejected": -3.549757480621338, "step": 4253 }, { "epoch": 0.94, "learning_rate": 9.693402379894492e-06, "logits/chosen": -0.8780539035797119, "logits/rejected": -0.7719899415969849, "logps/chosen": -228.0212860107422, "logps/rejected": -414.9615173339844, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.145257592201233, "rewards/margins": 7.569763660430908, "rewards/rejected": -8.715021133422852, "step": 4254 }, { "epoch": 0.94, "learning_rate": 9.692784104631785e-06, "logits/chosen": -1.2989035844802856, "logits/rejected": -1.2655829191207886, "logps/chosen": -125.43677520751953, "logps/rejected": -87.50721740722656, "loss": 0.1177, "rewards/accuracies": 1.0, "rewards/chosen": -3.90837025642395, "rewards/margins": 1.3265650272369385, "rewards/rejected": -5.234935283660889, "step": 4255 }, { "epoch": 0.94, "learning_rate": 9.692165226357603e-06, "logits/chosen": -1.3957860469818115, "logits/rejected": -1.3957860469818115, "logps/chosen": -194.18527221679688, "logps/rejected": -194.18527221679688, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.216879367828369, "rewards/margins": 0.0, "rewards/rejected": -6.216879367828369, "step": 4256 }, { "epoch": 0.94, "learning_rate": 9.691545745151469e-06, "logits/chosen": -1.350205898284912, "logits/rejected": -1.3957951068878174, "logps/chosen": -130.95623779296875, "logps/rejected": -71.9609375, "loss": 0.1767, "rewards/accuracies": 1.0, "rewards/chosen": -3.3208062648773193, "rewards/margins": 0.9684522151947021, "rewards/rejected": -4.2892584800720215, "step": 4257 }, { "epoch": 0.94, "learning_rate": 9.690925661092984e-06, "logits/chosen": -1.0323777198791504, "logits/rejected": -0.9278635382652283, "logps/chosen": -124.55046081542969, "logps/rejected": -363.46600341796875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.8869796991348267, "rewards/margins": 12.514561653137207, "rewards/rejected": -13.401541709899902, "step": 4258 }, { "epoch": 0.94, "learning_rate": 9.690304974261828e-06, "logits/chosen": -1.06875741481781, "logits/rejected": -1.0502616167068481, "logps/chosen": -113.31285095214844, "logps/rejected": -142.58609008789062, "loss": 0.2605, "rewards/accuracies": 1.0, "rewards/chosen": -3.109046220779419, "rewards/margins": 0.38079380989074707, "rewards/rejected": -3.489840030670166, "step": 4259 }, { "epoch": 0.94, "learning_rate": 9.689683684737758e-06, "logits/chosen": -0.6562071442604065, "logits/rejected": -0.6777873039245605, "logps/chosen": -228.966796875, "logps/rejected": -173.41961669921875, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/chosen": -2.9821717739105225, "rewards/margins": 3.292083501815796, "rewards/rejected": -6.274255275726318, "step": 4260 }, { "epoch": 0.94, "learning_rate": 9.68906179260061e-06, "logits/chosen": -0.8523867726325989, "logits/rejected": -0.7656906843185425, "logps/chosen": -180.5223846435547, "logps/rejected": -203.74008178710938, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.4580612182617188, "rewards/margins": 6.727389812469482, "rewards/rejected": -5.269328594207764, "step": 4261 }, { "epoch": 0.94, "learning_rate": 9.688439297930292e-06, "logits/chosen": -1.2413989305496216, "logits/rejected": -1.195940375328064, "logps/chosen": -137.07144165039062, "logps/rejected": -276.72454833984375, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 0.11981506645679474, "rewards/margins": 8.307367324829102, "rewards/rejected": -8.187552452087402, "step": 4262 }, { "epoch": 0.94, "learning_rate": 9.687816200806795e-06, "logits/chosen": -1.0640672445297241, "logits/rejected": -1.1077117919921875, "logps/chosen": -179.51876831054688, "logps/rejected": -144.43545532226562, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 1.0976532697677612, "rewards/margins": 4.140328884124756, "rewards/rejected": -3.042675733566284, "step": 4263 }, { "epoch": 0.94, "learning_rate": 9.687192501310186e-06, "logits/chosen": -0.9732130169868469, "logits/rejected": -0.9732130169868469, "logps/chosen": -82.06842041015625, "logps/rejected": -82.06842041015625, "loss": 0.3489, "rewards/accuracies": 0.0, "rewards/chosen": -5.035938262939453, "rewards/margins": 0.0, "rewards/rejected": -5.035938262939453, "step": 4264 }, { "epoch": 0.94, "learning_rate": 9.68656819952061e-06, "logits/chosen": -0.8352944254875183, "logits/rejected": -0.6936700940132141, "logps/chosen": -226.9757080078125, "logps/rejected": -269.6806335449219, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": 3.143728733062744, "rewards/margins": 12.541015625, "rewards/rejected": -9.397287368774414, "step": 4265 }, { "epoch": 0.94, "learning_rate": 9.685943295518283e-06, "logits/chosen": -1.2683595418930054, "logits/rejected": -1.306044101715088, "logps/chosen": -269.88214111328125, "logps/rejected": -277.12078857421875, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": 2.2113037109375, "rewards/margins": 4.245337009429932, "rewards/rejected": -2.0340332984924316, "step": 4266 }, { "epoch": 0.94, "learning_rate": 9.685317789383509e-06, "logits/chosen": -1.1546825170516968, "logits/rejected": -1.1058499813079834, "logps/chosen": -219.83016967773438, "logps/rejected": -385.07415771484375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 1.4571930170059204, "rewards/margins": 5.748379707336426, "rewards/rejected": -4.291186809539795, "step": 4267 }, { "epoch": 0.94, "learning_rate": 9.684691681196664e-06, "logits/chosen": -1.2833577394485474, "logits/rejected": -1.3051420450210571, "logps/chosen": -93.76666259765625, "logps/rejected": -144.88726806640625, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -1.5628715753555298, "rewards/margins": 5.747623443603516, "rewards/rejected": -7.310494899749756, "step": 4268 }, { "epoch": 0.94, "learning_rate": 9.684064971038196e-06, "logits/chosen": -1.1394519805908203, "logits/rejected": -1.2945170402526855, "logps/chosen": -199.31732177734375, "logps/rejected": -97.83779907226562, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 1.1380051374435425, "rewards/margins": 3.779026985168457, "rewards/rejected": -2.641021728515625, "step": 4269 }, { "epoch": 0.95, "learning_rate": 9.683437658988642e-06, "logits/chosen": -1.4222551584243774, "logits/rejected": -1.3747749328613281, "logps/chosen": -136.6136474609375, "logps/rejected": -210.9603271484375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -4.705996990203857, "rewards/margins": 3.98331880569458, "rewards/rejected": -8.689315795898438, "step": 4270 }, { "epoch": 0.95, "learning_rate": 9.682809745128607e-06, "logits/chosen": -1.1911423206329346, "logits/rejected": -1.1911423206329346, "logps/chosen": -78.49243927001953, "logps/rejected": -78.49243927001953, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.277584075927734, "rewards/margins": 0.0, "rewards/rejected": -6.277584075927734, "step": 4271 }, { "epoch": 0.95, "learning_rate": 9.682181229538776e-06, "logits/chosen": -0.9893907308578491, "logits/rejected": -0.9893907308578491, "logps/chosen": -99.90478515625, "logps/rejected": -99.90478515625, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.8092076778411865, "rewards/margins": 0.0, "rewards/rejected": -3.8092076778411865, "step": 4272 }, { "epoch": 0.95, "learning_rate": 9.681552112299914e-06, "logits/chosen": -1.2349920272827148, "logits/rejected": -1.1488676071166992, "logps/chosen": -201.82821655273438, "logps/rejected": -485.0357666015625, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -0.409005731344223, "rewards/margins": 12.216235160827637, "rewards/rejected": -12.62524127960205, "step": 4273 }, { "epoch": 0.95, "learning_rate": 9.680922393492858e-06, "logits/chosen": -0.9467423558235168, "logits/rejected": -0.8387444019317627, "logps/chosen": -149.34732055664062, "logps/rejected": -299.7489929199219, "loss": 0.3482, "rewards/accuracies": 1.0, "rewards/chosen": 0.18371276557445526, "rewards/margins": 5.720691204071045, "rewards/rejected": -5.536978244781494, "step": 4274 }, { "epoch": 0.95, "learning_rate": 9.68029207319853e-06, "logits/chosen": -1.0873774290084839, "logits/rejected": -0.7376042008399963, "logps/chosen": -130.58914184570312, "logps/rejected": -694.4677734375, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 0.91888427734375, "rewards/margins": 53.359375, "rewards/rejected": -52.44049072265625, "step": 4275 }, { "epoch": 0.95, "learning_rate": 9.679661151497919e-06, "logits/chosen": -1.047173261642456, "logits/rejected": -1.0576379299163818, "logps/chosen": -223.78428649902344, "logps/rejected": -189.03285217285156, "loss": 0.7791, "rewards/accuracies": 0.0, "rewards/chosen": -0.17907562851905823, "rewards/margins": -1.317236304283142, "rewards/rejected": 1.1381607055664062, "step": 4276 }, { "epoch": 0.95, "learning_rate": 9.6790296284721e-06, "logits/chosen": -0.9585037231445312, "logits/rejected": -0.8996811509132385, "logps/chosen": -95.7499008178711, "logps/rejected": -197.90817260742188, "loss": 1.052, "rewards/accuracies": 1.0, "rewards/chosen": 0.18508529663085938, "rewards/margins": 1.2621926069259644, "rewards/rejected": -1.077107310295105, "step": 4277 }, { "epoch": 0.95, "learning_rate": 9.678397504202222e-06, "logits/chosen": -1.236533522605896, "logits/rejected": -1.271829605102539, "logps/chosen": -228.80970764160156, "logps/rejected": -196.43499755859375, "loss": 0.1169, "rewards/accuracies": 1.0, "rewards/chosen": -4.263623237609863, "rewards/margins": 1.4815382957458496, "rewards/rejected": -5.745161533355713, "step": 4278 }, { "epoch": 0.95, "learning_rate": 9.677764778769512e-06, "logits/chosen": -1.4351555109024048, "logits/rejected": -1.4231733083724976, "logps/chosen": -69.78907775878906, "logps/rejected": -77.84724426269531, "loss": 0.4401, "rewards/accuracies": 0.0, "rewards/chosen": -5.025123119354248, "rewards/margins": -0.32175636291503906, "rewards/rejected": -4.703366756439209, "step": 4279 }, { "epoch": 0.95, "learning_rate": 9.677131452255272e-06, "logits/chosen": -1.3503260612487793, "logits/rejected": -1.2969313859939575, "logps/chosen": -215.27450561523438, "logps/rejected": -460.78448486328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.553735375404358, "rewards/margins": 11.910889625549316, "rewards/rejected": -10.35715389251709, "step": 4280 }, { "epoch": 0.95, "learning_rate": 9.676497524740885e-06, "logits/chosen": -0.7665166854858398, "logits/rejected": -0.7876140475273132, "logps/chosen": -186.06234741210938, "logps/rejected": -158.09176635742188, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 1.2155731916427612, "rewards/margins": 6.841487407684326, "rewards/rejected": -5.625914096832275, "step": 4281 }, { "epoch": 0.95, "learning_rate": 9.675862996307808e-06, "logits/chosen": -1.3631376028060913, "logits/rejected": -1.388878583908081, "logps/chosen": -104.32289123535156, "logps/rejected": -118.89181518554688, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -0.48574599623680115, "rewards/margins": 3.5831522941589355, "rewards/rejected": -4.0688982009887695, "step": 4282 }, { "epoch": 0.95, "learning_rate": 9.675227867037576e-06, "logits/chosen": -1.5182039737701416, "logits/rejected": -1.5080757141113281, "logps/chosen": -103.20622253417969, "logps/rejected": -132.95291137695312, "loss": 0.2766, "rewards/accuracies": 1.0, "rewards/chosen": -2.961474657058716, "rewards/margins": 0.31987762451171875, "rewards/rejected": -3.2813522815704346, "step": 4283 }, { "epoch": 0.95, "learning_rate": 9.674592137011801e-06, "logits/chosen": -1.444278359413147, "logits/rejected": -1.4213975667953491, "logps/chosen": -83.8735580444336, "logps/rejected": -196.47772216796875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.4468696713447571, "rewards/margins": 4.469844818115234, "rewards/rejected": -4.916714668273926, "step": 4284 }, { "epoch": 0.95, "learning_rate": 9.673955806312175e-06, "logits/chosen": -1.287007212638855, "logits/rejected": -1.2811299562454224, "logps/chosen": -115.4400634765625, "logps/rejected": -172.7812957763672, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.54620361328125, "rewards/margins": 6.411506652832031, "rewards/rejected": -8.957710266113281, "step": 4285 }, { "epoch": 0.95, "learning_rate": 9.673318875020463e-06, "logits/chosen": -0.8193143606185913, "logits/rejected": -0.8571034073829651, "logps/chosen": -155.74819946289062, "logps/rejected": -242.6460723876953, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7965942621231079, "rewards/margins": 11.492921829223633, "rewards/rejected": -10.696327209472656, "step": 4286 }, { "epoch": 0.95, "learning_rate": 9.67268134321851e-06, "logits/chosen": -1.1754369735717773, "logits/rejected": -1.2461564540863037, "logps/chosen": -138.7806396484375, "logps/rejected": -94.10295867919922, "loss": 0.0479, "rewards/accuracies": 1.0, "rewards/chosen": -1.1514450311660767, "rewards/margins": 3.124858856201172, "rewards/rejected": -4.276303768157959, "step": 4287 }, { "epoch": 0.95, "learning_rate": 9.672043210988237e-06, "logits/chosen": -1.1696934700012207, "logits/rejected": -1.1696934700012207, "logps/chosen": -214.945068359375, "logps/rejected": -214.945068359375, "loss": 0.3469, "rewards/accuracies": 0.0, "rewards/chosen": -1.8839950561523438, "rewards/margins": 0.0, "rewards/rejected": -1.8839950561523438, "step": 4288 }, { "epoch": 0.95, "learning_rate": 9.671404478411645e-06, "logits/chosen": -0.8406968116760254, "logits/rejected": -0.8870208859443665, "logps/chosen": -113.21192932128906, "logps/rejected": -90.23473358154297, "loss": 0.6882, "rewards/accuracies": 1.0, "rewards/chosen": -5.307675838470459, "rewards/margins": 0.2134552001953125, "rewards/rejected": -5.5211310386657715, "step": 4289 }, { "epoch": 0.95, "learning_rate": 9.670765145570804e-06, "logits/chosen": -0.8747773766517639, "logits/rejected": -0.9102672338485718, "logps/chosen": -257.5848388671875, "logps/rejected": -219.5801239013672, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.5445618033409119, "rewards/margins": 10.455548286437988, "rewards/rejected": -9.91098690032959, "step": 4290 }, { "epoch": 0.95, "learning_rate": 9.670125212547872e-06, "logits/chosen": -1.1612061262130737, "logits/rejected": -1.1563876867294312, "logps/chosen": -94.68643951416016, "logps/rejected": -82.13023376464844, "loss": 0.3395, "rewards/accuracies": 1.0, "rewards/chosen": -1.992878794670105, "rewards/margins": 0.049228549003601074, "rewards/rejected": -2.042107343673706, "step": 4291 }, { "epoch": 0.95, "learning_rate": 9.669484679425077e-06, "logits/chosen": -1.3030496835708618, "logits/rejected": -1.1513738632202148, "logps/chosen": -94.24634552001953, "logps/rejected": -302.9039611816406, "loss": 0.5459, "rewards/accuracies": 1.0, "rewards/chosen": -0.8624237179756165, "rewards/margins": 6.109016418457031, "rewards/rejected": -6.971440315246582, "step": 4292 }, { "epoch": 0.95, "learning_rate": 9.668843546284725e-06, "logits/chosen": -1.211102843284607, "logits/rejected": -1.208053708076477, "logps/chosen": -110.97268676757812, "logps/rejected": -266.99462890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.358058214187622, "rewards/margins": 7.938053131103516, "rewards/rejected": -9.296111106872559, "step": 4293 }, { "epoch": 0.95, "learning_rate": 9.668201813209202e-06, "logits/chosen": -1.0214183330535889, "logits/rejected": -0.9593185782432556, "logps/chosen": -88.7707290649414, "logps/rejected": -252.82899475097656, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.832129716873169, "rewards/margins": 6.383316993713379, "rewards/rejected": -8.215446472167969, "step": 4294 }, { "epoch": 0.95, "learning_rate": 9.667559480280968e-06, "logits/chosen": -0.9465270042419434, "logits/rejected": -1.0195629596710205, "logps/chosen": -141.6953125, "logps/rejected": -118.50044250488281, "loss": 0.0521, "rewards/accuracies": 1.0, "rewards/chosen": -0.328024297952652, "rewards/margins": 2.3658690452575684, "rewards/rejected": -2.6938934326171875, "step": 4295 }, { "epoch": 0.95, "learning_rate": 9.66691654758256e-06, "logits/chosen": -0.9916802048683167, "logits/rejected": -1.0097078084945679, "logps/chosen": -145.41146850585938, "logps/rejected": -229.9733123779297, "loss": 0.324, "rewards/accuracies": 1.0, "rewards/chosen": -0.657867431640625, "rewards/margins": 0.12895965576171875, "rewards/rejected": -0.7868270874023438, "step": 4296 }, { "epoch": 0.95, "learning_rate": 9.666273015196595e-06, "logits/chosen": -1.3121005296707153, "logits/rejected": -1.4238847494125366, "logps/chosen": -112.9810791015625, "logps/rejected": -123.28462982177734, "loss": 0.3553, "rewards/accuracies": 1.0, "rewards/chosen": -1.2222923040390015, "rewards/margins": 7.353505611419678, "rewards/rejected": -8.575798034667969, "step": 4297 }, { "epoch": 0.95, "learning_rate": 9.665628883205765e-06, "logits/chosen": -1.080023169517517, "logits/rejected": -1.0571579933166504, "logps/chosen": -87.75772094726562, "logps/rejected": -160.85162353515625, "loss": 0.1157, "rewards/accuracies": 1.0, "rewards/chosen": -0.4374809265136719, "rewards/margins": 5.999838352203369, "rewards/rejected": -6.437319278717041, "step": 4298 }, { "epoch": 0.95, "learning_rate": 9.66498415169284e-06, "logits/chosen": -0.9189335703849792, "logits/rejected": -0.8974040746688843, "logps/chosen": -71.91681671142578, "logps/rejected": -92.33250427246094, "loss": 0.652, "rewards/accuracies": 0.0, "rewards/chosen": -2.3670384883880615, "rewards/margins": -0.978697657585144, "rewards/rejected": -1.3883408308029175, "step": 4299 }, { "epoch": 0.95, "learning_rate": 9.664338820740664e-06, "logits/chosen": -0.7622030377388, "logits/rejected": -0.6985621452331543, "logps/chosen": -147.0037841796875, "logps/rejected": -312.94012451171875, "loss": 0.2372, "rewards/accuracies": 1.0, "rewards/chosen": -0.8446945548057556, "rewards/margins": 6.841529846191406, "rewards/rejected": -7.686224460601807, "step": 4300 }, { "epoch": 0.95, "learning_rate": 9.663692890432164e-06, "logits/chosen": -0.8341432809829712, "logits/rejected": -0.8341432809829712, "logps/chosen": -137.8996124267578, "logps/rejected": -137.8996124267578, "loss": 0.3498, "rewards/accuracies": 0.0, "rewards/chosen": -1.6356178522109985, "rewards/margins": 0.0, "rewards/rejected": -1.6356178522109985, "step": 4301 }, { "epoch": 0.95, "learning_rate": 9.663046360850338e-06, "logits/chosen": -1.0834051370620728, "logits/rejected": -1.1709505319595337, "logps/chosen": -188.48080444335938, "logps/rejected": -136.2961883544922, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": 0.2894134521484375, "rewards/margins": 5.6484527587890625, "rewards/rejected": -5.359039306640625, "step": 4302 }, { "epoch": 0.95, "learning_rate": 9.662399232078264e-06, "logits/chosen": -1.0029122829437256, "logits/rejected": -1.0452228784561157, "logps/chosen": -144.11422729492188, "logps/rejected": -94.16567993164062, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.5945541858673096, "rewards/margins": 4.908294677734375, "rewards/rejected": -6.502849102020264, "step": 4303 }, { "epoch": 0.95, "learning_rate": 9.661751504199097e-06, "logits/chosen": -1.5899499654769897, "logits/rejected": -1.6280516386032104, "logps/chosen": -81.45863342285156, "logps/rejected": -86.83366394042969, "loss": 0.2609, "rewards/accuracies": 1.0, "rewards/chosen": -0.9707977175712585, "rewards/margins": 0.4586532711982727, "rewards/rejected": -1.4294509887695312, "step": 4304 }, { "epoch": 0.95, "learning_rate": 9.661103177296069e-06, "logits/chosen": -1.2944334745407104, "logits/rejected": -1.331076741218567, "logps/chosen": -95.48252868652344, "logps/rejected": -115.61080932617188, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.18311920762062073, "rewards/margins": 7.176578521728516, "rewards/rejected": -6.993459224700928, "step": 4305 }, { "epoch": 0.95, "learning_rate": 9.660454251452487e-06, "logits/chosen": -1.2183629274368286, "logits/rejected": -1.2111332416534424, "logps/chosen": -153.94483947753906, "logps/rejected": -123.99137878417969, "loss": 2.0167, "rewards/accuracies": 0.0, "rewards/chosen": -5.325953960418701, "rewards/margins": -4.015254974365234, "rewards/rejected": -1.3106987476348877, "step": 4306 }, { "epoch": 0.95, "learning_rate": 9.659804726751737e-06, "logits/chosen": -0.996112048625946, "logits/rejected": -0.9932888746261597, "logps/chosen": -63.969642639160156, "logps/rejected": -89.50064849853516, "loss": 0.0693, "rewards/accuracies": 1.0, "rewards/chosen": -1.5486496686935425, "rewards/margins": 2.0368967056274414, "rewards/rejected": -3.5855462551116943, "step": 4307 }, { "epoch": 0.95, "learning_rate": 9.659154603277283e-06, "logits/chosen": -1.5450557470321655, "logits/rejected": -1.5111123323440552, "logps/chosen": -110.8275146484375, "logps/rejected": -94.6009292602539, "loss": 0.7951, "rewards/accuracies": 0.0, "rewards/chosen": -3.272693634033203, "rewards/margins": -1.3612487316131592, "rewards/rejected": -1.911444902420044, "step": 4308 }, { "epoch": 0.95, "learning_rate": 9.658503881112661e-06, "logits/chosen": -1.354467511177063, "logits/rejected": -0.8416516184806824, "logps/chosen": -171.18673706054688, "logps/rejected": -732.6141357421875, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -5.063821315765381, "rewards/margins": 46.99386978149414, "rewards/rejected": -52.05768966674805, "step": 4309 }, { "epoch": 0.95, "learning_rate": 9.65785256034149e-06, "logits/chosen": -1.0977877378463745, "logits/rejected": -1.1395134925842285, "logps/chosen": -172.95001220703125, "logps/rejected": -126.52413940429688, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/chosen": 0.08005370944738388, "rewards/margins": 2.5567140579223633, "rewards/rejected": -2.4766602516174316, "step": 4310 }, { "epoch": 0.95, "learning_rate": 9.657200641047462e-06, "logits/chosen": -0.6998888850212097, "logits/rejected": -0.634773850440979, "logps/chosen": -101.35795593261719, "logps/rejected": -152.7504425048828, "loss": 0.0706, "rewards/accuracies": 1.0, "rewards/chosen": -1.2016693353652954, "rewards/margins": 1.8868073225021362, "rewards/rejected": -3.0884766578674316, "step": 4311 }, { "epoch": 0.95, "learning_rate": 9.656548123314346e-06, "logits/chosen": -1.1176761388778687, "logits/rejected": -1.073936939239502, "logps/chosen": -256.71624755859375, "logps/rejected": -332.32537841796875, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4353531002998352, "rewards/margins": 5.450309753417969, "rewards/rejected": -5.885663032531738, "step": 4312 }, { "epoch": 0.95, "learning_rate": 9.655895007225992e-06, "logits/chosen": -1.1168322563171387, "logits/rejected": -1.1530890464782715, "logps/chosen": -208.62606811523438, "logps/rejected": -165.66708374023438, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.4911483824253082, "rewards/margins": 7.1444244384765625, "rewards/rejected": -6.653275966644287, "step": 4313 }, { "epoch": 0.95, "learning_rate": 9.655241292866321e-06, "logits/chosen": -0.9385493993759155, "logits/rejected": -0.9161199331283569, "logps/chosen": -119.49616241455078, "logps/rejected": -158.62335205078125, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -1.5209099054336548, "rewards/margins": 2.336930274963379, "rewards/rejected": -3.857840061187744, "step": 4314 }, { "epoch": 0.96, "learning_rate": 9.654586980319335e-06, "logits/chosen": -1.088957667350769, "logits/rejected": -1.1116352081298828, "logps/chosen": -218.27383422851562, "logps/rejected": -237.7624969482422, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.4945053160190582, "rewards/margins": 6.61216926574707, "rewards/rejected": -6.117663860321045, "step": 4315 }, { "epoch": 0.96, "learning_rate": 9.653932069669112e-06, "logits/chosen": -1.1566026210784912, "logits/rejected": -1.1217273473739624, "logps/chosen": -93.63048553466797, "logps/rejected": -123.70712280273438, "loss": 0.6004, "rewards/accuracies": 0.0, "rewards/chosen": -1.1340324878692627, "rewards/margins": -0.8014481067657471, "rewards/rejected": -0.3325843811035156, "step": 4316 }, { "epoch": 0.96, "learning_rate": 9.653276560999805e-06, "logits/chosen": -1.081776738166809, "logits/rejected": -1.0467009544372559, "logps/chosen": -89.44749450683594, "logps/rejected": -176.48187255859375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 0.21739502251148224, "rewards/margins": 7.056118011474609, "rewards/rejected": -6.838723182678223, "step": 4317 }, { "epoch": 0.96, "learning_rate": 9.652620454395647e-06, "logits/chosen": -1.3433421850204468, "logits/rejected": -0.6331561207771301, "logps/chosen": -88.06718444824219, "logps/rejected": -622.94140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1593544483184814, "rewards/margins": 46.42032241821289, "rewards/rejected": -47.57967758178711, "step": 4318 }, { "epoch": 0.96, "learning_rate": 9.651963749940944e-06, "logits/chosen": -1.023471474647522, "logits/rejected": -1.0171188116073608, "logps/chosen": -188.07174682617188, "logps/rejected": -213.8184356689453, "loss": 0.0511, "rewards/accuracies": 1.0, "rewards/chosen": 0.39837646484375, "rewards/margins": 6.731296062469482, "rewards/rejected": -6.332919597625732, "step": 4319 }, { "epoch": 0.96, "learning_rate": 9.651306447720083e-06, "logits/chosen": -1.482704758644104, "logits/rejected": -1.5891801118850708, "logps/chosen": -103.16650390625, "logps/rejected": -89.82453155517578, "loss": 0.203, "rewards/accuracies": 1.0, "rewards/chosen": -1.4158982038497925, "rewards/margins": 4.600662708282471, "rewards/rejected": -6.016561031341553, "step": 4320 }, { "epoch": 0.96, "learning_rate": 9.650648547817524e-06, "logits/chosen": -1.0523340702056885, "logits/rejected": -1.165633201599121, "logps/chosen": -230.8229217529297, "logps/rejected": -259.7778625488281, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.5912002921104431, "rewards/margins": 8.005836486816406, "rewards/rejected": -7.414636135101318, "step": 4321 }, { "epoch": 0.96, "learning_rate": 9.649990050317806e-06, "logits/chosen": -1.1910046339035034, "logits/rejected": -1.20737886428833, "logps/chosen": -147.8440704345703, "logps/rejected": -124.06918334960938, "loss": 0.341, "rewards/accuracies": 1.0, "rewards/chosen": 0.8781524896621704, "rewards/margins": 5.118314266204834, "rewards/rejected": -4.240161895751953, "step": 4322 }, { "epoch": 0.96, "learning_rate": 9.649330955305547e-06, "logits/chosen": -1.1468710899353027, "logits/rejected": -1.0632843971252441, "logps/chosen": -132.56405639648438, "logps/rejected": -350.22882080078125, "loss": 0.1143, "rewards/accuracies": 1.0, "rewards/chosen": -3.6826188564300537, "rewards/margins": 1.3592817783355713, "rewards/rejected": -5.041900634765625, "step": 4323 }, { "epoch": 0.96, "learning_rate": 9.648671262865434e-06, "logits/chosen": -0.9539600610733032, "logits/rejected": -1.0193673372268677, "logps/chosen": -210.04049682617188, "logps/rejected": -104.48218536376953, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": 1.388240098953247, "rewards/margins": 8.005760192871094, "rewards/rejected": -6.617520332336426, "step": 4324 }, { "epoch": 0.96, "learning_rate": 9.648010973082243e-06, "logits/chosen": -0.8090908527374268, "logits/rejected": -0.9330356121063232, "logps/chosen": -162.70913696289062, "logps/rejected": -119.73226928710938, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/chosen": 0.4644272029399872, "rewards/margins": 2.5448150634765625, "rewards/rejected": -2.080387830734253, "step": 4325 }, { "epoch": 0.96, "learning_rate": 9.647350086040812e-06, "logits/chosen": -0.8972765207290649, "logits/rejected": -0.936432957649231, "logps/chosen": -95.6513442993164, "logps/rejected": -134.9788360595703, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -1.3172539472579956, "rewards/margins": 2.849982261657715, "rewards/rejected": -4.167236328125, "step": 4326 }, { "epoch": 0.96, "learning_rate": 9.646688601826068e-06, "logits/chosen": -1.063930630683899, "logits/rejected": -1.0749030113220215, "logps/chosen": -65.45405578613281, "logps/rejected": -74.27147674560547, "loss": 0.1985, "rewards/accuracies": 1.0, "rewards/chosen": -1.4806358814239502, "rewards/margins": 0.7187702655792236, "rewards/rejected": -2.199406147003174, "step": 4327 }, { "epoch": 0.96, "learning_rate": 9.646026520523008e-06, "logits/chosen": -0.8610567450523376, "logits/rejected": -0.6394333243370056, "logps/chosen": -169.8492431640625, "logps/rejected": -663.9761962890625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.084132432937622, "rewards/margins": 38.84687805175781, "rewards/rejected": -40.93101119995117, "step": 4328 }, { "epoch": 0.96, "learning_rate": 9.64536384221671e-06, "logits/chosen": -1.0198874473571777, "logits/rejected": -1.1041194200515747, "logps/chosen": -182.3797149658203, "logps/rejected": -202.1253662109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3038894832134247, "rewards/margins": 8.492647171020508, "rewards/rejected": -8.796536445617676, "step": 4329 }, { "epoch": 0.96, "learning_rate": 9.644700566992324e-06, "logits/chosen": -0.9941965937614441, "logits/rejected": -1.004145860671997, "logps/chosen": -171.4857635498047, "logps/rejected": -211.11148071289062, "loss": 0.1364, "rewards/accuracies": 1.0, "rewards/chosen": -6.7584123611450195, "rewards/margins": 1.1633758544921875, "rewards/rejected": -7.921788215637207, "step": 4330 }, { "epoch": 0.96, "learning_rate": 9.644036694935083e-06, "logits/chosen": -1.3596948385238647, "logits/rejected": -1.3596948385238647, "logps/chosen": -88.68155670166016, "logps/rejected": -88.68155670166016, "loss": 0.3633, "rewards/accuracies": 0.0, "rewards/chosen": -3.4935479164123535, "rewards/margins": 0.0, "rewards/rejected": -3.4935479164123535, "step": 4331 }, { "epoch": 0.96, "learning_rate": 9.64337222613029e-06, "logits/chosen": -1.099621057510376, "logits/rejected": -1.0950815677642822, "logps/chosen": -84.48744201660156, "logps/rejected": -136.08169555664062, "loss": 0.0609, "rewards/accuracies": 1.0, "rewards/chosen": -2.1851773262023926, "rewards/margins": 2.414205551147461, "rewards/rejected": -4.5993828773498535, "step": 4332 }, { "epoch": 0.96, "learning_rate": 9.642707160663326e-06, "logits/chosen": -0.931562066078186, "logits/rejected": -0.8929452300071716, "logps/chosen": -102.39422607421875, "logps/rejected": -116.95631408691406, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -0.1353752166032791, "rewards/margins": 2.962726593017578, "rewards/rejected": -3.098101854324341, "step": 4333 }, { "epoch": 0.96, "learning_rate": 9.642041498619655e-06, "logits/chosen": -1.4234243631362915, "logits/rejected": -1.4416402578353882, "logps/chosen": -88.47123718261719, "logps/rejected": -88.8262939453125, "loss": 0.5791, "rewards/accuracies": 1.0, "rewards/chosen": -0.4948692321777344, "rewards/margins": 5.544334411621094, "rewards/rejected": -6.039203643798828, "step": 4334 }, { "epoch": 0.96, "learning_rate": 9.64137524008481e-06, "logits/chosen": -1.0875345468521118, "logits/rejected": -1.0237351655960083, "logps/chosen": -41.07968521118164, "logps/rejected": -123.4218978881836, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.12437210232019424, "rewards/margins": 7.426440238952637, "rewards/rejected": -7.30206823348999, "step": 4335 }, { "epoch": 0.96, "learning_rate": 9.640708385144403e-06, "logits/chosen": -1.3225200176239014, "logits/rejected": -1.0093060731887817, "logps/chosen": -66.40673065185547, "logps/rejected": -279.1336364746094, "loss": 0.1074, "rewards/accuracies": 1.0, "rewards/chosen": 0.3962646424770355, "rewards/margins": 1.4322052001953125, "rewards/rejected": -1.0359405279159546, "step": 4336 }, { "epoch": 0.96, "learning_rate": 9.640040933884126e-06, "logits/chosen": -0.715372622013092, "logits/rejected": -0.7612789869308472, "logps/chosen": -118.8368148803711, "logps/rejected": -229.14663696289062, "loss": 0.2118, "rewards/accuracies": 1.0, "rewards/chosen": -7.593898773193359, "rewards/margins": 0.6412076950073242, "rewards/rejected": -8.235106468200684, "step": 4337 }, { "epoch": 0.96, "learning_rate": 9.639372886389743e-06, "logits/chosen": -1.0076086521148682, "logits/rejected": -1.015436053276062, "logps/chosen": -68.56858825683594, "logps/rejected": -118.91297912597656, "loss": 0.1504, "rewards/accuracies": 1.0, "rewards/chosen": -1.8959228992462158, "rewards/margins": 2.991623640060425, "rewards/rejected": -4.887546539306641, "step": 4338 }, { "epoch": 0.96, "learning_rate": 9.638704242747097e-06, "logits/chosen": -1.0399671792984009, "logits/rejected": -1.0302555561065674, "logps/chosen": -200.23797607421875, "logps/rejected": -322.4920654296875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5360748171806335, "rewards/margins": 5.924344062805176, "rewards/rejected": -6.460418701171875, "step": 4339 }, { "epoch": 0.96, "learning_rate": 9.638035003042108e-06, "logits/chosen": -1.0035395622253418, "logits/rejected": -0.9461289644241333, "logps/chosen": -95.42594146728516, "logps/rejected": -186.16921997070312, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.515337347984314, "rewards/margins": 5.719081878662109, "rewards/rejected": -7.234419345855713, "step": 4340 }, { "epoch": 0.96, "learning_rate": 9.637365167360769e-06, "logits/chosen": -1.0816503763198853, "logits/rejected": -1.1513711214065552, "logps/chosen": -108.35649108886719, "logps/rejected": -67.49789428710938, "loss": 0.4767, "rewards/accuracies": 1.0, "rewards/chosen": -1.1140320301055908, "rewards/margins": 1.2132415771484375, "rewards/rejected": -2.3272736072540283, "step": 4341 }, { "epoch": 0.96, "learning_rate": 9.636694735789153e-06, "logits/chosen": -0.8831546902656555, "logits/rejected": -0.9011043906211853, "logps/chosen": -153.32421875, "logps/rejected": -109.87603759765625, "loss": 0.0965, "rewards/accuracies": 1.0, "rewards/chosen": 0.21423034369945526, "rewards/margins": 1.5871963500976562, "rewards/rejected": -1.3729660511016846, "step": 4342 }, { "epoch": 0.96, "learning_rate": 9.636023708413412e-06, "logits/chosen": -1.0119895935058594, "logits/rejected": -1.0390853881835938, "logps/chosen": -70.94831848144531, "logps/rejected": -65.39550018310547, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -1.5116100311279297, "rewards/margins": 3.5290493965148926, "rewards/rejected": -5.040659427642822, "step": 4343 }, { "epoch": 0.96, "learning_rate": 9.635352085319768e-06, "logits/chosen": -1.0676993131637573, "logits/rejected": -1.0865858793258667, "logps/chosen": -137.73611450195312, "logps/rejected": -126.3529052734375, "loss": 0.0774, "rewards/accuracies": 1.0, "rewards/chosen": -1.024261474609375, "rewards/margins": 1.7876884937286377, "rewards/rejected": -2.8119499683380127, "step": 4344 }, { "epoch": 0.96, "learning_rate": 9.634679866594525e-06, "logits/chosen": -0.9183520078659058, "logits/rejected": -0.5582526922225952, "logps/chosen": -130.0335235595703, "logps/rejected": -499.7525634765625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.5833526849746704, "rewards/margins": 39.06957244873047, "rewards/rejected": -39.652923583984375, "step": 4345 }, { "epoch": 0.96, "learning_rate": 9.63400705232406e-06, "logits/chosen": -0.5802932977676392, "logits/rejected": -0.5426732897758484, "logps/chosen": -124.18002319335938, "logps/rejected": -221.64144897460938, "loss": 0.0484, "rewards/accuracies": 1.0, "rewards/chosen": -0.18776550889015198, "rewards/margins": 9.552393913269043, "rewards/rejected": -9.740159034729004, "step": 4346 }, { "epoch": 0.96, "learning_rate": 9.633333642594828e-06, "logits/chosen": -1.0715104341506958, "logits/rejected": -1.0840821266174316, "logps/chosen": -119.14691925048828, "logps/rejected": -133.06097412109375, "loss": 0.0488, "rewards/accuracies": 1.0, "rewards/chosen": -1.1004310846328735, "rewards/margins": 2.2782845497131348, "rewards/rejected": -3.3787155151367188, "step": 4347 }, { "epoch": 0.96, "learning_rate": 9.632659637493362e-06, "logits/chosen": -0.9019557237625122, "logits/rejected": -0.9019557237625122, "logps/chosen": -134.36331176757812, "logps/rejected": -134.36331176757812, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.932625770568848, "rewards/margins": 0.0, "rewards/rejected": -4.932625770568848, "step": 4348 }, { "epoch": 0.96, "learning_rate": 9.631985037106268e-06, "logits/chosen": -0.9702052474021912, "logits/rejected": -0.971777617931366, "logps/chosen": -154.1588897705078, "logps/rejected": -120.82476043701172, "loss": 0.1496, "rewards/accuracies": 1.0, "rewards/chosen": -0.9290024042129517, "rewards/margins": 1.0804466009140015, "rewards/rejected": -2.009449005126953, "step": 4349 }, { "epoch": 0.96, "learning_rate": 9.631309841520233e-06, "logits/chosen": -1.0683784484863281, "logits/rejected": -1.0683265924453735, "logps/chosen": -73.88861083984375, "logps/rejected": -116.90616607666016, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -0.5502212643623352, "rewards/margins": 4.639850616455078, "rewards/rejected": -5.190072059631348, "step": 4350 }, { "epoch": 0.96, "learning_rate": 9.630634050822016e-06, "logits/chosen": -0.7110156416893005, "logits/rejected": -0.6642489433288574, "logps/chosen": -211.8383026123047, "logps/rejected": -209.0657501220703, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.7617385387420654, "rewards/margins": 12.398693084716797, "rewards/rejected": -9.636954307556152, "step": 4351 }, { "epoch": 0.96, "learning_rate": 9.629957665098458e-06, "logits/chosen": -0.8762679100036621, "logits/rejected": -0.862472414970398, "logps/chosen": -189.0749969482422, "logps/rejected": -168.97901916503906, "loss": 0.3264, "rewards/accuracies": 1.0, "rewards/chosen": 1.5984238386154175, "rewards/margins": 0.08746039867401123, "rewards/rejected": 1.5109634399414062, "step": 4352 }, { "epoch": 0.96, "learning_rate": 9.629280684436467e-06, "logits/chosen": -1.3201041221618652, "logits/rejected": -1.1684081554412842, "logps/chosen": -128.17098999023438, "logps/rejected": -167.4805450439453, "loss": 0.9209, "rewards/accuracies": 0.0, "rewards/chosen": -5.908069610595703, "rewards/margins": -1.669243335723877, "rewards/rejected": -4.238826274871826, "step": 4353 }, { "epoch": 0.96, "learning_rate": 9.628603108923037e-06, "logits/chosen": -1.0408093929290771, "logits/rejected": -1.0404070615768433, "logps/chosen": -114.46357727050781, "logps/rejected": -166.6788787841797, "loss": 0.2915, "rewards/accuracies": 1.0, "rewards/chosen": -2.073573350906372, "rewards/margins": 0.24060678482055664, "rewards/rejected": -2.3141801357269287, "step": 4354 }, { "epoch": 0.96, "learning_rate": 9.627924938645234e-06, "logits/chosen": -1.2840193510055542, "logits/rejected": -1.330783486366272, "logps/chosen": -187.1597900390625, "logps/rejected": -150.39369201660156, "loss": 0.0506, "rewards/accuracies": 1.0, "rewards/chosen": -2.8137574195861816, "rewards/margins": 2.6569151878356934, "rewards/rejected": -5.470672607421875, "step": 4355 }, { "epoch": 0.96, "learning_rate": 9.627246173690202e-06, "logits/chosen": -1.0008373260498047, "logits/rejected": -1.0008373260498047, "logps/chosen": -187.46083068847656, "logps/rejected": -187.46083068847656, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -8.037261962890625, "rewards/margins": 0.0, "rewards/rejected": -8.037261962890625, "step": 4356 }, { "epoch": 0.96, "learning_rate": 9.62656681414516e-06, "logits/chosen": -1.1208696365356445, "logits/rejected": -1.1174311637878418, "logps/chosen": -89.89994812011719, "logps/rejected": -81.57992553710938, "loss": 0.0806, "rewards/accuracies": 1.0, "rewards/chosen": -1.2350510358810425, "rewards/margins": 2.791106700897217, "rewards/rejected": -4.026157855987549, "step": 4357 }, { "epoch": 0.96, "learning_rate": 9.625886860097406e-06, "logits/chosen": -0.9978704452514648, "logits/rejected": -0.9740827083587646, "logps/chosen": -215.38430786132812, "logps/rejected": -223.29476928710938, "loss": 0.1381, "rewards/accuracies": 1.0, "rewards/chosen": -0.8282058835029602, "rewards/margins": 1.1474502086639404, "rewards/rejected": -1.9756561517715454, "step": 4358 }, { "epoch": 0.96, "learning_rate": 9.62520631163431e-06, "logits/chosen": -1.1248096227645874, "logits/rejected": -1.1248096227645874, "logps/chosen": -99.79671478271484, "logps/rejected": -99.79671478271484, "loss": 0.3513, "rewards/accuracies": 0.0, "rewards/chosen": -0.5188804864883423, "rewards/margins": 0.0, "rewards/rejected": -0.5188804864883423, "step": 4359 }, { "epoch": 0.97, "learning_rate": 9.62452516884332e-06, "logits/chosen": -1.19826078414917, "logits/rejected": -1.1857566833496094, "logps/chosen": -89.85945129394531, "logps/rejected": -95.70706939697266, "loss": 0.3473, "rewards/accuracies": 1.0, "rewards/chosen": -0.3433837890625, "rewards/margins": 6.496585845947266, "rewards/rejected": -6.839969635009766, "step": 4360 }, { "epoch": 0.97, "learning_rate": 9.623843431811964e-06, "logits/chosen": -1.1674703359603882, "logits/rejected": -1.1289080381393433, "logps/chosen": -146.51150512695312, "logps/rejected": -184.61587524414062, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -2.103955030441284, "rewards/margins": 4.474574089050293, "rewards/rejected": -6.578528881072998, "step": 4361 }, { "epoch": 0.97, "learning_rate": 9.623161100627842e-06, "logits/chosen": -1.027501106262207, "logits/rejected": -0.8535563945770264, "logps/chosen": -188.34030151367188, "logps/rejected": -266.04754638671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 1.7070831060409546, "rewards/margins": 5.886470317840576, "rewards/rejected": -4.179387092590332, "step": 4362 }, { "epoch": 0.97, "learning_rate": 9.622478175378634e-06, "logits/chosen": -1.0486042499542236, "logits/rejected": -0.9653716087341309, "logps/chosen": -241.59146118164062, "logps/rejected": -238.03286743164062, "loss": 0.1122, "rewards/accuracies": 1.0, "rewards/chosen": 3.035959005355835, "rewards/margins": 4.473202705383301, "rewards/rejected": -1.4372437000274658, "step": 4363 }, { "epoch": 0.97, "learning_rate": 9.62179465615209e-06, "logits/chosen": -0.8144199252128601, "logits/rejected": -0.795438289642334, "logps/chosen": -237.4635467529297, "logps/rejected": -213.58331298828125, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": 0.6921097040176392, "rewards/margins": 4.41732931137085, "rewards/rejected": -3.7252197265625, "step": 4364 }, { "epoch": 0.97, "learning_rate": 9.621110543036047e-06, "logits/chosen": -1.1295115947723389, "logits/rejected": -1.1805769205093384, "logps/chosen": -108.44908905029297, "logps/rejected": -89.39309692382812, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.1907203644514084, "rewards/margins": 5.089082717895508, "rewards/rejected": -5.279803276062012, "step": 4365 }, { "epoch": 0.97, "learning_rate": 9.620425836118406e-06, "logits/chosen": -0.8493829965591431, "logits/rejected": -0.8762686848640442, "logps/chosen": -251.09954833984375, "logps/rejected": -181.35227966308594, "loss": 0.0997, "rewards/accuracies": 1.0, "rewards/chosen": 0.754718005657196, "rewards/margins": 1.511744737625122, "rewards/rejected": -0.7570266723632812, "step": 4366 }, { "epoch": 0.97, "learning_rate": 9.619740535487151e-06, "logits/chosen": -0.861415445804596, "logits/rejected": -0.8644909262657166, "logps/chosen": -114.62508392333984, "logps/rejected": -93.38032531738281, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5059303641319275, "rewards/margins": 5.935174465179443, "rewards/rejected": -6.441104888916016, "step": 4367 }, { "epoch": 0.97, "learning_rate": 9.619054641230343e-06, "logits/chosen": -0.9267128109931946, "logits/rejected": -0.9651627540588379, "logps/chosen": -95.51371765136719, "logps/rejected": -94.860595703125, "loss": 0.8395, "rewards/accuracies": 0.0, "rewards/chosen": -2.22532057762146, "rewards/margins": -1.4709367752075195, "rewards/rejected": -0.7543838620185852, "step": 4368 }, { "epoch": 0.97, "learning_rate": 9.618368153436119e-06, "logits/chosen": -0.776614785194397, "logits/rejected": -0.746724009513855, "logps/chosen": -110.6807632446289, "logps/rejected": -183.35536193847656, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.9526374936103821, "rewards/margins": 4.070242881774902, "rewards/rejected": -5.022880554199219, "step": 4369 }, { "epoch": 0.97, "learning_rate": 9.617681072192688e-06, "logits/chosen": -0.7961201667785645, "logits/rejected": -0.771187424659729, "logps/chosen": -170.56838989257812, "logps/rejected": -340.326416015625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.26671144366264343, "rewards/margins": 5.093729019165039, "rewards/rejected": -5.360440254211426, "step": 4370 }, { "epoch": 0.97, "learning_rate": 9.616993397588342e-06, "logits/chosen": -0.9381685853004456, "logits/rejected": -1.0803037881851196, "logps/chosen": -255.1898193359375, "logps/rejected": -80.59855651855469, "loss": 0.1081, "rewards/accuracies": 1.0, "rewards/chosen": -3.4075591564178467, "rewards/margins": 2.3121516704559326, "rewards/rejected": -5.719710826873779, "step": 4371 }, { "epoch": 0.97, "learning_rate": 9.61630512971144e-06, "logits/chosen": -1.1088424921035767, "logits/rejected": -1.1648043394088745, "logps/chosen": -133.87933349609375, "logps/rejected": -143.42050170898438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.190907284617424, "rewards/margins": 9.818668365478516, "rewards/rejected": -9.627760887145996, "step": 4372 }, { "epoch": 0.97, "learning_rate": 9.61561626865043e-06, "logits/chosen": -1.0928473472595215, "logits/rejected": -1.0853549242019653, "logps/chosen": -79.45648193359375, "logps/rejected": -104.2476806640625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.75893634557724, "rewards/margins": 5.430440902709961, "rewards/rejected": -6.189377307891846, "step": 4373 }, { "epoch": 0.97, "learning_rate": 9.614926814493822e-06, "logits/chosen": -1.1584150791168213, "logits/rejected": -1.3172470331192017, "logps/chosen": -213.06219482421875, "logps/rejected": -178.8130340576172, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.5589141845703125, "rewards/margins": 7.788281440734863, "rewards/rejected": -7.229367256164551, "step": 4374 }, { "epoch": 0.97, "learning_rate": 9.614236767330214e-06, "logits/chosen": -0.8096104860305786, "logits/rejected": -0.7560445070266724, "logps/chosen": -143.88858032226562, "logps/rejected": -172.57888793945312, "loss": 0.5352, "rewards/accuracies": 0.0, "rewards/chosen": -4.256042003631592, "rewards/margins": -0.6466577053070068, "rewards/rejected": -3.609384298324585, "step": 4375 }, { "epoch": 0.97, "learning_rate": 9.613546127248272e-06, "logits/chosen": -0.9232035875320435, "logits/rejected": -0.9247387051582336, "logps/chosen": -94.0682601928711, "logps/rejected": -207.2382049560547, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.1504722684621811, "rewards/margins": 7.732102394104004, "rewards/rejected": -7.581630229949951, "step": 4376 }, { "epoch": 0.97, "learning_rate": 9.612854894336746e-06, "logits/chosen": -1.0087698698043823, "logits/rejected": -0.8770734071731567, "logps/chosen": -74.31401824951172, "logps/rejected": -168.90914916992188, "loss": 0.9562, "rewards/accuracies": 0.0, "rewards/chosen": -0.09395523369312286, "rewards/margins": -1.7520241737365723, "rewards/rejected": 1.6580688953399658, "step": 4377 }, { "epoch": 0.97, "learning_rate": 9.612163068684453e-06, "logits/chosen": -0.7445504665374756, "logits/rejected": -0.760744035243988, "logps/chosen": -176.80987548828125, "logps/rejected": -177.43350219726562, "loss": 0.048, "rewards/accuracies": 1.0, "rewards/chosen": 1.8947540521621704, "rewards/margins": 2.6759612560272217, "rewards/rejected": -0.781207263469696, "step": 4378 }, { "epoch": 0.97, "learning_rate": 9.611470650380293e-06, "logits/chosen": -1.0740593671798706, "logits/rejected": -1.111944317817688, "logps/chosen": -219.10614013671875, "logps/rejected": -267.790283203125, "loss": 0.2474, "rewards/accuracies": 1.0, "rewards/chosen": 0.4231826961040497, "rewards/margins": 0.4726974666118622, "rewards/rejected": -0.0495147705078125, "step": 4379 }, { "epoch": 0.97, "learning_rate": 9.61077763951324e-06, "logits/chosen": -0.8988651037216187, "logits/rejected": -0.9205526113510132, "logps/chosen": -233.93048095703125, "logps/rejected": -237.97494506835938, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -3.964031934738159, "rewards/margins": 5.115780830383301, "rewards/rejected": -9.079813003540039, "step": 4380 }, { "epoch": 0.97, "learning_rate": 9.610084036172346e-06, "logits/chosen": -0.8316335678100586, "logits/rejected": -0.8316335678100586, "logps/chosen": -162.23782348632812, "logps/rejected": -162.23782348632812, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -1.6209839582443237, "rewards/margins": 0.0, "rewards/rejected": -1.6209839582443237, "step": 4381 }, { "epoch": 0.97, "learning_rate": 9.609389840446734e-06, "logits/chosen": -0.7209523916244507, "logits/rejected": -0.6666943430900574, "logps/chosen": -112.6409683227539, "logps/rejected": -302.1791076660156, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.8060089349746704, "rewards/margins": 5.268078804016113, "rewards/rejected": -6.074087619781494, "step": 4382 }, { "epoch": 0.97, "learning_rate": 9.60869505242561e-06, "logits/chosen": -1.113698124885559, "logits/rejected": -1.1251553297042847, "logps/chosen": -84.22807312011719, "logps/rejected": -114.19660949707031, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": 1.7676231861114502, "rewards/margins": 5.200543403625488, "rewards/rejected": -3.432920217514038, "step": 4383 }, { "epoch": 0.97, "learning_rate": 9.60799967219825e-06, "logits/chosen": -0.897731363773346, "logits/rejected": -0.9172682166099548, "logps/chosen": -202.40292358398438, "logps/rejected": -132.86744689941406, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.22564087808132172, "rewards/margins": 5.415886878967285, "rewards/rejected": -5.6415276527404785, "step": 4384 }, { "epoch": 0.97, "learning_rate": 9.607303699854009e-06, "logits/chosen": -1.1250877380371094, "logits/rejected": -1.1130775213241577, "logps/chosen": -149.64971923828125, "logps/rejected": -180.9932098388672, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.9622818231582642, "rewards/margins": 4.913606643676758, "rewards/rejected": -6.875888347625732, "step": 4385 }, { "epoch": 0.97, "learning_rate": 9.606607135482318e-06, "logits/chosen": -1.3841508626937866, "logits/rejected": -1.235700249671936, "logps/chosen": -102.09091186523438, "logps/rejected": -245.7930908203125, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -0.4886794984340668, "rewards/margins": 3.6808032989501953, "rewards/rejected": -4.169482707977295, "step": 4386 }, { "epoch": 0.97, "learning_rate": 9.605909979172683e-06, "logits/chosen": -0.8860353231430054, "logits/rejected": -0.8860353231430054, "logps/chosen": -300.90472412109375, "logps/rejected": -300.90472412109375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.8612916469573975, "rewards/margins": 0.0, "rewards/rejected": -3.8612916469573975, "step": 4387 }, { "epoch": 0.97, "learning_rate": 9.60521223101469e-06, "logits/chosen": -0.8806954026222229, "logits/rejected": -0.9020051956176758, "logps/chosen": -215.44625854492188, "logps/rejected": -85.07701873779297, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.543133556842804, "rewards/margins": 6.146928787231445, "rewards/rejected": -5.603795051574707, "step": 4388 }, { "epoch": 0.97, "learning_rate": 9.604513891097995e-06, "logits/chosen": -0.9499014616012573, "logits/rejected": -0.9686207175254822, "logps/chosen": -262.29046630859375, "logps/rejected": -288.47186279296875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.6733779907226562, "rewards/margins": 6.945431709289551, "rewards/rejected": -9.618809700012207, "step": 4389 }, { "epoch": 0.97, "learning_rate": 9.603814959512334e-06, "logits/chosen": -1.0630512237548828, "logits/rejected": -1.068430781364441, "logps/chosen": -94.75929260253906, "logps/rejected": -134.10865783691406, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": 0.3472885191440582, "rewards/margins": 2.746260166168213, "rewards/rejected": -2.3989715576171875, "step": 4390 }, { "epoch": 0.97, "learning_rate": 9.603115436347519e-06, "logits/chosen": -0.7525902390480042, "logits/rejected": -0.8241041302680969, "logps/chosen": -207.45797729492188, "logps/rejected": -157.8303680419922, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.26680299639701843, "rewards/margins": 5.222079753875732, "rewards/rejected": -5.488882541656494, "step": 4391 }, { "epoch": 0.97, "learning_rate": 9.602415321693434e-06, "logits/chosen": -1.0103596448898315, "logits/rejected": -1.0904699563980103, "logps/chosen": -343.1678466796875, "logps/rejected": -123.4262924194336, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": -1.180975317955017, "rewards/margins": 2.283822536468506, "rewards/rejected": -3.4647979736328125, "step": 4392 }, { "epoch": 0.97, "learning_rate": 9.601714615640046e-06, "logits/chosen": -1.2784487009048462, "logits/rejected": -1.3142398595809937, "logps/chosen": -169.31280517578125, "logps/rejected": -168.29322814941406, "loss": 0.2625, "rewards/accuracies": 1.0, "rewards/chosen": -4.0475616455078125, "rewards/margins": 0.44275617599487305, "rewards/rejected": -4.4903178215026855, "step": 4393 }, { "epoch": 0.97, "learning_rate": 9.601013318277391e-06, "logits/chosen": -1.0986924171447754, "logits/rejected": -1.1902631521224976, "logps/chosen": -189.78244018554688, "logps/rejected": -200.3772430419922, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.990863025188446, "rewards/margins": 8.451705932617188, "rewards/rejected": -7.460843086242676, "step": 4394 }, { "epoch": 0.97, "learning_rate": 9.600311429695586e-06, "logits/chosen": -0.9778394103050232, "logits/rejected": -0.9807272553443909, "logps/chosen": -187.57302856445312, "logps/rejected": -260.740478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.5486907958984375, "rewards/margins": 10.129898071289062, "rewards/rejected": -9.581207275390625, "step": 4395 }, { "epoch": 0.97, "learning_rate": 9.59960894998482e-06, "logits/chosen": -0.49832338094711304, "logits/rejected": -0.5212353467941284, "logps/chosen": -233.09585571289062, "logps/rejected": -105.10736846923828, "loss": 0.7255, "rewards/accuracies": 0.0, "rewards/chosen": -5.435492038726807, "rewards/margins": -1.1830987930297852, "rewards/rejected": -4.2523932456970215, "step": 4396 }, { "epoch": 0.97, "learning_rate": 9.598905879235362e-06, "logits/chosen": -1.0037494897842407, "logits/rejected": -1.0824326276779175, "logps/chosen": -119.32705688476562, "logps/rejected": -191.2636260986328, "loss": 0.0502, "rewards/accuracies": 1.0, "rewards/chosen": -1.5458420515060425, "rewards/margins": 5.4494948387146, "rewards/rejected": -6.995337009429932, "step": 4397 }, { "epoch": 0.97, "learning_rate": 9.598202217537554e-06, "logits/chosen": -0.8053162097930908, "logits/rejected": -0.4836263358592987, "logps/chosen": -170.7564697265625, "logps/rejected": -611.159423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.1281722784042358, "rewards/margins": 39.807376861572266, "rewards/rejected": -38.679203033447266, "step": 4398 }, { "epoch": 0.97, "learning_rate": 9.597497964981815e-06, "logits/chosen": -0.9505196809768677, "logits/rejected": -0.8990996479988098, "logps/chosen": -115.38076782226562, "logps/rejected": -240.96156311035156, "loss": 0.3707, "rewards/accuracies": 1.0, "rewards/chosen": -0.00408935546875, "rewards/margins": 7.401414394378662, "rewards/rejected": -7.405503749847412, "step": 4399 }, { "epoch": 0.97, "learning_rate": 9.59679312165864e-06, "logits/chosen": -0.9681998491287231, "logits/rejected": -0.9859582185745239, "logps/chosen": -110.40363311767578, "logps/rejected": -109.64421081542969, "loss": 0.1493, "rewards/accuracies": 1.0, "rewards/chosen": -0.9813346862792969, "rewards/margins": 1.055901288986206, "rewards/rejected": -2.037235975265503, "step": 4400 }, { "epoch": 0.97, "learning_rate": 9.596087687658598e-06, "logits/chosen": -1.0858280658721924, "logits/rejected": -0.8382771611213684, "logps/chosen": -103.59471130371094, "logps/rejected": -701.89453125, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -1.811226725578308, "rewards/margins": 53.61343765258789, "rewards/rejected": -55.42466354370117, "step": 4401 }, { "epoch": 0.97, "learning_rate": 9.595381663072335e-06, "logits/chosen": -0.7859122157096863, "logits/rejected": -0.7626213431358337, "logps/chosen": -99.39430236816406, "logps/rejected": -105.70001983642578, "loss": 0.1012, "rewards/accuracies": 1.0, "rewards/chosen": -3.6961610317230225, "rewards/margins": 3.5269224643707275, "rewards/rejected": -7.22308349609375, "step": 4402 }, { "epoch": 0.97, "learning_rate": 9.594675047990578e-06, "logits/chosen": -1.4559712409973145, "logits/rejected": -1.5544193983078003, "logps/chosen": -93.31265258789062, "logps/rejected": -94.48653411865234, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.282952904701233, "rewards/margins": 5.301990509033203, "rewards/rejected": -6.5849432945251465, "step": 4403 }, { "epoch": 0.97, "learning_rate": 9.593967842504121e-06, "logits/chosen": -1.0304148197174072, "logits/rejected": -1.0467849969863892, "logps/chosen": -178.76861572265625, "logps/rejected": -176.5622100830078, "loss": 0.0635, "rewards/accuracies": 1.0, "rewards/chosen": -0.586346447467804, "rewards/margins": 2.0845565795898438, "rewards/rejected": -2.670902967453003, "step": 4404 }, { "epoch": 0.97, "learning_rate": 9.593260046703842e-06, "logits/chosen": -0.8586581349372864, "logits/rejected": -0.7354162931442261, "logps/chosen": -157.89654541015625, "logps/rejected": -392.4420166015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9344528317451477, "rewards/margins": 8.756464958190918, "rewards/rejected": -9.69091796875, "step": 4405 }, { "epoch": 0.98, "learning_rate": 9.592551660680687e-06, "logits/chosen": -0.7980427742004395, "logits/rejected": -0.7980427742004395, "logps/chosen": -139.14208984375, "logps/rejected": -139.14208984375, "loss": 0.3634, "rewards/accuracies": 0.0, "rewards/chosen": 0.47349855303764343, "rewards/margins": 0.0, "rewards/rejected": 0.47349855303764343, "step": 4406 }, { "epoch": 0.98, "learning_rate": 9.591842684525685e-06, "logits/chosen": -0.8949408531188965, "logits/rejected": -0.9102914929389954, "logps/chosen": -217.8863983154297, "logps/rejected": -182.083740234375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.960894763469696, "rewards/margins": 7.665075778961182, "rewards/rejected": -6.70418119430542, "step": 4407 }, { "epoch": 0.98, "learning_rate": 9.591133118329936e-06, "logits/chosen": -0.6616039276123047, "logits/rejected": -0.6616039276123047, "logps/chosen": -46.06147766113281, "logps/rejected": -46.06147766113281, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.1899917125701904, "rewards/margins": 0.0, "rewards/rejected": -3.1899917125701904, "step": 4408 }, { "epoch": 0.98, "learning_rate": 9.590422962184619e-06, "logits/chosen": -1.5616626739501953, "logits/rejected": -1.5853601694107056, "logps/chosen": -109.27566528320312, "logps/rejected": -130.99172973632812, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": 0.766949474811554, "rewards/margins": 4.487254619598389, "rewards/rejected": -3.7203049659729004, "step": 4409 }, { "epoch": 0.98, "learning_rate": 9.589712216180986e-06, "logits/chosen": -0.7744614481925964, "logits/rejected": -0.7654604315757751, "logps/chosen": -157.16162109375, "logps/rejected": -158.23477172851562, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.8724395632743835, "rewards/margins": 7.154890537261963, "rewards/rejected": -6.282451152801514, "step": 4410 }, { "epoch": 0.98, "learning_rate": 9.589000880410366e-06, "logits/chosen": -0.8545732498168945, "logits/rejected": -0.7991676330566406, "logps/chosen": -65.90090942382812, "logps/rejected": -155.42599487304688, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.38999253511428833, "rewards/margins": 5.0601372718811035, "rewards/rejected": -5.450129985809326, "step": 4411 }, { "epoch": 0.98, "learning_rate": 9.588288954964164e-06, "logits/chosen": -0.8420989513397217, "logits/rejected": -0.8359572887420654, "logps/chosen": -63.55978775024414, "logps/rejected": -73.1498794555664, "loss": 0.2334, "rewards/accuracies": 1.0, "rewards/chosen": -4.131922721862793, "rewards/margins": 0.531367301940918, "rewards/rejected": -4.663290023803711, "step": 4412 }, { "epoch": 0.98, "learning_rate": 9.587576439933862e-06, "logits/chosen": -0.999620258808136, "logits/rejected": -1.0594158172607422, "logps/chosen": -322.8394775390625, "logps/rejected": -255.10472106933594, "loss": 0.0308, "rewards/accuracies": 1.0, "rewards/chosen": 2.2943177223205566, "rewards/margins": 2.7734575271606445, "rewards/rejected": -0.4791397154331207, "step": 4413 }, { "epoch": 0.98, "learning_rate": 9.586863335411017e-06, "logits/chosen": -0.7456406950950623, "logits/rejected": -0.7230441570281982, "logps/chosen": -189.5590362548828, "logps/rejected": -175.63790893554688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.2358415126800537, "rewards/margins": 9.75000286102295, "rewards/rejected": -7.514161109924316, "step": 4414 }, { "epoch": 0.98, "learning_rate": 9.586149641487257e-06, "logits/chosen": -1.3263214826583862, "logits/rejected": -1.2570712566375732, "logps/chosen": -100.08705139160156, "logps/rejected": -192.64230346679688, "loss": 0.4796, "rewards/accuracies": 0.0, "rewards/chosen": -0.6807876825332642, "rewards/margins": -0.4009536802768707, "rewards/rejected": -0.27983400225639343, "step": 4415 }, { "epoch": 0.98, "learning_rate": 9.585435358254295e-06, "logits/chosen": -1.1263482570648193, "logits/rejected": -1.0877097845077515, "logps/chosen": -94.898681640625, "logps/rejected": -179.16796875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 1.1066970825195312, "rewards/margins": 5.603025913238525, "rewards/rejected": -4.496328830718994, "step": 4416 }, { "epoch": 0.98, "learning_rate": 9.584720485803912e-06, "logits/chosen": -1.041551113128662, "logits/rejected": -0.9418726563453674, "logps/chosen": -79.61099243164062, "logps/rejected": -172.46551513671875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.6658431887626648, "rewards/margins": 5.839697360992432, "rewards/rejected": -5.173854351043701, "step": 4417 }, { "epoch": 0.98, "learning_rate": 9.584005024227967e-06, "logits/chosen": -0.7667662501335144, "logits/rejected": -0.8240324854850769, "logps/chosen": -201.11614990234375, "logps/rejected": -114.31250762939453, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": -0.8740814328193665, "rewards/margins": 2.35439395904541, "rewards/rejected": -3.228475332260132, "step": 4418 }, { "epoch": 0.98, "learning_rate": 9.583288973618398e-06, "logits/chosen": -0.7469905018806458, "logits/rejected": -0.7202771902084351, "logps/chosen": -187.86007690429688, "logps/rejected": -272.2186279296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.0792877674102783, "rewards/margins": 8.869144439697266, "rewards/rejected": -9.948431968688965, "step": 4419 }, { "epoch": 0.98, "learning_rate": 9.582572334067213e-06, "logits/chosen": -1.0529640913009644, "logits/rejected": -1.0150718688964844, "logps/chosen": -130.4288330078125, "logps/rejected": -286.713134765625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.3450279235839844, "rewards/margins": 5.995389461517334, "rewards/rejected": -6.340417385101318, "step": 4420 }, { "epoch": 0.98, "learning_rate": 9.581855105666497e-06, "logits/chosen": -1.0927071571350098, "logits/rejected": -1.0815860033035278, "logps/chosen": -85.4046630859375, "logps/rejected": -183.82534790039062, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.160227969288826, "rewards/margins": 5.310619831085205, "rewards/rejected": -5.150392055511475, "step": 4421 }, { "epoch": 0.98, "learning_rate": 9.581137288508417e-06, "logits/chosen": -1.0764297246932983, "logits/rejected": -1.0764297246932983, "logps/chosen": -183.69229125976562, "logps/rejected": -183.69229125976562, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -7.0455474853515625, "rewards/margins": 0.0, "rewards/rejected": -7.0455474853515625, "step": 4422 }, { "epoch": 0.98, "learning_rate": 9.580418882685208e-06, "logits/chosen": -0.778393566608429, "logits/rejected": -0.8012458086013794, "logps/chosen": -103.61824035644531, "logps/rejected": -118.83255004882812, "loss": 0.157, "rewards/accuracies": 1.0, "rewards/chosen": -1.9659576416015625, "rewards/margins": 1.0045502185821533, "rewards/rejected": -2.970507860183716, "step": 4423 }, { "epoch": 0.98, "learning_rate": 9.579699888289184e-06, "logits/chosen": -0.8426328897476196, "logits/rejected": -0.7281071543693542, "logps/chosen": -198.10377502441406, "logps/rejected": -653.2650146484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.6728652715682983, "rewards/margins": 56.79155731201172, "rewards/rejected": -55.118690490722656, "step": 4424 }, { "epoch": 0.98, "learning_rate": 9.578980305412733e-06, "logits/chosen": -1.3851919174194336, "logits/rejected": -1.3773030042648315, "logps/chosen": -92.22587585449219, "logps/rejected": -93.42376708984375, "loss": 0.1517, "rewards/accuracies": 1.0, "rewards/chosen": -1.7116364240646362, "rewards/margins": 1.037394642829895, "rewards/rejected": -2.7490310668945312, "step": 4425 }, { "epoch": 0.98, "learning_rate": 9.57826013414832e-06, "logits/chosen": -0.84049391746521, "logits/rejected": -0.8860616683959961, "logps/chosen": -158.98902893066406, "logps/rejected": -208.90029907226562, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.06502228230237961, "rewards/margins": 8.200562477111816, "rewards/rejected": -8.135540008544922, "step": 4426 }, { "epoch": 0.98, "learning_rate": 9.577539374588486e-06, "logits/chosen": -0.8183075189590454, "logits/rejected": -0.49975576996803284, "logps/chosen": -205.48037719726562, "logps/rejected": -513.5396728515625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -3.2902772426605225, "rewards/margins": 29.78545570373535, "rewards/rejected": -33.07573318481445, "step": 4427 }, { "epoch": 0.98, "learning_rate": 9.576818026825846e-06, "logits/chosen": -1.2113534212112427, "logits/rejected": -1.2215898036956787, "logps/chosen": -139.85903930664062, "logps/rejected": -172.18211364746094, "loss": 0.4637, "rewards/accuracies": 1.0, "rewards/chosen": -3.5285003185272217, "rewards/margins": 1.11344313621521, "rewards/rejected": -4.641943454742432, "step": 4428 }, { "epoch": 0.98, "learning_rate": 9.57609609095309e-06, "logits/chosen": -1.281077265739441, "logits/rejected": -1.3249112367630005, "logps/chosen": -126.47323608398438, "logps/rejected": -166.75689697265625, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -2.5289993286132812, "rewards/margins": 2.6167192459106445, "rewards/rejected": -5.145718574523926, "step": 4429 }, { "epoch": 0.98, "learning_rate": 9.57537356706299e-06, "logits/chosen": -1.2293893098831177, "logits/rejected": -1.0596081018447876, "logps/chosen": -140.89187622070312, "logps/rejected": -415.7299499511719, "loss": 1.0355, "rewards/accuracies": 1.0, "rewards/chosen": -1.7491371631622314, "rewards/margins": 8.106517791748047, "rewards/rejected": -9.8556547164917, "step": 4430 }, { "epoch": 0.98, "learning_rate": 9.574650455248384e-06, "logits/chosen": -0.8550158143043518, "logits/rejected": -0.8748153448104858, "logps/chosen": -209.9279022216797, "logps/rejected": -184.33700561523438, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": 1.5625656843185425, "rewards/margins": 3.8304247856140137, "rewards/rejected": -2.2678589820861816, "step": 4431 }, { "epoch": 0.98, "learning_rate": 9.573926755602194e-06, "logits/chosen": -0.9146069288253784, "logits/rejected": -0.900804877281189, "logps/chosen": -111.61808776855469, "logps/rejected": -89.794677734375, "loss": 0.0534, "rewards/accuracies": 1.0, "rewards/chosen": -1.4634217023849487, "rewards/margins": 2.1842188835144043, "rewards/rejected": -3.6476407051086426, "step": 4432 }, { "epoch": 0.98, "learning_rate": 9.573202468217408e-06, "logits/chosen": -0.9572161436080933, "logits/rejected": -0.943590521812439, "logps/chosen": -103.08490753173828, "logps/rejected": -190.34091186523438, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.3768806457519531, "rewards/margins": 5.460195064544678, "rewards/rejected": -5.837075710296631, "step": 4433 }, { "epoch": 0.98, "learning_rate": 9.572477593187101e-06, "logits/chosen": -0.8229422569274902, "logits/rejected": -0.9080458879470825, "logps/chosen": -306.192138671875, "logps/rejected": -193.94415283203125, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": 0.96209716796875, "rewards/margins": 11.032801628112793, "rewards/rejected": -10.070704460144043, "step": 4434 }, { "epoch": 0.98, "learning_rate": 9.571752130604414e-06, "logits/chosen": -0.7626850605010986, "logits/rejected": -0.7655901908874512, "logps/chosen": -114.08702087402344, "logps/rejected": -191.80384826660156, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -3.6385910511016846, "rewards/margins": 4.984657287597656, "rewards/rejected": -8.623248100280762, "step": 4435 }, { "epoch": 0.98, "learning_rate": 9.571026080562569e-06, "logits/chosen": -0.8458007574081421, "logits/rejected": -0.8458007574081421, "logps/chosen": -158.00271606445312, "logps/rejected": -158.00271606445312, "loss": 0.3616, "rewards/accuracies": 0.0, "rewards/chosen": -5.533140659332275, "rewards/margins": 0.0, "rewards/rejected": -5.533140659332275, "step": 4436 }, { "epoch": 0.98, "learning_rate": 9.57029944315486e-06, "logits/chosen": -1.1840431690216064, "logits/rejected": -1.1664619445800781, "logps/chosen": -120.27018737792969, "logps/rejected": -225.9045867919922, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.8440178036689758, "rewards/margins": 6.31909704208374, "rewards/rejected": -7.16311502456665, "step": 4437 }, { "epoch": 0.98, "learning_rate": 9.569572218474662e-06, "logits/chosen": -1.080634355545044, "logits/rejected": -1.0628089904785156, "logps/chosen": -104.45024108886719, "logps/rejected": -112.57862854003906, "loss": 0.3681, "rewards/accuracies": 0.0, "rewards/chosen": -3.7862343788146973, "rewards/margins": -0.08425211906433105, "rewards/rejected": -3.701982259750366, "step": 4438 }, { "epoch": 0.98, "learning_rate": 9.568844406615416e-06, "logits/chosen": -1.1694836616516113, "logits/rejected": -1.1435507535934448, "logps/chosen": -119.91939544677734, "logps/rejected": -232.27432250976562, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -2.6401360034942627, "rewards/margins": 3.6170737743377686, "rewards/rejected": -6.257209777832031, "step": 4439 }, { "epoch": 0.98, "learning_rate": 9.568116007670647e-06, "logits/chosen": -1.0295127630233765, "logits/rejected": -1.0306769609451294, "logps/chosen": -119.40005493164062, "logps/rejected": -144.4921112060547, "loss": 0.2441, "rewards/accuracies": 1.0, "rewards/chosen": -2.195343017578125, "rewards/margins": 2.203355312347412, "rewards/rejected": -4.398698329925537, "step": 4440 }, { "epoch": 0.98, "learning_rate": 9.567387021733954e-06, "logits/chosen": -0.9589055180549622, "logits/rejected": -0.9672046899795532, "logps/chosen": -116.1863021850586, "logps/rejected": -117.05326843261719, "loss": 0.55, "rewards/accuracies": 0.0, "rewards/chosen": -1.6844338178634644, "rewards/margins": -0.3927055597305298, "rewards/rejected": -1.2917282581329346, "step": 4441 }, { "epoch": 0.98, "learning_rate": 9.566657448899009e-06, "logits/chosen": -1.0820153951644897, "logits/rejected": -1.0540755987167358, "logps/chosen": -113.47235107421875, "logps/rejected": -273.62664794921875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -4.10263204574585, "rewards/margins": 9.864177703857422, "rewards/rejected": -13.966809272766113, "step": 4442 }, { "epoch": 0.98, "learning_rate": 9.565927289259558e-06, "logits/chosen": -1.0940214395523071, "logits/rejected": -1.1446077823638916, "logps/chosen": -223.69297790527344, "logps/rejected": -171.52243041992188, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -4.321685791015625, "rewards/margins": 4.667144775390625, "rewards/rejected": -8.98883056640625, "step": 4443 }, { "epoch": 0.98, "learning_rate": 9.565196542909425e-06, "logits/chosen": -0.9641237854957581, "logits/rejected": -0.9641237854957581, "logps/chosen": -108.6526870727539, "logps/rejected": -108.6526870727539, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.056665420532227, "rewards/margins": 0.0, "rewards/rejected": -5.056665420532227, "step": 4444 }, { "epoch": 0.98, "learning_rate": 9.564465209942512e-06, "logits/chosen": -1.3851903676986694, "logits/rejected": -1.4016343355178833, "logps/chosen": -86.58988952636719, "logps/rejected": -149.60491943359375, "loss": 0.4373, "rewards/accuracies": 1.0, "rewards/chosen": -4.316285133361816, "rewards/margins": 6.242513656616211, "rewards/rejected": -10.558798789978027, "step": 4445 }, { "epoch": 0.98, "learning_rate": 9.563733290452795e-06, "logits/chosen": -1.1865088939666748, "logits/rejected": -1.1863577365875244, "logps/chosen": -94.03056335449219, "logps/rejected": -116.07853698730469, "loss": 0.2208, "rewards/accuracies": 1.0, "rewards/chosen": -0.8139816522598267, "rewards/margins": 0.9183517694473267, "rewards/rejected": -1.7323334217071533, "step": 4446 }, { "epoch": 0.98, "learning_rate": 9.56300078453432e-06, "logits/chosen": -0.9891586899757385, "logits/rejected": -0.8850530982017517, "logps/chosen": -170.68975830078125, "logps/rejected": -394.0426330566406, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.759759545326233, "rewards/margins": 8.080361366271973, "rewards/rejected": -9.840121269226074, "step": 4447 }, { "epoch": 0.98, "learning_rate": 9.562267692281212e-06, "logits/chosen": -1.1708506345748901, "logits/rejected": -1.120761513710022, "logps/chosen": -89.87792205810547, "logps/rejected": -175.24893188476562, "loss": 0.0416, "rewards/accuracies": 1.0, "rewards/chosen": 0.40152207016944885, "rewards/margins": 10.379387855529785, "rewards/rejected": -9.977866172790527, "step": 4448 }, { "epoch": 0.98, "learning_rate": 9.561534013787671e-06, "logits/chosen": -0.9906045794487, "logits/rejected": -0.9851319789886475, "logps/chosen": -111.03015899658203, "logps/rejected": -170.05343627929688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.2758522033691406, "rewards/margins": 6.349390506744385, "rewards/rejected": -6.073538303375244, "step": 4449 }, { "epoch": 0.98, "learning_rate": 9.560799749147977e-06, "logits/chosen": -1.0244832038879395, "logits/rejected": -1.0380140542984009, "logps/chosen": -136.57424926757812, "logps/rejected": -171.6036376953125, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -0.9663497805595398, "rewards/margins": 6.387134075164795, "rewards/rejected": -7.3534836769104, "step": 4450 }, { "epoch": 0.99, "learning_rate": 9.56006489845648e-06, "logits/chosen": -0.927323043346405, "logits/rejected": -0.8999531269073486, "logps/chosen": -43.70626449584961, "logps/rejected": -79.7164535522461, "loss": 0.0703, "rewards/accuracies": 1.0, "rewards/chosen": -1.7750885486602783, "rewards/margins": 2.6053149700164795, "rewards/rejected": -4.380403518676758, "step": 4451 }, { "epoch": 0.99, "learning_rate": 9.559329461807605e-06, "logits/chosen": -0.9804372787475586, "logits/rejected": -0.989525318145752, "logps/chosen": -202.69619750976562, "logps/rejected": -251.41970825195312, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.8837478756904602, "rewards/margins": 6.956255912780762, "rewards/rejected": -7.840003967285156, "step": 4452 }, { "epoch": 0.99, "learning_rate": 9.558593439295853e-06, "logits/chosen": -0.7099252939224243, "logits/rejected": -0.689521849155426, "logps/chosen": -153.06857299804688, "logps/rejected": -274.21038818359375, "loss": 0.3481, "rewards/accuracies": 1.0, "rewards/chosen": -0.5425063967704773, "rewards/margins": 5.816246509552002, "rewards/rejected": -6.358752727508545, "step": 4453 }, { "epoch": 0.99, "learning_rate": 9.557856831015805e-06, "logits/chosen": -1.2346528768539429, "logits/rejected": -1.2346528768539429, "logps/chosen": -183.70960998535156, "logps/rejected": -183.70960998535156, "loss": 0.3501, "rewards/accuracies": 0.0, "rewards/chosen": -2.758000135421753, "rewards/margins": 0.0, "rewards/rejected": -2.758000135421753, "step": 4454 }, { "epoch": 0.99, "learning_rate": 9.55711963706211e-06, "logits/chosen": -0.9495323896408081, "logits/rejected": -0.9456177949905396, "logps/chosen": -155.3111572265625, "logps/rejected": -155.40615844726562, "loss": 1.1776, "rewards/accuracies": 0.0, "rewards/chosen": -4.363458156585693, "rewards/margins": -2.2446165084838867, "rewards/rejected": -2.1188416481018066, "step": 4455 }, { "epoch": 0.99, "learning_rate": 9.556381857529497e-06, "logits/chosen": -0.9729136824607849, "logits/rejected": -0.9729136824607849, "logps/chosen": -172.2393341064453, "logps/rejected": -172.2393341064453, "loss": 0.3542, "rewards/accuracies": 0.0, "rewards/chosen": -6.021336555480957, "rewards/margins": 0.0, "rewards/rejected": -6.021336555480957, "step": 4456 }, { "epoch": 0.99, "learning_rate": 9.555643492512767e-06, "logits/chosen": -1.2325077056884766, "logits/rejected": -1.2130533456802368, "logps/chosen": -99.78192138671875, "logps/rejected": -184.08065795898438, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.24868011474609375, "rewards/margins": 4.0471696853637695, "rewards/rejected": -4.295849800109863, "step": 4457 }, { "epoch": 0.99, "learning_rate": 9.554904542106802e-06, "logits/chosen": -1.3047442436218262, "logits/rejected": -1.3208638429641724, "logps/chosen": -160.6641082763672, "logps/rejected": -120.15055847167969, "loss": 0.3701, "rewards/accuracies": 0.0, "rewards/chosen": -3.1304643154144287, "rewards/margins": -0.08941054344177246, "rewards/rejected": -3.0410537719726562, "step": 4458 }, { "epoch": 0.99, "learning_rate": 9.55416500640655e-06, "logits/chosen": -0.7495752573013306, "logits/rejected": -0.7224733829498291, "logps/chosen": -67.7925796508789, "logps/rejected": -99.84652709960938, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -1.134647011756897, "rewards/margins": 3.39127254486084, "rewards/rejected": -4.525919437408447, "step": 4459 }, { "epoch": 0.99, "learning_rate": 9.553424885507045e-06, "logits/chosen": -1.1335803270339966, "logits/rejected": -1.241355299949646, "logps/chosen": -215.71090698242188, "logps/rejected": -233.03143310546875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.8880401849746704, "rewards/margins": 7.413214206695557, "rewards/rejected": -8.301254272460938, "step": 4460 }, { "epoch": 0.99, "learning_rate": 9.552684179503389e-06, "logits/chosen": -1.0687229633331299, "logits/rejected": -1.049696683883667, "logps/chosen": -95.4630126953125, "logps/rejected": -149.215576171875, "loss": 0.1175, "rewards/accuracies": 1.0, "rewards/chosen": -2.3991897106170654, "rewards/margins": 1.4002113342285156, "rewards/rejected": -3.799401044845581, "step": 4461 }, { "epoch": 0.99, "learning_rate": 9.551942888490759e-06, "logits/chosen": -0.7096840739250183, "logits/rejected": -0.6994609832763672, "logps/chosen": -118.93986511230469, "logps/rejected": -71.32006072998047, "loss": 0.137, "rewards/accuracies": 1.0, "rewards/chosen": -2.336895704269409, "rewards/margins": 1.1548802852630615, "rewards/rejected": -3.4917759895324707, "step": 4462 }, { "epoch": 0.99, "learning_rate": 9.55120101256441e-06, "logits/chosen": -1.2005504369735718, "logits/rejected": -1.1774877309799194, "logps/chosen": -182.09222412109375, "logps/rejected": -401.1231384277344, "loss": 0.16, "rewards/accuracies": 1.0, "rewards/chosen": -3.4563262462615967, "rewards/margins": 0.9752013683319092, "rewards/rejected": -4.431527614593506, "step": 4463 }, { "epoch": 0.99, "learning_rate": 9.550458551819672e-06, "logits/chosen": -0.9661107063293457, "logits/rejected": -1.0300414562225342, "logps/chosen": -189.42068481445312, "logps/rejected": -155.0044403076172, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -1.325341820716858, "rewards/margins": 7.513253688812256, "rewards/rejected": -8.838595390319824, "step": 4464 }, { "epoch": 0.99, "learning_rate": 9.54971550635195e-06, "logits/chosen": -1.2126612663269043, "logits/rejected": -1.2124559879302979, "logps/chosen": -124.58529663085938, "logps/rejected": -165.1960906982422, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.0880584716796875, "rewards/margins": 4.014686584472656, "rewards/rejected": -4.102745056152344, "step": 4465 }, { "epoch": 0.99, "learning_rate": 9.548971876256721e-06, "logits/chosen": -0.6490827202796936, "logits/rejected": -0.6289282441139221, "logps/chosen": -303.969970703125, "logps/rejected": -246.16311645507812, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": -1.0095306634902954, "rewards/margins": 2.8182373046875, "rewards/rejected": -3.827768087387085, "step": 4466 }, { "epoch": 0.99, "learning_rate": 9.548227661629541e-06, "logits/chosen": -0.7077223062515259, "logits/rejected": -0.7937703132629395, "logps/chosen": -259.8774108886719, "logps/rejected": -118.06484985351562, "loss": 1.0583, "rewards/accuracies": 0.0, "rewards/chosen": -6.421456813812256, "rewards/margins": -1.9859800338745117, "rewards/rejected": -4.435476779937744, "step": 4467 }, { "epoch": 0.99, "learning_rate": 9.547482862566043e-06, "logits/chosen": -0.9785757064819336, "logits/rejected": -0.9577251076698303, "logps/chosen": -213.1895751953125, "logps/rejected": -134.6636505126953, "loss": 0.2562, "rewards/accuracies": 1.0, "rewards/chosen": -8.197630882263184, "rewards/margins": 0.40148162841796875, "rewards/rejected": -8.599112510681152, "step": 4468 }, { "epoch": 0.99, "learning_rate": 9.546737479161926e-06, "logits/chosen": -1.0235145092010498, "logits/rejected": -0.9978446364402771, "logps/chosen": -144.9650421142578, "logps/rejected": -218.64480590820312, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -2.55804443359375, "rewards/margins": 3.2216644287109375, "rewards/rejected": -5.7797088623046875, "step": 4469 }, { "epoch": 0.99, "learning_rate": 9.545991511512975e-06, "logits/chosen": -0.9348684549331665, "logits/rejected": -0.9915605783462524, "logps/chosen": -224.51284790039062, "logps/rejected": -283.41668701171875, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -4.152052402496338, "rewards/margins": 3.3253707885742188, "rewards/rejected": -7.477423191070557, "step": 4470 }, { "epoch": 0.99, "learning_rate": 9.545244959715041e-06, "logits/chosen": -1.1467558145523071, "logits/rejected": -1.0647162199020386, "logps/chosen": -163.7076873779297, "logps/rejected": -308.8116149902344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.9084731936454773, "rewards/margins": 11.43113899230957, "rewards/rejected": -10.522665977478027, "step": 4471 }, { "epoch": 0.99, "learning_rate": 9.544497823864058e-06, "logits/chosen": -0.6105592250823975, "logits/rejected": -0.5995920896530151, "logps/chosen": -143.62991333007812, "logps/rejected": -108.18781280517578, "loss": 0.7887, "rewards/accuracies": 0.0, "rewards/chosen": -4.520537853240967, "rewards/margins": -1.3459863662719727, "rewards/rejected": -3.174551486968994, "step": 4472 }, { "epoch": 0.99, "learning_rate": 9.543750104056029e-06, "logits/chosen": -1.289028525352478, "logits/rejected": -1.331626296043396, "logps/chosen": -139.92701721191406, "logps/rejected": -165.04843139648438, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -2.4227027893066406, "rewards/margins": 4.285900115966797, "rewards/rejected": -6.7086029052734375, "step": 4473 }, { "epoch": 0.99, "learning_rate": 9.543001800387034e-06, "logits/chosen": -1.0613805055618286, "logits/rejected": -1.0402456521987915, "logps/chosen": -301.9281311035156, "logps/rejected": -147.64373779296875, "loss": 0.398, "rewards/accuracies": 0.0, "rewards/chosen": -5.990441799163818, "rewards/margins": -0.17368602752685547, "rewards/rejected": -5.816755771636963, "step": 4474 }, { "epoch": 0.99, "learning_rate": 9.54225291295323e-06, "logits/chosen": -0.9110009074211121, "logits/rejected": -0.9801562428474426, "logps/chosen": -170.27304077148438, "logps/rejected": -170.1260986328125, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.5890716910362244, "rewards/margins": 6.752344608306885, "rewards/rejected": -7.341416358947754, "step": 4475 }, { "epoch": 0.99, "learning_rate": 9.541503441850844e-06, "logits/chosen": -0.973893404006958, "logits/rejected": -1.1155011653900146, "logps/chosen": -283.568359375, "logps/rejected": -186.3140869140625, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.3396667540073395, "rewards/margins": 6.5549468994140625, "rewards/rejected": -6.894613742828369, "step": 4476 }, { "epoch": 0.99, "learning_rate": 9.540753387176183e-06, "logits/chosen": -0.6618593335151672, "logits/rejected": -0.6716256737709045, "logps/chosen": -195.63522338867188, "logps/rejected": -191.14244079589844, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.740148901939392, "rewards/margins": 5.066376686096191, "rewards/rejected": -6.806525707244873, "step": 4477 }, { "epoch": 0.99, "learning_rate": 9.54000274902563e-06, "logits/chosen": -1.1252996921539307, "logits/rejected": -1.1525309085845947, "logps/chosen": -141.45143127441406, "logps/rejected": -112.26080322265625, "loss": 0.369, "rewards/accuracies": 1.0, "rewards/chosen": -2.4385437965393066, "rewards/margins": 3.082106590270996, "rewards/rejected": -5.520650386810303, "step": 4478 }, { "epoch": 0.99, "learning_rate": 9.539251527495636e-06, "logits/chosen": -0.8100332617759705, "logits/rejected": -0.8222583532333374, "logps/chosen": -206.44288635253906, "logps/rejected": -211.55987548828125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0248184204101562, "rewards/margins": 6.070579528808594, "rewards/rejected": -7.09539794921875, "step": 4479 }, { "epoch": 0.99, "learning_rate": 9.538499722682733e-06, "logits/chosen": -1.2305819988250732, "logits/rejected": -1.2104614973068237, "logps/chosen": -176.4810791015625, "logps/rejected": -71.31533813476562, "loss": 2.3904, "rewards/accuracies": 0.0, "rewards/chosen": -10.026883125305176, "rewards/margins": -4.771691799163818, "rewards/rejected": -5.255191326141357, "step": 4480 }, { "epoch": 0.99, "learning_rate": 9.537747334683524e-06, "logits/chosen": -1.006983757019043, "logits/rejected": -1.019087314605713, "logps/chosen": -85.46061706542969, "logps/rejected": -123.81712341308594, "loss": 0.0635, "rewards/accuracies": 1.0, "rewards/chosen": -0.8570007681846619, "rewards/margins": 1.9996016025543213, "rewards/rejected": -2.856602430343628, "step": 4481 }, { "epoch": 0.99, "learning_rate": 9.536994363594694e-06, "logits/chosen": -1.132096529006958, "logits/rejected": -1.174904227256775, "logps/chosen": -277.8077087402344, "logps/rejected": -205.5182647705078, "loss": 0.7341, "rewards/accuracies": 0.0, "rewards/chosen": -9.124785423278809, "rewards/margins": -1.1991209983825684, "rewards/rejected": -7.92566442489624, "step": 4482 }, { "epoch": 0.99, "learning_rate": 9.536240809512994e-06, "logits/chosen": -1.1246650218963623, "logits/rejected": -1.14372718334198, "logps/chosen": -128.34616088867188, "logps/rejected": -112.76591491699219, "loss": 0.8767, "rewards/accuracies": 0.0, "rewards/chosen": -2.4307861328125, "rewards/margins": -0.6688690185546875, "rewards/rejected": -1.7619171142578125, "step": 4483 }, { "epoch": 0.99, "learning_rate": 9.535486672535255e-06, "logits/chosen": -1.2385348081588745, "logits/rejected": -1.310592532157898, "logps/chosen": -231.4061279296875, "logps/rejected": -179.725830078125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.0012725591659546, "rewards/margins": 6.075621604919434, "rewards/rejected": -7.076894283294678, "step": 4484 }, { "epoch": 0.99, "learning_rate": 9.53473195275838e-06, "logits/chosen": -1.2615396976470947, "logits/rejected": -1.32939612865448, "logps/chosen": -194.60537719726562, "logps/rejected": -162.99371337890625, "loss": 0.0588, "rewards/accuracies": 1.0, "rewards/chosen": -5.026272773742676, "rewards/margins": 2.081157684326172, "rewards/rejected": -7.107430458068848, "step": 4485 }, { "epoch": 0.99, "learning_rate": 9.53397665027935e-06, "logits/chosen": -0.8461257219314575, "logits/rejected": -0.8937705159187317, "logps/chosen": -149.08938598632812, "logps/rejected": -114.50770568847656, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.1234536170959473, "rewards/margins": 5.461620330810547, "rewards/rejected": -7.585073947906494, "step": 4486 }, { "epoch": 0.99, "learning_rate": 9.533220765195223e-06, "logits/chosen": -0.7786433696746826, "logits/rejected": -0.6500292420387268, "logps/chosen": -180.15325927734375, "logps/rejected": -367.7037353515625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.8105209469795227, "rewards/margins": 10.796116828918457, "rewards/rejected": -9.985595703125, "step": 4487 }, { "epoch": 0.99, "learning_rate": 9.532464297603124e-06, "logits/chosen": -0.7396426200866699, "logits/rejected": -0.7166509628295898, "logps/chosen": -206.38072204589844, "logps/rejected": -343.19598388671875, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -0.6372177004814148, "rewards/margins": 9.020721435546875, "rewards/rejected": -9.657938957214355, "step": 4488 }, { "epoch": 0.99, "learning_rate": 9.531707247600258e-06, "logits/chosen": -0.9110277891159058, "logits/rejected": -0.8150565028190613, "logps/chosen": -107.76013946533203, "logps/rejected": -248.03927612304688, "loss": 0.0762, "rewards/accuracies": 1.0, "rewards/chosen": -0.34077149629592896, "rewards/margins": 1.8039841651916504, "rewards/rejected": -2.1447556018829346, "step": 4489 }, { "epoch": 0.99, "learning_rate": 9.530949615283902e-06, "logits/chosen": -0.9665948748588562, "logits/rejected": -0.9562627077102661, "logps/chosen": -111.1790771484375, "logps/rejected": -87.42088317871094, "loss": 0.1103, "rewards/accuracies": 1.0, "rewards/chosen": -2.04876708984375, "rewards/margins": 1.4354884624481201, "rewards/rejected": -3.48425555229187, "step": 4490 }, { "epoch": 0.99, "learning_rate": 9.530191400751416e-06, "logits/chosen": -0.7369624376296997, "logits/rejected": -0.7638830542564392, "logps/chosen": -153.45291137695312, "logps/rejected": -187.1146240234375, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": 0.3734237849712372, "rewards/margins": 4.8595991134643555, "rewards/rejected": -4.486175537109375, "step": 4491 }, { "epoch": 0.99, "learning_rate": 9.529432604100223e-06, "logits/chosen": -0.9657220244407654, "logits/rejected": -1.0052200555801392, "logps/chosen": -207.56741333007812, "logps/rejected": -205.02047729492188, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.8620575070381165, "rewards/margins": 4.531893730163574, "rewards/rejected": -5.393951416015625, "step": 4492 }, { "epoch": 0.99, "learning_rate": 9.528673225427831e-06, "logits/chosen": -0.9076850414276123, "logits/rejected": -1.0127971172332764, "logps/chosen": -227.99700927734375, "logps/rejected": -129.20095825195312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 2.400958299636841, "rewards/margins": 6.415898323059082, "rewards/rejected": -4.014939785003662, "step": 4493 }, { "epoch": 0.99, "learning_rate": 9.527913264831817e-06, "logits/chosen": -1.2592380046844482, "logits/rejected": -1.1602340936660767, "logps/chosen": -130.86001586914062, "logps/rejected": -214.6642608642578, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -0.09058838337659836, "rewards/margins": 2.927507162094116, "rewards/rejected": -3.0180954933166504, "step": 4494 }, { "epoch": 0.99, "learning_rate": 9.52715272240983e-06, "logits/chosen": -1.1419597864151, "logits/rejected": -1.0713636875152588, "logps/chosen": -215.6470947265625, "logps/rejected": -323.3297424316406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.5786194205284119, "rewards/margins": 9.065117835998535, "rewards/rejected": -8.486498832702637, "step": 4495 }, { "epoch": 1.0, "learning_rate": 9.526391598259604e-06, "logits/chosen": -0.9965603351593018, "logits/rejected": -0.9223191738128662, "logps/chosen": -74.99044036865234, "logps/rejected": -171.38064575195312, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -1.000684380531311, "rewards/margins": 3.043921947479248, "rewards/rejected": -4.0446062088012695, "step": 4496 }, { "epoch": 1.0, "learning_rate": 9.525629892478936e-06, "logits/chosen": -1.087290644645691, "logits/rejected": -1.132782220840454, "logps/chosen": -170.79876708984375, "logps/rejected": -363.67254638671875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 3.404257297515869, "rewards/margins": 10.543807983398438, "rewards/rejected": -7.139550685882568, "step": 4497 }, { "epoch": 1.0, "learning_rate": 9.524867605165709e-06, "logits/chosen": -0.8433418273925781, "logits/rejected": -0.7759336233139038, "logps/chosen": -175.751708984375, "logps/rejected": -151.2997589111328, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 2.3131775856018066, "rewards/margins": 10.130376815795898, "rewards/rejected": -7.817199230194092, "step": 4498 }, { "epoch": 1.0, "learning_rate": 9.52410473641787e-06, "logits/chosen": -1.1809378862380981, "logits/rejected": -1.1599470376968384, "logps/chosen": -92.35338592529297, "logps/rejected": -95.60597229003906, "loss": 0.2974, "rewards/accuracies": 1.0, "rewards/chosen": -1.7237876653671265, "rewards/margins": 0.2074822187423706, "rewards/rejected": -1.931269884109497, "step": 4499 }, { "epoch": 1.0, "learning_rate": 9.523341286333448e-06, "logits/chosen": -1.0081368684768677, "logits/rejected": -0.9518804550170898, "logps/chosen": -115.88510131835938, "logps/rejected": -181.6351776123047, "loss": 0.3226, "rewards/accuracies": 1.0, "rewards/chosen": -7.523066997528076, "rewards/margins": 0.09817886352539062, "rewards/rejected": -7.621245861053467, "step": 4500 }, { "epoch": 1.0, "learning_rate": 9.522577255010546e-06, "logits/chosen": -1.1315526962280273, "logits/rejected": -1.153042197227478, "logps/chosen": -109.53440856933594, "logps/rejected": -86.16151428222656, "loss": 0.2157, "rewards/accuracies": 1.0, "rewards/chosen": -0.5283889770507812, "rewards/margins": 0.6171104907989502, "rewards/rejected": -1.1454994678497314, "step": 4501 }, { "epoch": 1.0, "learning_rate": 9.521812642547337e-06, "logits/chosen": -1.2678959369659424, "logits/rejected": -1.2870780229568481, "logps/chosen": -96.73229217529297, "logps/rejected": -129.51202392578125, "loss": 0.1275, "rewards/accuracies": 1.0, "rewards/chosen": -4.109825134277344, "rewards/margins": 1.5760912895202637, "rewards/rejected": -5.685916423797607, "step": 4502 }, { "epoch": 1.0, "learning_rate": 9.521047449042075e-06, "logits/chosen": -0.9396881461143494, "logits/rejected": -0.8938097357749939, "logps/chosen": -80.25664520263672, "logps/rejected": -178.09092712402344, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.149501085281372, "rewards/margins": 8.136543273925781, "rewards/rejected": -9.286044120788574, "step": 4503 }, { "epoch": 1.0, "learning_rate": 9.520281674593084e-06, "logits/chosen": -1.1525521278381348, "logits/rejected": -1.1525521278381348, "logps/chosen": -171.4706268310547, "logps/rejected": -171.4706268310547, "loss": 0.7193, "rewards/accuracies": 0.0, "rewards/chosen": -8.819259643554688, "rewards/margins": 0.0, "rewards/rejected": -8.819259643554688, "step": 4504 }, { "epoch": 1.0, "learning_rate": 9.519515319298765e-06, "logits/chosen": -0.6623517870903015, "logits/rejected": -0.3377048671245575, "logps/chosen": -158.39874267578125, "logps/rejected": -601.5133056640625, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -1.0475739240646362, "rewards/margins": 41.362735748291016, "rewards/rejected": -42.410308837890625, "step": 4505 }, { "epoch": 1.0, "learning_rate": 9.51874838325759e-06, "logits/chosen": -1.2467485666275024, "logits/rejected": -1.2141971588134766, "logps/chosen": -222.79904174804688, "logps/rejected": -315.0966491699219, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.7748855948448181, "rewards/margins": 6.9619646072387695, "rewards/rejected": -6.187078952789307, "step": 4506 }, { "epoch": 1.0, "learning_rate": 9.517980866568112e-06, "logits/chosen": -1.2927125692367554, "logits/rejected": -1.253354549407959, "logps/chosen": -85.16752624511719, "logps/rejected": -188.36343383789062, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 1.0310333967208862, "rewards/margins": 4.734626770019531, "rewards/rejected": -3.7035934925079346, "step": 4507 }, { "epoch": 1.0, "learning_rate": 9.517212769328952e-06, "logits/chosen": -1.2317579984664917, "logits/rejected": -1.1682759523391724, "logps/chosen": -74.09391784667969, "logps/rejected": -128.75732421875, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": 0.9932098388671875, "rewards/margins": 3.6088943481445312, "rewards/rejected": -2.6156845092773438, "step": 4508 }, { "epoch": 1.0, "learning_rate": 9.516444091638812e-06, "logits/chosen": -0.870094895362854, "logits/rejected": -0.9766585230827332, "logps/chosen": -129.74609375, "logps/rejected": -583.85791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.008477807044983, "rewards/margins": 40.75048828125, "rewards/rejected": -39.74201202392578, "step": 4509 }, { "epoch": 1.0, "learning_rate": 9.515674833596464e-06, "logits/chosen": -1.0679458379745483, "logits/rejected": -1.1338779926300049, "logps/chosen": -124.37984466552734, "logps/rejected": -79.24254608154297, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -0.7333381772041321, "rewards/margins": 4.847156524658203, "rewards/rejected": -5.5804948806762695, "step": 4510 }, { "epoch": 1.0, "learning_rate": 9.514904995300754e-06, "logits/chosen": -1.164879322052002, "logits/rejected": -1.164879322052002, "logps/chosen": -168.82923889160156, "logps/rejected": -168.82923889160156, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.588845252990723, "rewards/margins": 0.0, "rewards/rejected": -5.588845252990723, "step": 4511 }, { "epoch": 1.0, "learning_rate": 9.514134576850605e-06, "logits/chosen": -0.7858877182006836, "logits/rejected": -0.6920402646064758, "logps/chosen": -288.562255859375, "logps/rejected": -798.561767578125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 1.6955841779708862, "rewards/margins": 67.69807434082031, "rewards/rejected": -66.00248718261719, "step": 4512 }, { "epoch": 1.0, "learning_rate": 9.513363578345014e-06, "logits/chosen": -0.9879102110862732, "logits/rejected": -0.9828492403030396, "logps/chosen": -165.53512573242188, "logps/rejected": -191.1047821044922, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.23955383896827698, "rewards/margins": 7.190216064453125, "rewards/rejected": -7.429769992828369, "step": 4513 }, { "epoch": 1.0, "learning_rate": 9.512591999883056e-06, "logits/chosen": -1.3831660747528076, "logits/rejected": -1.3707715272903442, "logps/chosen": -88.83609008789062, "logps/rejected": -161.778076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.21776047348976135, "rewards/margins": 10.390692710876465, "rewards/rejected": -10.608452796936035, "step": 4514 }, { "epoch": 1.0, "learning_rate": 9.511819841563872e-06, "logits/chosen": -1.094248652458191, "logits/rejected": -1.1155539751052856, "logps/chosen": -192.55992126464844, "logps/rejected": -167.83981323242188, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -1.240742564201355, "rewards/margins": 3.1935081481933594, "rewards/rejected": -4.434250831604004, "step": 4515 }, { "epoch": 1.0, "learning_rate": 9.511047103486685e-06, "logits/chosen": -0.7882181406021118, "logits/rejected": -0.8289559483528137, "logps/chosen": -97.59725189208984, "logps/rejected": -75.72624969482422, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -1.0856338739395142, "rewards/margins": 3.6266098022460938, "rewards/rejected": -4.712243556976318, "step": 4516 }, { "epoch": 1.0, "learning_rate": 9.510273785750788e-06, "logits/chosen": -1.1936168670654297, "logits/rejected": -1.1813678741455078, "logps/chosen": -137.6051025390625, "logps/rejected": -210.51113891601562, "loss": 0.1032, "rewards/accuracies": 1.0, "rewards/chosen": -1.3502792119979858, "rewards/margins": 1.8904281854629517, "rewards/rejected": -3.2407073974609375, "step": 4517 }, { "epoch": 1.0, "learning_rate": 9.509499888455554e-06, "logits/chosen": -0.9862060546875, "logits/rejected": -0.9765976667404175, "logps/chosen": -175.38372802734375, "logps/rejected": -175.64923095703125, "loss": 0.0878, "rewards/accuracies": 1.0, "rewards/chosen": 2.5631470680236816, "rewards/margins": 1.6514053344726562, "rewards/rejected": 0.9117416739463806, "step": 4518 }, { "epoch": 1.0, "learning_rate": 9.508725411700424e-06, "logits/chosen": -1.2385963201522827, "logits/rejected": -1.2791253328323364, "logps/chosen": -179.77297973632812, "logps/rejected": -167.05368041992188, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": 1.0605896711349487, "rewards/margins": 4.696815490722656, "rewards/rejected": -3.636225938796997, "step": 4519 }, { "epoch": 1.0, "learning_rate": 9.507950355584917e-06, "logits/chosen": -1.2972732782363892, "logits/rejected": -1.3315423727035522, "logps/chosen": -86.2337646484375, "logps/rejected": -71.81586456298828, "loss": 0.1094, "rewards/accuracies": 1.0, "rewards/chosen": -0.6282310485839844, "rewards/margins": 1.4094784259796143, "rewards/rejected": -2.0377094745635986, "step": 4520 }, { "epoch": 1.0, "learning_rate": 9.507174720208627e-06, "logits/chosen": -1.3561569452285767, "logits/rejected": -1.1700785160064697, "logps/chosen": -73.12028503417969, "logps/rejected": -273.542236328125, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": 0.5337509512901306, "rewards/margins": 3.1056580543518066, "rewards/rejected": -2.5719070434570312, "step": 4521 }, { "epoch": 1.0, "learning_rate": 9.50639850567122e-06, "logits/chosen": -0.8496686220169067, "logits/rejected": -0.8678346872329712, "logps/chosen": -131.75204467773438, "logps/rejected": -185.42193603515625, "loss": 0.0303, "rewards/accuracies": 1.0, "rewards/chosen": -2.478682041168213, "rewards/margins": 2.7966384887695312, "rewards/rejected": -5.275320529937744, "step": 4522 }, { "epoch": 1.0, "learning_rate": 9.505621712072437e-06, "logits/chosen": -1.1783208847045898, "logits/rejected": -1.1685467958450317, "logps/chosen": -167.32687377929688, "logps/rejected": -257.21368408203125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 1.509344458580017, "rewards/margins": 6.420618057250977, "rewards/rejected": -4.91127347946167, "step": 4523 }, { "epoch": 1.0, "learning_rate": 9.504844339512096e-06, "logits/chosen": -0.9067971706390381, "logits/rejected": -1.1533148288726807, "logps/chosen": -229.06173706054688, "logps/rejected": -331.5935974121094, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -5.3643951416015625, "rewards/margins": 19.675445556640625, "rewards/rejected": -25.039840698242188, "step": 4524 }, { "epoch": 1.0, "learning_rate": 9.504066388090088e-06, "logits/chosen": -1.0009188652038574, "logits/rejected": -0.9884634613990784, "logps/chosen": -116.09333038330078, "logps/rejected": -132.67630004882812, "loss": 0.0997, "rewards/accuracies": 1.0, "rewards/chosen": -1.5595909357070923, "rewards/margins": 1.511398434638977, "rewards/rejected": -3.0709893703460693, "step": 4525 }, { "epoch": 1.0, "learning_rate": 9.503287857906374e-06, "logits/chosen": -0.9905901551246643, "logits/rejected": -0.9905901551246643, "logps/chosen": -138.65859985351562, "logps/rejected": -138.65859985351562, "loss": 0.4024, "rewards/accuracies": 0.0, "rewards/chosen": -7.490561008453369, "rewards/margins": 0.0, "rewards/rejected": -7.490561008453369, "step": 4526 }, { "epoch": 1.0, "learning_rate": 9.502508749060998e-06, "logits/chosen": -0.9565033912658691, "logits/rejected": -0.8857504725456238, "logps/chosen": -133.67991638183594, "logps/rejected": -251.62249755859375, "loss": 0.094, "rewards/accuracies": 1.0, "rewards/chosen": -0.2032516449689865, "rewards/margins": 1.6219452619552612, "rewards/rejected": -1.8251968622207642, "step": 4527 }, { "epoch": 1.0, "learning_rate": 9.50172906165407e-06, "logits/chosen": -1.563562273979187, "logits/rejected": -1.563562273979187, "logps/chosen": -60.01377487182617, "logps/rejected": -60.01377487182617, "loss": 0.3773, "rewards/accuracies": 0.0, "rewards/chosen": -3.971546173095703, "rewards/margins": 0.0, "rewards/rejected": -3.971546173095703, "step": 4528 }, { "epoch": 1.0, "learning_rate": 9.50094879578578e-06, "logits/chosen": -1.4777896404266357, "logits/rejected": -1.3818583488464355, "logps/chosen": -128.7670440673828, "logps/rejected": -313.15594482421875, "loss": 0.0625, "rewards/accuracies": 1.0, "rewards/chosen": -0.5768081545829773, "rewards/margins": 4.436967849731445, "rewards/rejected": -5.013775825500488, "step": 4529 }, { "epoch": 1.0, "learning_rate": 9.500167951556392e-06, "logits/chosen": -1.089152216911316, "logits/rejected": -1.163286805152893, "logps/chosen": -168.32803344726562, "logps/rejected": -56.203433990478516, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": 0.35108184814453125, "rewards/margins": 3.651094913482666, "rewards/rejected": -3.3000130653381348, "step": 4530 }, { "epoch": 1.0, "learning_rate": 9.499386529066236e-06, "logits/chosen": -1.3970104455947876, "logits/rejected": -1.4431252479553223, "logps/chosen": -105.48872375488281, "logps/rejected": -108.56237030029297, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.5191788077354431, "rewards/margins": 7.872546672821045, "rewards/rejected": -8.391725540161133, "step": 4531 }, { "epoch": 1.0, "learning_rate": 9.498604528415731e-06, "logits/chosen": -1.1167405843734741, "logits/rejected": -0.9997623562812805, "logps/chosen": -145.02511596679688, "logps/rejected": -301.427001953125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.4701400995254517, "rewards/margins": 4.894838333129883, "rewards/rejected": -6.364978313446045, "step": 4532 }, { "epoch": 1.0, "learning_rate": 9.497821949705356e-06, "logits/chosen": -1.3845012187957764, "logits/rejected": -1.3933963775634766, "logps/chosen": -131.32933044433594, "logps/rejected": -164.30950927734375, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -2.10502028465271, "rewards/margins": 4.408567428588867, "rewards/rejected": -6.513587474822998, "step": 4533 }, { "epoch": 1.0, "learning_rate": 9.497038793035674e-06, "logits/chosen": -1.2059932947158813, "logits/rejected": -1.22434663772583, "logps/chosen": -107.36924743652344, "logps/rejected": -106.48413848876953, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": 0.7099968194961548, "rewards/margins": 2.846691131591797, "rewards/rejected": -2.1366944313049316, "step": 4534 }, { "epoch": 1.0, "learning_rate": 9.496255058507318e-06, "logits/chosen": -0.8859752416610718, "logits/rejected": -0.4397598206996918, "logps/chosen": -152.8932647705078, "logps/rejected": -932.9847412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.7330780029296875, "rewards/margins": 76.46658325195312, "rewards/rejected": -73.73350524902344, "step": 4535 }, { "epoch": 1.0, "learning_rate": 9.495470746220995e-06, "logits/chosen": -1.0734094381332397, "logits/rejected": -1.3123581409454346, "logps/chosen": -246.2750701904297, "logps/rejected": -121.40731048583984, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -3.4464263916015625, "rewards/margins": 2.921748638153076, "rewards/rejected": -6.368175029754639, "step": 4536 }, { "epoch": 1.0, "learning_rate": 9.494685856277488e-06, "logits/chosen": -1.1689050197601318, "logits/rejected": -1.1913654804229736, "logps/chosen": -171.02615356445312, "logps/rejected": -220.0876007080078, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.1464295387268066, "rewards/margins": 8.23237419128418, "rewards/rejected": -6.085945129394531, "step": 4537 }, { "epoch": 1.0, "learning_rate": 9.493900388777654e-06, "logits/chosen": -1.1416925191879272, "logits/rejected": -1.145149827003479, "logps/chosen": -193.46282958984375, "logps/rejected": -208.98651123046875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 2.37442946434021, "rewards/margins": 13.347145080566406, "rewards/rejected": -10.972715377807617, "step": 4538 }, { "epoch": 1.0, "learning_rate": 9.493114343822422e-06, "logits/chosen": -0.9720761775970459, "logits/rejected": -0.9935657978057861, "logps/chosen": -116.33926391601562, "logps/rejected": -116.39444732666016, "loss": 0.0649, "rewards/accuracies": 1.0, "rewards/chosen": -1.8712905645370483, "rewards/margins": 3.5112814903259277, "rewards/rejected": -5.382572174072266, "step": 4539 }, { "epoch": 1.0, "learning_rate": 9.4923277215128e-06, "logits/chosen": -1.0256201028823853, "logits/rejected": -0.6829211115837097, "logps/chosen": -123.75894165039062, "logps/rejected": -281.7989196777344, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 2.2799744606018066, "rewards/margins": 23.323394775390625, "rewards/rejected": -21.043420791625977, "step": 4540 }, { "epoch": 1.01, "learning_rate": 9.491540521949862e-06, "logits/chosen": -0.8366971611976624, "logits/rejected": -0.8110979199409485, "logps/chosen": -177.9940185546875, "logps/rejected": -228.98208618164062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.08776245266199112, "rewards/margins": 7.672461032867432, "rewards/rejected": -7.584698677062988, "step": 4541 }, { "epoch": 1.01, "learning_rate": 9.490752745234767e-06, "logits/chosen": -1.4563343524932861, "logits/rejected": -1.4362565279006958, "logps/chosen": -133.37596130371094, "logps/rejected": -124.40799713134766, "loss": 0.0429, "rewards/accuracies": 1.0, "rewards/chosen": 1.2211166620254517, "rewards/margins": 2.4641411304473877, "rewards/rejected": -1.243024468421936, "step": 4542 }, { "epoch": 1.01, "learning_rate": 9.489964391468739e-06, "logits/chosen": -0.9006439447402954, "logits/rejected": -0.8472732901573181, "logps/chosen": -97.44271850585938, "logps/rejected": -379.9034423828125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 3.6947312355041504, "rewards/margins": 22.68297004699707, "rewards/rejected": -18.988239288330078, "step": 4543 }, { "epoch": 1.01, "learning_rate": 9.48917546075308e-06, "logits/chosen": -1.233856201171875, "logits/rejected": -1.2342684268951416, "logps/chosen": -103.89981842041016, "logps/rejected": -126.9547348022461, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": -1.0500519275665283, "rewards/margins": 3.1823928356170654, "rewards/rejected": -4.232444763183594, "step": 4544 }, { "epoch": 1.01, "learning_rate": 9.488385953189165e-06, "logits/chosen": -1.2640924453735352, "logits/rejected": -1.2265548706054688, "logps/chosen": -174.43942260742188, "logps/rejected": -391.2073974609375, "loss": 0.1943, "rewards/accuracies": 1.0, "rewards/chosen": -2.6910462379455566, "rewards/margins": 0.7489073276519775, "rewards/rejected": -3.439953565597534, "step": 4545 }, { "epoch": 1.01, "learning_rate": 9.487595868878447e-06, "logits/chosen": -1.1787816286087036, "logits/rejected": -1.248201847076416, "logps/chosen": -224.97561645507812, "logps/rejected": -254.00765991210938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6102340817451477, "rewards/margins": 8.338891983032227, "rewards/rejected": -8.949126243591309, "step": 4546 }, { "epoch": 1.01, "learning_rate": 9.486805207922445e-06, "logits/chosen": -1.2544735670089722, "logits/rejected": -1.2829238176345825, "logps/chosen": -107.65257263183594, "logps/rejected": -107.6787109375, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": -2.4839799404144287, "rewards/margins": 2.7448179721832275, "rewards/rejected": -5.228797912597656, "step": 4547 }, { "epoch": 1.01, "learning_rate": 9.486013970422762e-06, "logits/chosen": -1.1975972652435303, "logits/rejected": -1.1702004671096802, "logps/chosen": -66.63789367675781, "logps/rejected": -148.23025512695312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.04414978250861168, "rewards/margins": 5.822474002838135, "rewards/rejected": -5.866623878479004, "step": 4548 }, { "epoch": 1.01, "learning_rate": 9.485222156481067e-06, "logits/chosen": -1.0085166692733765, "logits/rejected": -0.7954793572425842, "logps/chosen": -127.85790252685547, "logps/rejected": -265.08111572265625, "loss": 0.9927, "rewards/accuracies": 0.0, "rewards/chosen": -5.696167945861816, "rewards/margins": -1.835486650466919, "rewards/rejected": -3.8606812953948975, "step": 4549 }, { "epoch": 1.01, "learning_rate": 9.484429766199107e-06, "logits/chosen": -1.025166630744934, "logits/rejected": -1.1445122957229614, "logps/chosen": -167.30471801757812, "logps/rejected": -79.03593444824219, "loss": 0.2846, "rewards/accuracies": 1.0, "rewards/chosen": -4.7015509605407715, "rewards/margins": 0.3720569610595703, "rewards/rejected": -5.073607921600342, "step": 4550 }, { "epoch": 1.01, "learning_rate": 9.483636799678703e-06, "logits/chosen": -1.0768448114395142, "logits/rejected": -1.104116678237915, "logps/chosen": -196.60855102539062, "logps/rejected": -110.14007568359375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.8256271481513977, "rewards/margins": 6.2375898361206055, "rewards/rejected": -7.0632171630859375, "step": 4551 }, { "epoch": 1.01, "learning_rate": 9.482843257021747e-06, "logits/chosen": -1.351110577583313, "logits/rejected": -1.3896297216415405, "logps/chosen": -96.53398895263672, "logps/rejected": -127.80622863769531, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.3525962829589844, "rewards/margins": 7.1641035079956055, "rewards/rejected": -8.51669979095459, "step": 4552 }, { "epoch": 1.01, "learning_rate": 9.48204913833021e-06, "logits/chosen": -0.8765813112258911, "logits/rejected": -0.8250256180763245, "logps/chosen": -82.72451782226562, "logps/rejected": -151.8957977294922, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.31522828340530396, "rewards/margins": 6.338253021240234, "rewards/rejected": -6.653481483459473, "step": 4553 }, { "epoch": 1.01, "learning_rate": 9.481254443706133e-06, "logits/chosen": -1.1588809490203857, "logits/rejected": -1.182417869567871, "logps/chosen": -99.64431762695312, "logps/rejected": -138.2383270263672, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.67303466796875, "rewards/margins": 5.18316650390625, "rewards/rejected": -6.856201171875, "step": 4554 }, { "epoch": 1.01, "learning_rate": 9.480459173251634e-06, "logits/chosen": -0.999175488948822, "logits/rejected": -0.9707579016685486, "logps/chosen": -170.00497436523438, "logps/rejected": -226.6504364013672, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.1756118535995483, "rewards/margins": 5.268228054046631, "rewards/rejected": -6.443840026855469, "step": 4555 }, { "epoch": 1.01, "learning_rate": 9.4796633270689e-06, "logits/chosen": -0.8742023706436157, "logits/rejected": -0.8401312232017517, "logps/chosen": -110.50652313232422, "logps/rejected": -112.95018005371094, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -4.807383060455322, "rewards/margins": 3.1407175064086914, "rewards/rejected": -7.948100566864014, "step": 4556 }, { "epoch": 1.01, "learning_rate": 9.478866905260198e-06, "logits/chosen": -1.2805906534194946, "logits/rejected": -1.289400339126587, "logps/chosen": -119.44509887695312, "logps/rejected": -83.66976928710938, "loss": 1.1855, "rewards/accuracies": 0.0, "rewards/chosen": -2.137159824371338, "rewards/margins": -2.2656021118164062, "rewards/rejected": 0.12844239175319672, "step": 4557 }, { "epoch": 1.01, "learning_rate": 9.478069907927867e-06, "logits/chosen": -1.0895161628723145, "logits/rejected": -1.0895161628723145, "logps/chosen": -81.67378234863281, "logps/rejected": -81.67378234863281, "loss": 0.359, "rewards/accuracies": 0.0, "rewards/chosen": -6.5957183837890625, "rewards/margins": 0.0, "rewards/rejected": -6.5957183837890625, "step": 4558 }, { "epoch": 1.01, "learning_rate": 9.477272335174315e-06, "logits/chosen": -0.8986401557922363, "logits/rejected": -0.8957265019416809, "logps/chosen": -171.9632568359375, "logps/rejected": -182.220458984375, "loss": 0.2181, "rewards/accuracies": 1.0, "rewards/chosen": -6.136923313140869, "rewards/margins": 1.015091896057129, "rewards/rejected": -7.152015209197998, "step": 4559 }, { "epoch": 1.01, "learning_rate": 9.476474187102033e-06, "logits/chosen": -1.5259358882904053, "logits/rejected": -1.610282063484192, "logps/chosen": -95.20658874511719, "logps/rejected": -96.8287353515625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.472346544265747, "rewards/margins": 5.3468170166015625, "rewards/rejected": -6.819163799285889, "step": 4560 }, { "epoch": 1.01, "learning_rate": 9.475675463813578e-06, "logits/chosen": -1.3678033351898193, "logits/rejected": -1.4238122701644897, "logps/chosen": -151.103271484375, "logps/rejected": -173.3544921875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 1.2143830060958862, "rewards/margins": 9.76266860961914, "rewards/rejected": -8.548285484313965, "step": 4561 }, { "epoch": 1.01, "learning_rate": 9.474876165411586e-06, "logits/chosen": -1.1888710260391235, "logits/rejected": -1.1888710260391235, "logps/chosen": -170.8905487060547, "logps/rejected": -170.8905487060547, "loss": 0.3517, "rewards/accuracies": 0.0, "rewards/chosen": -5.794976234436035, "rewards/margins": 0.0, "rewards/rejected": -5.794976234436035, "step": 4562 }, { "epoch": 1.01, "learning_rate": 9.474076291998765e-06, "logits/chosen": -0.9155736565589905, "logits/rejected": -0.8916455507278442, "logps/chosen": -110.79932403564453, "logps/rejected": -172.02001953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8238579034805298, "rewards/margins": 5.987366676330566, "rewards/rejected": -6.811224460601807, "step": 4563 }, { "epoch": 1.01, "learning_rate": 9.473275843677893e-06, "logits/chosen": -1.0486040115356445, "logits/rejected": -0.9777343273162842, "logps/chosen": -122.32121276855469, "logps/rejected": -260.3910827636719, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.1964263916015625, "rewards/margins": 8.243331909179688, "rewards/rejected": -8.43975830078125, "step": 4564 }, { "epoch": 1.01, "learning_rate": 9.472474820551831e-06, "logits/chosen": -0.9241214394569397, "logits/rejected": -0.8599464893341064, "logps/chosen": -92.10749053955078, "logps/rejected": -198.2117919921875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.26243361830711365, "rewards/margins": 5.162908554077148, "rewards/rejected": -5.425342082977295, "step": 4565 }, { "epoch": 1.01, "learning_rate": 9.471673222723506e-06, "logits/chosen": -1.259700894355774, "logits/rejected": -1.2588633298873901, "logps/chosen": -92.33366394042969, "logps/rejected": -104.86844635009766, "loss": 0.1141, "rewards/accuracies": 1.0, "rewards/chosen": -1.1817795038223267, "rewards/margins": 2.5847597122192383, "rewards/rejected": -3.7665390968322754, "step": 4566 }, { "epoch": 1.01, "learning_rate": 9.47087105029592e-06, "logits/chosen": -0.7454887628555298, "logits/rejected": -0.7610112428665161, "logps/chosen": -87.90841674804688, "logps/rejected": -42.3382568359375, "loss": 0.6445, "rewards/accuracies": 0.0, "rewards/chosen": -3.8180301189422607, "rewards/margins": -0.9635839462280273, "rewards/rejected": -2.8544461727142334, "step": 4567 }, { "epoch": 1.01, "learning_rate": 9.470068303372153e-06, "logits/chosen": -1.1459370851516724, "logits/rejected": -1.253151535987854, "logps/chosen": -106.13198852539062, "logps/rejected": -95.06034088134766, "loss": 0.0572, "rewards/accuracies": 1.0, "rewards/chosen": -2.4150466918945312, "rewards/margins": 2.133847236633301, "rewards/rejected": -4.548893928527832, "step": 4568 }, { "epoch": 1.01, "learning_rate": 9.469264982055355e-06, "logits/chosen": -0.8843973278999329, "logits/rejected": -0.8591498136520386, "logps/chosen": -113.19474029541016, "logps/rejected": -195.97540283203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.6177665591239929, "rewards/margins": 7.236127853393555, "rewards/rejected": -7.853894233703613, "step": 4569 }, { "epoch": 1.01, "learning_rate": 9.46846108644875e-06, "logits/chosen": -0.7619295716285706, "logits/rejected": -0.7474539279937744, "logps/chosen": -212.8831024169922, "logps/rejected": -244.0849151611328, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 3.424426317214966, "rewards/margins": 11.430389404296875, "rewards/rejected": -8.005963325500488, "step": 4570 }, { "epoch": 1.01, "learning_rate": 9.467656616655636e-06, "logits/chosen": -1.4690289497375488, "logits/rejected": -1.5322175025939941, "logps/chosen": -104.90812683105469, "logps/rejected": -127.80747985839844, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -1.1935303211212158, "rewards/margins": 3.9425718784332275, "rewards/rejected": -5.136102199554443, "step": 4571 }, { "epoch": 1.01, "learning_rate": 9.466851572779388e-06, "logits/chosen": -0.8913989067077637, "logits/rejected": -0.8913989067077637, "logps/chosen": -93.99906158447266, "logps/rejected": -93.99906158447266, "loss": 0.5955, "rewards/accuracies": 0.0, "rewards/chosen": -2.1886003017425537, "rewards/margins": 0.0, "rewards/rejected": -2.1886003017425537, "step": 4572 }, { "epoch": 1.01, "learning_rate": 9.46604595492345e-06, "logits/chosen": -0.6271368861198425, "logits/rejected": -0.572330892086029, "logps/chosen": -281.45404052734375, "logps/rejected": -267.8789367675781, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 1.4034942388534546, "rewards/margins": 4.7664031982421875, "rewards/rejected": -3.3629090785980225, "step": 4573 }, { "epoch": 1.01, "learning_rate": 9.465239763191345e-06, "logits/chosen": -0.978954553604126, "logits/rejected": -0.9799511432647705, "logps/chosen": -145.19647216796875, "logps/rejected": -175.65576171875, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -1.7253525257110596, "rewards/margins": 2.710218667984009, "rewards/rejected": -4.435571193695068, "step": 4574 }, { "epoch": 1.01, "learning_rate": 9.464432997686664e-06, "logits/chosen": -1.1796172857284546, "logits/rejected": -1.1992336511611938, "logps/chosen": -168.56234741210938, "logps/rejected": -171.92063903808594, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.3648681640625, "rewards/margins": 6.459747314453125, "rewards/rejected": -8.824615478515625, "step": 4575 }, { "epoch": 1.01, "learning_rate": 9.463625658513073e-06, "logits/chosen": -1.4405169486999512, "logits/rejected": -1.466208815574646, "logps/chosen": -153.18838500976562, "logps/rejected": -222.2063751220703, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -2.732769727706909, "rewards/margins": 3.872978448867798, "rewards/rejected": -6.605748176574707, "step": 4576 }, { "epoch": 1.01, "learning_rate": 9.462817745774316e-06, "logits/chosen": -0.9021425843238831, "logits/rejected": -0.8340445160865784, "logps/chosen": -95.30279541015625, "logps/rejected": -211.45462036132812, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.8205353021621704, "rewards/margins": 5.204204082489014, "rewards/rejected": -7.0247392654418945, "step": 4577 }, { "epoch": 1.01, "learning_rate": 9.462009259574207e-06, "logits/chosen": -1.2199541330337524, "logits/rejected": -1.0749925374984741, "logps/chosen": -89.19999694824219, "logps/rejected": -273.574951171875, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -1.1721328496932983, "rewards/margins": 3.074223518371582, "rewards/rejected": -4.24635648727417, "step": 4578 }, { "epoch": 1.01, "learning_rate": 9.461200200016636e-06, "logits/chosen": -1.3784629106521606, "logits/rejected": -1.3130741119384766, "logps/chosen": -180.4338836669922, "logps/rejected": -294.43878173828125, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -1.5280838012695312, "rewards/margins": 3.3594346046447754, "rewards/rejected": -4.887518405914307, "step": 4579 }, { "epoch": 1.01, "learning_rate": 9.460390567205562e-06, "logits/chosen": -1.0239830017089844, "logits/rejected": -0.9739479422569275, "logps/chosen": -118.86593627929688, "logps/rejected": -238.58749389648438, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.3526062071323395, "rewards/margins": 6.81549072265625, "rewards/rejected": -7.168097019195557, "step": 4580 }, { "epoch": 1.01, "learning_rate": 9.459580361245024e-06, "logits/chosen": -1.2992509603500366, "logits/rejected": -1.4134421348571777, "logps/chosen": -221.96670532226562, "logps/rejected": -195.8957061767578, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": 1.197759985923767, "rewards/margins": 8.008356094360352, "rewards/rejected": -6.810595989227295, "step": 4581 }, { "epoch": 1.01, "learning_rate": 9.458769582239128e-06, "logits/chosen": -1.0048065185546875, "logits/rejected": -0.9284613728523254, "logps/chosen": -181.99815368652344, "logps/rejected": -183.41380310058594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.0647064447402954, "rewards/margins": 9.442140579223633, "rewards/rejected": -8.377433776855469, "step": 4582 }, { "epoch": 1.01, "learning_rate": 9.457958230292061e-06, "logits/chosen": -0.9104222655296326, "logits/rejected": -0.9575400948524475, "logps/chosen": -203.22799682617188, "logps/rejected": -185.55972290039062, "loss": 0.0732, "rewards/accuracies": 1.0, "rewards/chosen": 2.485865831375122, "rewards/margins": 1.8465973138809204, "rewards/rejected": 0.6392685174942017, "step": 4583 }, { "epoch": 1.01, "learning_rate": 9.457146305508078e-06, "logits/chosen": -1.0117427110671997, "logits/rejected": -0.9780665040016174, "logps/chosen": -95.5657958984375, "logps/rejected": -203.03489685058594, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.6161292791366577, "rewards/margins": 8.596877098083496, "rewards/rejected": -10.213006019592285, "step": 4584 }, { "epoch": 1.01, "learning_rate": 9.45633380799151e-06, "logits/chosen": -1.2328113317489624, "logits/rejected": -1.2536520957946777, "logps/chosen": -101.54875183105469, "logps/rejected": -114.72206115722656, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -1.8023346662521362, "rewards/margins": 3.0142579078674316, "rewards/rejected": -4.816592693328857, "step": 4585 }, { "epoch": 1.02, "learning_rate": 9.455520737846757e-06, "logits/chosen": -1.2973835468292236, "logits/rejected": -1.246940016746521, "logps/chosen": -130.71405029296875, "logps/rejected": -191.8494873046875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.7433868646621704, "rewards/margins": 7.782205104827881, "rewards/rejected": -7.038818359375, "step": 4586 }, { "epoch": 1.02, "learning_rate": 9.454707095178304e-06, "logits/chosen": -1.3660873174667358, "logits/rejected": -1.2648323774337769, "logps/chosen": -123.3121337890625, "logps/rejected": -264.38409423828125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.462702989578247, "rewards/margins": 6.541719436645508, "rewards/rejected": -8.004422187805176, "step": 4587 }, { "epoch": 1.02, "learning_rate": 9.453892880090696e-06, "logits/chosen": -1.0840860605239868, "logits/rejected": -1.0741417407989502, "logps/chosen": -142.4838104248047, "logps/rejected": -194.0233917236328, "loss": 0.422, "rewards/accuracies": 0.0, "rewards/chosen": -0.9238418936729431, "rewards/margins": -0.28206485509872437, "rewards/rejected": -0.6417770385742188, "step": 4588 }, { "epoch": 1.02, "learning_rate": 9.45307809268856e-06, "logits/chosen": -1.3051820993423462, "logits/rejected": -1.2490547895431519, "logps/chosen": -183.7961883544922, "logps/rejected": -344.7040100097656, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -1.142634630203247, "rewards/margins": 3.2869460582733154, "rewards/rejected": -4.4295806884765625, "step": 4589 }, { "epoch": 1.02, "learning_rate": 9.452262733076594e-06, "logits/chosen": -1.0592315196990967, "logits/rejected": -0.975458562374115, "logps/chosen": -234.67120361328125, "logps/rejected": -373.3935241699219, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.49505922198295593, "rewards/margins": 5.878506183624268, "rewards/rejected": -5.383447170257568, "step": 4590 }, { "epoch": 1.02, "learning_rate": 9.45144680135957e-06, "logits/chosen": -0.8466029763221741, "logits/rejected": -0.7988405823707581, "logps/chosen": -198.6112060546875, "logps/rejected": -361.908935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.5352447032928467, "rewards/margins": 11.48944091796875, "rewards/rejected": -8.954195976257324, "step": 4591 }, { "epoch": 1.02, "learning_rate": 9.450630297642334e-06, "logits/chosen": -1.2935843467712402, "logits/rejected": -1.361433744430542, "logps/chosen": -114.05070495605469, "logps/rejected": -125.45491027832031, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.2783393859863281, "rewards/margins": 6.104912757873535, "rewards/rejected": -6.383252143859863, "step": 4592 }, { "epoch": 1.02, "learning_rate": 9.449813222029802e-06, "logits/chosen": -1.4128451347351074, "logits/rejected": -1.107223391532898, "logps/chosen": -94.03086853027344, "logps/rejected": -884.8570556640625, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": -0.2975609004497528, "rewards/margins": 73.94654083251953, "rewards/rejected": -74.24410247802734, "step": 4593 }, { "epoch": 1.02, "learning_rate": 9.448995574626969e-06, "logits/chosen": -0.7872308492660522, "logits/rejected": -0.8240788578987122, "logps/chosen": -166.4500274658203, "logps/rejected": -140.62945556640625, "loss": 0.7596, "rewards/accuracies": 0.0, "rewards/chosen": -2.7115280628204346, "rewards/margins": -1.2719329595565796, "rewards/rejected": -1.439595103263855, "step": 4594 }, { "epoch": 1.02, "learning_rate": 9.448177355538899e-06, "logits/chosen": -0.902648389339447, "logits/rejected": -0.8316636681556702, "logps/chosen": -100.30690002441406, "logps/rejected": -191.51803588867188, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": 0.24037933349609375, "rewards/margins": 9.022899627685547, "rewards/rejected": -8.782520294189453, "step": 4595 }, { "epoch": 1.02, "learning_rate": 9.447358564870732e-06, "logits/chosen": -0.9776766896247864, "logits/rejected": -1.0792940855026245, "logps/chosen": -196.30084228515625, "logps/rejected": -132.47401428222656, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": 1.0861603021621704, "rewards/margins": 5.872622489929199, "rewards/rejected": -4.786462306976318, "step": 4596 }, { "epoch": 1.02, "learning_rate": 9.446539202727683e-06, "logits/chosen": -1.2662005424499512, "logits/rejected": -1.2845113277435303, "logps/chosen": -184.17971801757812, "logps/rejected": -199.01776123046875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3070526123046875, "rewards/margins": 5.888452053070068, "rewards/rejected": -6.195504665374756, "step": 4597 }, { "epoch": 1.02, "learning_rate": 9.445719269215032e-06, "logits/chosen": -1.0477169752120972, "logits/rejected": -0.9836691617965698, "logps/chosen": -136.14141845703125, "logps/rejected": -298.1834411621094, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.9240753650665283, "rewards/margins": 4.813961982727051, "rewards/rejected": -6.738037109375, "step": 4598 }, { "epoch": 1.02, "learning_rate": 9.444898764438144e-06, "logits/chosen": -0.7984649538993835, "logits/rejected": -0.8065223693847656, "logps/chosen": -122.44672393798828, "logps/rejected": -160.83599853515625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.005451202392578125, "rewards/margins": 6.248199462890625, "rewards/rejected": -6.253650665283203, "step": 4599 }, { "epoch": 1.02, "learning_rate": 9.444077688502451e-06, "logits/chosen": -1.2284091711044312, "logits/rejected": -1.2324516773223877, "logps/chosen": -242.77615356445312, "logps/rejected": -222.49856567382812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1200073957443237, "rewards/margins": 7.763551712036133, "rewards/rejected": -8.883559226989746, "step": 4600 }, { "epoch": 1.02, "learning_rate": 9.443256041513457e-06, "logits/chosen": -1.1393996477127075, "logits/rejected": -1.2457764148712158, "logps/chosen": -249.4082489013672, "logps/rejected": -182.48536682128906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 1.2737563848495483, "rewards/margins": 9.091806411743164, "rewards/rejected": -7.818049907684326, "step": 4601 }, { "epoch": 1.02, "learning_rate": 9.442433823576741e-06, "logits/chosen": -1.1868656873703003, "logits/rejected": -1.207625150680542, "logps/chosen": -107.94526672363281, "logps/rejected": -143.00222778320312, "loss": 0.1095, "rewards/accuracies": 1.0, "rewards/chosen": -2.2718498706817627, "rewards/margins": 1.406816005706787, "rewards/rejected": -3.67866587638855, "step": 4602 }, { "epoch": 1.02, "learning_rate": 9.441611034797961e-06, "logits/chosen": -1.5716625452041626, "logits/rejected": -1.628831148147583, "logps/chosen": -156.05545043945312, "logps/rejected": -132.105712890625, "loss": 0.1868, "rewards/accuracies": 1.0, "rewards/chosen": 0.405508428812027, "rewards/margins": 5.058084487915039, "rewards/rejected": -4.652575969696045, "step": 4603 }, { "epoch": 1.02, "learning_rate": 9.44078767528284e-06, "logits/chosen": -0.7843746542930603, "logits/rejected": -0.7569283843040466, "logps/chosen": -125.91761779785156, "logps/rejected": -100.64098358154297, "loss": 0.1263, "rewards/accuracies": 1.0, "rewards/chosen": -3.2601318359375, "rewards/margins": 1.2473235130310059, "rewards/rejected": -4.507455348968506, "step": 4604 }, { "epoch": 1.02, "learning_rate": 9.439963745137177e-06, "logits/chosen": -0.9501339197158813, "logits/rejected": -0.8698321580886841, "logps/chosen": -142.85971069335938, "logps/rejected": -302.40313720703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 5.371455669403076, "rewards/margins": 14.928525924682617, "rewards/rejected": -9.5570707321167, "step": 4605 }, { "epoch": 1.02, "learning_rate": 9.439139244466847e-06, "logits/chosen": -0.8026282787322998, "logits/rejected": -0.7944609522819519, "logps/chosen": -90.50272369384766, "logps/rejected": -110.28907012939453, "loss": 0.7155, "rewards/accuracies": 0.0, "rewards/chosen": -2.970050096511841, "rewards/margins": -1.1577033996582031, "rewards/rejected": -1.8123466968536377, "step": 4606 }, { "epoch": 1.02, "learning_rate": 9.438314173377796e-06, "logits/chosen": -1.1548470258712769, "logits/rejected": -1.1334527730941772, "logps/chosen": -83.27738952636719, "logps/rejected": -170.27288818359375, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.9107498526573181, "rewards/margins": 5.116482734680176, "rewards/rejected": -6.027232646942139, "step": 4607 }, { "epoch": 1.02, "learning_rate": 9.437488531976042e-06, "logits/chosen": -1.0668134689331055, "logits/rejected": -1.0668134689331055, "logps/chosen": -108.71076202392578, "logps/rejected": -108.71076202392578, "loss": 0.3476, "rewards/accuracies": 0.0, "rewards/chosen": -2.9573936462402344, "rewards/margins": 0.0, "rewards/rejected": -2.9573936462402344, "step": 4608 }, { "epoch": 1.02, "learning_rate": 9.43666232036768e-06, "logits/chosen": -1.0102730989456177, "logits/rejected": -1.0409001111984253, "logps/chosen": -183.082275390625, "logps/rejected": -282.9648742675781, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -3.5162689685821533, "rewards/margins": 8.796716690063477, "rewards/rejected": -12.31298542022705, "step": 4609 }, { "epoch": 1.02, "learning_rate": 9.435835538658873e-06, "logits/chosen": -1.1965608596801758, "logits/rejected": -1.2358983755111694, "logps/chosen": -66.3360366821289, "logps/rejected": -133.80673217773438, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -0.8319469690322876, "rewards/margins": 3.254230499267578, "rewards/rejected": -4.086177349090576, "step": 4610 }, { "epoch": 1.02, "learning_rate": 9.435008186955866e-06, "logits/chosen": -1.1484489440917969, "logits/rejected": -1.1259340047836304, "logps/chosen": -191.55105590820312, "logps/rejected": -242.48968505859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7032135128974915, "rewards/margins": 9.123852729797363, "rewards/rejected": -9.827066421508789, "step": 4611 }, { "epoch": 1.02, "learning_rate": 9.434180265364965e-06, "logits/chosen": -1.0282303094863892, "logits/rejected": -0.898517906665802, "logps/chosen": -215.55108642578125, "logps/rejected": -366.95184326171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.5313689708709717, "rewards/margins": 14.833637237548828, "rewards/rejected": -12.302268028259277, "step": 4612 }, { "epoch": 1.02, "learning_rate": 9.43335177399256e-06, "logits/chosen": -1.3246095180511475, "logits/rejected": -1.3447591066360474, "logps/chosen": -85.49101257324219, "logps/rejected": -106.64502716064453, "loss": 0.1841, "rewards/accuracies": 1.0, "rewards/chosen": -0.5989136099815369, "rewards/margins": 0.8293083310127258, "rewards/rejected": -1.4282219409942627, "step": 4613 }, { "epoch": 1.02, "learning_rate": 9.432522712945111e-06, "logits/chosen": -0.6521159410476685, "logits/rejected": -0.6359895467758179, "logps/chosen": -121.62547302246094, "logps/rejected": -131.18301391601562, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.521075427532196, "rewards/margins": 8.278191566467285, "rewards/rejected": -8.799266815185547, "step": 4614 }, { "epoch": 1.02, "learning_rate": 9.43169308232915e-06, "logits/chosen": -1.012054681777954, "logits/rejected": -1.0190106630325317, "logps/chosen": -92.47772216796875, "logps/rejected": -180.28134155273438, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -1.602681040763855, "rewards/margins": 3.405398368835449, "rewards/rejected": -5.008079528808594, "step": 4615 }, { "epoch": 1.02, "learning_rate": 9.430862882251279e-06, "logits/chosen": -1.247459053993225, "logits/rejected": -1.1891512870788574, "logps/chosen": -145.26266479492188, "logps/rejected": -231.1670684814453, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -0.8950088620185852, "rewards/margins": 4.657478332519531, "rewards/rejected": -5.552487373352051, "step": 4616 }, { "epoch": 1.02, "learning_rate": 9.430032112818182e-06, "logits/chosen": -0.8112563490867615, "logits/rejected": -0.876075267791748, "logps/chosen": -98.50923156738281, "logps/rejected": -103.78679656982422, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.8761605024337769, "rewards/margins": 5.671977996826172, "rewards/rejected": -7.548138618469238, "step": 4617 }, { "epoch": 1.02, "learning_rate": 9.429200774136603e-06, "logits/chosen": -0.9396706223487854, "logits/rejected": -0.9219438433647156, "logps/chosen": -109.120849609375, "logps/rejected": -135.99398803710938, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -1.804735541343689, "rewards/margins": 3.6248068809509277, "rewards/rejected": -5.429542541503906, "step": 4618 }, { "epoch": 1.02, "learning_rate": 9.428368866313377e-06, "logits/chosen": -0.8168542385101318, "logits/rejected": -0.7011545896530151, "logps/chosen": -187.9203643798828, "logps/rejected": -280.90802001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 5.381477355957031, "rewards/margins": 10.810134887695312, "rewards/rejected": -5.428657531738281, "step": 4619 }, { "epoch": 1.02, "learning_rate": 9.427536389455394e-06, "logits/chosen": -0.7169693112373352, "logits/rejected": -0.6702921986579895, "logps/chosen": -206.2551727294922, "logps/rejected": -206.55007934570312, "loss": 0.9123, "rewards/accuracies": 0.0, "rewards/chosen": -9.492952346801758, "rewards/margins": -1.6487412452697754, "rewards/rejected": -7.844211101531982, "step": 4620 }, { "epoch": 1.02, "learning_rate": 9.426703343669631e-06, "logits/chosen": -0.7391881346702576, "logits/rejected": -0.718329906463623, "logps/chosen": -273.7261047363281, "logps/rejected": -213.37435913085938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.0970306396484375, "rewards/margins": 9.584501266479492, "rewards/rejected": -9.487470626831055, "step": 4621 }, { "epoch": 1.02, "learning_rate": 9.425869729063129e-06, "logits/chosen": -0.9808112978935242, "logits/rejected": -0.9719831943511963, "logps/chosen": -116.15644836425781, "logps/rejected": -230.78598022460938, "loss": 0.7814, "rewards/accuracies": 0.0, "rewards/chosen": -0.2276565581560135, "rewards/margins": -1.3271881341934204, "rewards/rejected": 1.0995315313339233, "step": 4622 }, { "epoch": 1.02, "learning_rate": 9.425035545743005e-06, "logits/chosen": -1.2035990953445435, "logits/rejected": -1.260310173034668, "logps/chosen": -196.09451293945312, "logps/rejected": -117.14321899414062, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.233023166656494, "rewards/margins": 5.555874824523926, "rewards/rejected": -7.78889799118042, "step": 4623 }, { "epoch": 1.02, "learning_rate": 9.424200793816451e-06, "logits/chosen": -0.6475738883018494, "logits/rejected": -0.7089352607727051, "logps/chosen": -127.43602752685547, "logps/rejected": -104.97030639648438, "loss": 0.1707, "rewards/accuracies": 1.0, "rewards/chosen": -0.828961193561554, "rewards/margins": 0.9001815915107727, "rewards/rejected": -1.7291427850723267, "step": 4624 }, { "epoch": 1.02, "learning_rate": 9.423365473390734e-06, "logits/chosen": -1.150259017944336, "logits/rejected": -1.1426070928573608, "logps/chosen": -97.13762664794922, "logps/rejected": -109.74825286865234, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.20988845825195312, "rewards/margins": 4.009777069091797, "rewards/rejected": -4.21966552734375, "step": 4625 }, { "epoch": 1.02, "learning_rate": 9.422529584573183e-06, "logits/chosen": -1.0776010751724243, "logits/rejected": -1.0750608444213867, "logps/chosen": -101.02194213867188, "logps/rejected": -214.80686950683594, "loss": 0.1832, "rewards/accuracies": 1.0, "rewards/chosen": -1.6070404052734375, "rewards/margins": 7.107202529907227, "rewards/rejected": -8.714242935180664, "step": 4626 }, { "epoch": 1.02, "learning_rate": 9.421693127471214e-06, "logits/chosen": -0.9700135588645935, "logits/rejected": -0.8910015225410461, "logps/chosen": -155.0003662109375, "logps/rejected": -234.23887634277344, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.5553619265556335, "rewards/margins": 7.742593288421631, "rewards/rejected": -7.187231540679932, "step": 4627 }, { "epoch": 1.02, "learning_rate": 9.420856102192305e-06, "logits/chosen": -1.40180242061615, "logits/rejected": -1.3953309059143066, "logps/chosen": -87.09941864013672, "logps/rejected": -116.75074005126953, "loss": 0.1208, "rewards/accuracies": 1.0, "rewards/chosen": -0.5346977114677429, "rewards/margins": 1.302405595779419, "rewards/rejected": -1.837103247642517, "step": 4628 }, { "epoch": 1.02, "learning_rate": 9.420018508844017e-06, "logits/chosen": -0.8382074236869812, "logits/rejected": -0.9661083817481995, "logps/chosen": -268.62139892578125, "logps/rejected": -276.26123046875, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.4898391962051392, "rewards/margins": 8.689778327941895, "rewards/rejected": -10.179617881774902, "step": 4629 }, { "epoch": 1.02, "learning_rate": 9.419180347533976e-06, "logits/chosen": -0.9088622331619263, "logits/rejected": -0.8533627390861511, "logps/chosen": -204.02207946777344, "logps/rejected": -214.26820373535156, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.341914415359497, "rewards/margins": 6.367560386657715, "rewards/rejected": -9.709474563598633, "step": 4630 }, { "epoch": 1.03, "learning_rate": 9.418341618369882e-06, "logits/chosen": -1.2149580717086792, "logits/rejected": -1.1719599962234497, "logps/chosen": -89.7044448852539, "logps/rejected": -146.1110076904297, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.5322532653808594, "rewards/margins": 4.360446929931641, "rewards/rejected": -4.8927001953125, "step": 4631 }, { "epoch": 1.03, "learning_rate": 9.417502321459513e-06, "logits/chosen": -1.297444462776184, "logits/rejected": -1.2702434062957764, "logps/chosen": -82.51960754394531, "logps/rejected": -106.78790283203125, "loss": 0.1509, "rewards/accuracies": 1.0, "rewards/chosen": -0.7509223818778992, "rewards/margins": 1.0452156066894531, "rewards/rejected": -1.796138048171997, "step": 4632 }, { "epoch": 1.03, "learning_rate": 9.416662456910714e-06, "logits/chosen": -1.1127458810806274, "logits/rejected": -1.0149269104003906, "logps/chosen": -169.44598388671875, "logps/rejected": -354.9463806152344, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": 2.3685805797576904, "rewards/margins": 15.797759056091309, "rewards/rejected": -13.429178237915039, "step": 4633 }, { "epoch": 1.03, "learning_rate": 9.415822024831407e-06, "logits/chosen": -0.7458092570304871, "logits/rejected": -0.7365390658378601, "logps/chosen": -177.8987274169922, "logps/rejected": -204.3361358642578, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 2.8113694190979004, "rewards/margins": 5.1326704025268555, "rewards/rejected": -2.321301221847534, "step": 4634 }, { "epoch": 1.03, "learning_rate": 9.414981025329585e-06, "logits/chosen": -0.7458258867263794, "logits/rejected": -0.8149555921554565, "logps/chosen": -121.26590728759766, "logps/rejected": -88.36495971679688, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 2.3735618591308594, "rewards/margins": 7.57574987411499, "rewards/rejected": -5.202188014984131, "step": 4635 }, { "epoch": 1.03, "learning_rate": 9.414139458513316e-06, "logits/chosen": -0.8697001338005066, "logits/rejected": -0.9365867376327515, "logps/chosen": -137.89389038085938, "logps/rejected": -115.81224060058594, "loss": 0.0444, "rewards/accuracies": 1.0, "rewards/chosen": 0.05211792141199112, "rewards/margins": 2.477191209793091, "rewards/rejected": -2.4250733852386475, "step": 4636 }, { "epoch": 1.03, "learning_rate": 9.413297324490736e-06, "logits/chosen": -0.8882245421409607, "logits/rejected": -0.8566234111785889, "logps/chosen": -63.40110397338867, "logps/rejected": -162.82308959960938, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": -0.8797374963760376, "rewards/margins": 3.378244400024414, "rewards/rejected": -4.257981777191162, "step": 4637 }, { "epoch": 1.03, "learning_rate": 9.41245462337006e-06, "logits/chosen": -1.0244053602218628, "logits/rejected": -1.2032420635223389, "logps/chosen": -243.2628631591797, "logps/rejected": -64.462890625, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": -1.5749053955078125, "rewards/margins": 2.9825539588928223, "rewards/rejected": -4.557459354400635, "step": 4638 }, { "epoch": 1.03, "learning_rate": 9.41161135525957e-06, "logits/chosen": -0.7976928353309631, "logits/rejected": -0.8765629529953003, "logps/chosen": -175.16610717773438, "logps/rejected": -103.92843627929688, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.491036981344223, "rewards/margins": 7.203517436981201, "rewards/rejected": -7.694554328918457, "step": 4639 }, { "epoch": 1.03, "learning_rate": 9.410767520267629e-06, "logits/chosen": -1.1467198133468628, "logits/rejected": -1.146554708480835, "logps/chosen": -146.4964141845703, "logps/rejected": -176.55889892578125, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": -3.1967239379882812, "rewards/margins": 2.2913765907287598, "rewards/rejected": -5.488100528717041, "step": 4640 }, { "epoch": 1.03, "learning_rate": 9.409923118502665e-06, "logits/chosen": -1.0533262491226196, "logits/rejected": -1.0108153820037842, "logps/chosen": -125.53421020507812, "logps/rejected": -239.53424072265625, "loss": 0.8396, "rewards/accuracies": 1.0, "rewards/chosen": -1.3159722089767456, "rewards/margins": 1.445896029472351, "rewards/rejected": -2.7618682384490967, "step": 4641 }, { "epoch": 1.03, "learning_rate": 9.40907815007318e-06, "logits/chosen": -0.7319791316986084, "logits/rejected": -0.6849337220191956, "logps/chosen": -109.51100158691406, "logps/rejected": -185.86669921875, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": -1.6845229864120483, "rewards/margins": 6.2094292640686035, "rewards/rejected": -7.893952369689941, "step": 4642 }, { "epoch": 1.03, "learning_rate": 9.408232615087752e-06, "logits/chosen": -1.0828285217285156, "logits/rejected": -1.0978082418441772, "logps/chosen": -115.30938720703125, "logps/rejected": -162.43405151367188, "loss": 0.1364, "rewards/accuracies": 1.0, "rewards/chosen": -1.057460069656372, "rewards/margins": 1.4110321998596191, "rewards/rejected": -2.468492269515991, "step": 4643 }, { "epoch": 1.03, "learning_rate": 9.40738651365503e-06, "logits/chosen": -0.7628844380378723, "logits/rejected": -0.6725621223449707, "logps/chosen": -202.29232788085938, "logps/rejected": -458.2230224609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.4491302967071533, "rewards/margins": 14.847421646118164, "rewards/rejected": -13.39829158782959, "step": 4644 }, { "epoch": 1.03, "learning_rate": 9.406539845883736e-06, "logits/chosen": -1.3039114475250244, "logits/rejected": -1.2814182043075562, "logps/chosen": -110.04930114746094, "logps/rejected": -152.60057067871094, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.4631393551826477, "rewards/margins": 6.799961566925049, "rewards/rejected": -7.263101100921631, "step": 4645 }, { "epoch": 1.03, "learning_rate": 9.405692611882666e-06, "logits/chosen": -1.437434196472168, "logits/rejected": -1.435003638267517, "logps/chosen": -98.4969482421875, "logps/rejected": -154.52261352539062, "loss": 1.7128, "rewards/accuracies": 1.0, "rewards/chosen": -2.040407657623291, "rewards/margins": 3.776230812072754, "rewards/rejected": -5.816638469696045, "step": 4646 }, { "epoch": 1.03, "learning_rate": 9.404844811760685e-06, "logits/chosen": -0.7646716237068176, "logits/rejected": -0.7589282393455505, "logps/chosen": -168.59353637695312, "logps/rejected": -284.61834716796875, "loss": 0.0723, "rewards/accuracies": 1.0, "rewards/chosen": -1.5909194946289062, "rewards/margins": 2.7956056594848633, "rewards/rejected": -4.3865251541137695, "step": 4647 }, { "epoch": 1.03, "learning_rate": 9.403996445626735e-06, "logits/chosen": -1.086421012878418, "logits/rejected": -1.2128621339797974, "logps/chosen": -197.9169921875, "logps/rejected": -104.1032943725586, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -0.15263672173023224, "rewards/margins": 2.9281716346740723, "rewards/rejected": -3.080808401107788, "step": 4648 }, { "epoch": 1.03, "learning_rate": 9.403147513589829e-06, "logits/chosen": -0.6833552718162537, "logits/rejected": -0.6066343784332275, "logps/chosen": -179.84133911132812, "logps/rejected": -232.17828369140625, "loss": 0.173, "rewards/accuracies": 1.0, "rewards/chosen": 0.8314483761787415, "rewards/margins": 9.134416580200195, "rewards/rejected": -8.30296802520752, "step": 4649 }, { "epoch": 1.03, "learning_rate": 9.402298015759052e-06, "logits/chosen": -0.8711245656013489, "logits/rejected": -0.9486691951751709, "logps/chosen": -289.8931884765625, "logps/rejected": -126.12798309326172, "loss": 0.0681, "rewards/accuracies": 1.0, "rewards/chosen": -3.301898241043091, "rewards/margins": 1.9241464138031006, "rewards/rejected": -5.226044654846191, "step": 4650 }, { "epoch": 1.03, "learning_rate": 9.401447952243563e-06, "logits/chosen": -0.9146037697792053, "logits/rejected": -0.8527914881706238, "logps/chosen": -82.24134826660156, "logps/rejected": -204.81300354003906, "loss": 0.0784, "rewards/accuracies": 1.0, "rewards/chosen": -0.1273544281721115, "rewards/margins": 1.7740753889083862, "rewards/rejected": -1.9014297723770142, "step": 4651 }, { "epoch": 1.03, "learning_rate": 9.400597323152591e-06, "logits/chosen": -1.541155457496643, "logits/rejected": -1.4828852415084839, "logps/chosen": -84.18882751464844, "logps/rejected": -115.83599090576172, "loss": 0.1629, "rewards/accuracies": 1.0, "rewards/chosen": 0.07179412990808487, "rewards/margins": 0.954780638217926, "rewards/rejected": -0.8829864859580994, "step": 4652 }, { "epoch": 1.03, "learning_rate": 9.399746128595444e-06, "logits/chosen": -1.3939327001571655, "logits/rejected": -1.210875391960144, "logps/chosen": -155.8079376220703, "logps/rejected": -313.8927001953125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.3256378173828125, "rewards/margins": 7.238943576812744, "rewards/rejected": -6.913305759429932, "step": 4653 }, { "epoch": 1.03, "learning_rate": 9.398894368681496e-06, "logits/chosen": -1.3216928243637085, "logits/rejected": -1.301073431968689, "logps/chosen": -102.78981018066406, "logps/rejected": -143.87200927734375, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/chosen": -1.1895188093185425, "rewards/margins": 3.045177459716797, "rewards/rejected": -4.234696388244629, "step": 4654 }, { "epoch": 1.03, "learning_rate": 9.398042043520197e-06, "logits/chosen": -1.235106110572815, "logits/rejected": -1.214873194694519, "logps/chosen": -150.6436767578125, "logps/rejected": -221.93524169921875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 2.9839890003204346, "rewards/margins": 6.1081743240356445, "rewards/rejected": -3.12418532371521, "step": 4655 }, { "epoch": 1.03, "learning_rate": 9.397189153221067e-06, "logits/chosen": -1.3909549713134766, "logits/rejected": -1.2469875812530518, "logps/chosen": -127.24720764160156, "logps/rejected": -248.31594848632812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 3.7319672107696533, "rewards/margins": 8.554926872253418, "rewards/rejected": -4.822959899902344, "step": 4656 }, { "epoch": 1.03, "learning_rate": 9.396335697893702e-06, "logits/chosen": -1.554051399230957, "logits/rejected": -1.4508817195892334, "logps/chosen": -102.17988586425781, "logps/rejected": -254.75941467285156, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -1.9163017272949219, "rewards/margins": 2.7874503135681152, "rewards/rejected": -4.703752040863037, "step": 4657 }, { "epoch": 1.03, "learning_rate": 9.395481677647767e-06, "logits/chosen": -0.9750792384147644, "logits/rejected": -0.9310154318809509, "logps/chosen": -211.30755615234375, "logps/rejected": -190.26510620117188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.1820709705352783, "rewards/margins": 6.09470272064209, "rewards/rejected": -9.276773452758789, "step": 4658 }, { "epoch": 1.03, "learning_rate": 9.394627092593002e-06, "logits/chosen": -1.3432893753051758, "logits/rejected": -1.3718599081039429, "logps/chosen": -114.60493469238281, "logps/rejected": -77.12934875488281, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.19462738931179047, "rewards/margins": 5.146505355834961, "rewards/rejected": -5.341132640838623, "step": 4659 }, { "epoch": 1.03, "learning_rate": 9.393771942839223e-06, "logits/chosen": -1.0557808876037598, "logits/rejected": -1.0776153802871704, "logps/chosen": -131.6635284423828, "logps/rejected": -74.53724670410156, "loss": 0.0892, "rewards/accuracies": 1.0, "rewards/chosen": -2.259385824203491, "rewards/margins": 1.6338858604431152, "rewards/rejected": -3.8932716846466064, "step": 4660 }, { "epoch": 1.03, "learning_rate": 9.392916228496309e-06, "logits/chosen": -0.9620068669319153, "logits/rejected": -0.9713612794876099, "logps/chosen": -179.86819458007812, "logps/rejected": -158.46572875976562, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -0.22088013589382172, "rewards/margins": 5.662080764770508, "rewards/rejected": -5.882960796356201, "step": 4661 }, { "epoch": 1.03, "learning_rate": 9.392059949674222e-06, "logits/chosen": -1.2110930681228638, "logits/rejected": -1.2114429473876953, "logps/chosen": -192.81524658203125, "logps/rejected": -263.0514831542969, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.5870910882949829, "rewards/margins": 6.65205717086792, "rewards/rejected": -7.239148139953613, "step": 4662 }, { "epoch": 1.03, "learning_rate": 9.39120310648299e-06, "logits/chosen": -1.0661312341690063, "logits/rejected": -1.1341966390609741, "logps/chosen": -193.2498779296875, "logps/rejected": -123.6766128540039, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": -0.21777191758155823, "rewards/margins": 3.2933998107910156, "rewards/rejected": -3.511171817779541, "step": 4663 }, { "epoch": 1.03, "learning_rate": 9.390345699032712e-06, "logits/chosen": -0.9509082436561584, "logits/rejected": -0.8942767381668091, "logps/chosen": -64.64596557617188, "logps/rejected": -124.63402557373047, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": -0.3337760865688324, "rewards/margins": 3.3871681690216064, "rewards/rejected": -3.7209441661834717, "step": 4664 }, { "epoch": 1.03, "learning_rate": 9.389487727433569e-06, "logits/chosen": -1.1449717283248901, "logits/rejected": -1.1927655935287476, "logps/chosen": -260.1104736328125, "logps/rejected": -221.35092163085938, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.022221326828003, "rewards/margins": 7.005958557128906, "rewards/rejected": -9.028180122375488, "step": 4665 }, { "epoch": 1.03, "learning_rate": 9.388629191795804e-06, "logits/chosen": -1.386398434638977, "logits/rejected": -1.1752902269363403, "logps/chosen": -141.12445068359375, "logps/rejected": -329.2854919433594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.08681793510913849, "rewards/margins": 8.162073135375977, "rewards/rejected": -8.24889087677002, "step": 4666 }, { "epoch": 1.03, "learning_rate": 9.387770092229736e-06, "logits/chosen": -1.0216492414474487, "logits/rejected": -1.0506342649459839, "logps/chosen": -190.39822387695312, "logps/rejected": -423.8086853027344, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3181900084018707, "rewards/margins": 24.181739807128906, "rewards/rejected": -24.499929428100586, "step": 4667 }, { "epoch": 1.03, "learning_rate": 9.386910428845762e-06, "logits/chosen": -1.0345821380615234, "logits/rejected": -1.0170202255249023, "logps/chosen": -139.03256225585938, "logps/rejected": -137.25155639648438, "loss": 0.1355, "rewards/accuracies": 1.0, "rewards/chosen": -2.8116867542266846, "rewards/margins": 1.285531759262085, "rewards/rejected": -4.0972185134887695, "step": 4668 }, { "epoch": 1.03, "learning_rate": 9.386050201754342e-06, "logits/chosen": -1.335654616355896, "logits/rejected": -1.335654616355896, "logps/chosen": -114.28630828857422, "logps/rejected": -114.28630828857422, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -2.8040435314178467, "rewards/margins": 0.0, "rewards/rejected": -2.8040435314178467, "step": 4669 }, { "epoch": 1.03, "learning_rate": 9.385189411066014e-06, "logits/chosen": -0.9850679039955139, "logits/rejected": -1.0449930429458618, "logps/chosen": -153.6474609375, "logps/rejected": -105.70061492919922, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/chosen": 0.3784027099609375, "rewards/margins": 2.068528175354004, "rewards/rejected": -1.6901253461837769, "step": 4670 }, { "epoch": 1.03, "learning_rate": 9.384328056891389e-06, "logits/chosen": -1.4267395734786987, "logits/rejected": -1.3813323974609375, "logps/chosen": -86.18040466308594, "logps/rejected": -144.85995483398438, "loss": 0.0462, "rewards/accuracies": 1.0, "rewards/chosen": 0.33057403564453125, "rewards/margins": 2.334150791168213, "rewards/rejected": -2.0035767555236816, "step": 4671 }, { "epoch": 1.03, "learning_rate": 9.38346613934115e-06, "logits/chosen": -1.1809484958648682, "logits/rejected": -1.2550294399261475, "logps/chosen": -222.89334106445312, "logps/rejected": -90.72554016113281, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.863427758216858, "rewards/margins": 4.434660911560059, "rewards/rejected": -6.298088550567627, "step": 4672 }, { "epoch": 1.03, "learning_rate": 9.382603658526048e-06, "logits/chosen": -1.190102219581604, "logits/rejected": -1.1873842477798462, "logps/chosen": -111.16533660888672, "logps/rejected": -150.6309356689453, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.08708114922046661, "rewards/margins": 7.130369663238525, "rewards/rejected": -7.043288707733154, "step": 4673 }, { "epoch": 1.03, "learning_rate": 9.381740614556911e-06, "logits/chosen": -1.3281649351119995, "logits/rejected": -1.358020305633545, "logps/chosen": -114.024658203125, "logps/rejected": -109.90287017822266, "loss": 0.051, "rewards/accuracies": 1.0, "rewards/chosen": -1.1117172241210938, "rewards/margins": 2.2721107006073, "rewards/rejected": -3.3838279247283936, "step": 4674 }, { "epoch": 1.03, "learning_rate": 9.38087700754464e-06, "logits/chosen": -1.0029513835906982, "logits/rejected": -0.9488433599472046, "logps/chosen": -78.74749755859375, "logps/rejected": -180.86343383789062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 2.0917632579803467, "rewards/margins": 7.371980667114258, "rewards/rejected": -5.280217170715332, "step": 4675 }, { "epoch": 1.03, "learning_rate": 9.380012837600205e-06, "logits/chosen": -1.1774399280548096, "logits/rejected": -1.1874853372573853, "logps/chosen": -127.4150161743164, "logps/rejected": -116.5799331665039, "loss": 0.1801, "rewards/accuracies": 1.0, "rewards/chosen": -1.2407463788986206, "rewards/margins": 0.8358381986618042, "rewards/rejected": -2.076584577560425, "step": 4676 }, { "epoch": 1.04, "learning_rate": 9.379148104834648e-06, "logits/chosen": -0.9246182441711426, "logits/rejected": -0.9204569458961487, "logps/chosen": -95.32557678222656, "logps/rejected": -176.70367431640625, "loss": 0.0859, "rewards/accuracies": 1.0, "rewards/chosen": -0.3596855103969574, "rewards/margins": 3.734790802001953, "rewards/rejected": -4.094476222991943, "step": 4677 }, { "epoch": 1.04, "learning_rate": 9.378282809359087e-06, "logits/chosen": -1.1123101711273193, "logits/rejected": -1.0807238817214966, "logps/chosen": -120.18592071533203, "logps/rejected": -140.38504028320312, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/chosen": -1.8393982648849487, "rewards/margins": 3.4403562545776367, "rewards/rejected": -5.279754638671875, "step": 4678 }, { "epoch": 1.04, "learning_rate": 9.377416951284712e-06, "logits/chosen": -1.09064519405365, "logits/rejected": -1.1314243078231812, "logps/chosen": -126.55111694335938, "logps/rejected": -127.45048522949219, "loss": 0.2544, "rewards/accuracies": 1.0, "rewards/chosen": -3.1984879970550537, "rewards/margins": 0.43297338485717773, "rewards/rejected": -3.6314613819122314, "step": 4679 }, { "epoch": 1.04, "learning_rate": 9.376550530722778e-06, "logits/chosen": -0.8994185328483582, "logits/rejected": -0.8653548359870911, "logps/chosen": -171.6063232421875, "logps/rejected": -160.50750732421875, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.3553726375102997, "rewards/margins": 4.554476261138916, "rewards/rejected": -4.909848690032959, "step": 4680 }, { "epoch": 1.04, "learning_rate": 9.375683547784626e-06, "logits/chosen": -0.8668963313102722, "logits/rejected": -0.9153896570205688, "logps/chosen": -129.9743194580078, "logps/rejected": -165.14385986328125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.3217025697231293, "rewards/margins": 5.613562107086182, "rewards/rejected": -5.935264587402344, "step": 4681 }, { "epoch": 1.04, "learning_rate": 9.374816002581654e-06, "logits/chosen": -1.2446672916412354, "logits/rejected": -1.2446672916412354, "logps/chosen": -80.95905303955078, "logps/rejected": -80.95905303955078, "loss": 0.3674, "rewards/accuracies": 0.0, "rewards/chosen": -0.8036941885948181, "rewards/margins": 0.0, "rewards/rejected": -0.8036941885948181, "step": 4682 }, { "epoch": 1.04, "learning_rate": 9.373947895225345e-06, "logits/chosen": -1.1212958097457886, "logits/rejected": -1.1103756427764893, "logps/chosen": -177.23904418945312, "logps/rejected": -239.10516357421875, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.022973656654358, "rewards/margins": 6.4111223220825195, "rewards/rejected": -7.434095859527588, "step": 4683 }, { "epoch": 1.04, "learning_rate": 9.373079225827243e-06, "logits/chosen": -0.9259780645370483, "logits/rejected": -0.9226512312889099, "logps/chosen": -138.3842010498047, "logps/rejected": -146.9702911376953, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": 0.329598993062973, "rewards/margins": 3.756894588470459, "rewards/rejected": -3.427295684814453, "step": 4684 }, { "epoch": 1.04, "learning_rate": 9.372209994498976e-06, "logits/chosen": -0.9096810221672058, "logits/rejected": -0.7240418791770935, "logps/chosen": -172.20668029785156, "logps/rejected": -307.0745849609375, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -0.3502304255962372, "rewards/margins": 3.9152114391326904, "rewards/rejected": -4.26544189453125, "step": 4685 }, { "epoch": 1.04, "learning_rate": 9.371340201352234e-06, "logits/chosen": -0.8925654888153076, "logits/rejected": -0.9264797568321228, "logps/chosen": -265.6807556152344, "logps/rejected": -167.28790283203125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.15902404487133026, "rewards/margins": 8.459609985351562, "rewards/rejected": -8.618634223937988, "step": 4686 }, { "epoch": 1.04, "learning_rate": 9.370469846498784e-06, "logits/chosen": -1.0001463890075684, "logits/rejected": -0.8382189869880676, "logps/chosen": -82.7486801147461, "logps/rejected": -373.0130615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3056083619594574, "rewards/margins": 9.342633247375488, "rewards/rejected": -9.648241996765137, "step": 4687 }, { "epoch": 1.04, "learning_rate": 9.369598930050466e-06, "logits/chosen": -1.415969729423523, "logits/rejected": -1.3189842700958252, "logps/chosen": -110.29197692871094, "logps/rejected": -231.50494384765625, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": 0.5919967889785767, "rewards/margins": 7.832993984222412, "rewards/rejected": -7.240997314453125, "step": 4688 }, { "epoch": 1.04, "learning_rate": 9.368727452119188e-06, "logits/chosen": -1.1698521375656128, "logits/rejected": -1.1830947399139404, "logps/chosen": -93.16387939453125, "logps/rejected": -200.94810485839844, "loss": 0.047, "rewards/accuracies": 1.0, "rewards/chosen": -0.6547531485557556, "rewards/margins": 14.480711936950684, "rewards/rejected": -15.135464668273926, "step": 4689 }, { "epoch": 1.04, "learning_rate": 9.367855412816935e-06, "logits/chosen": -0.7729520797729492, "logits/rejected": -0.836946964263916, "logps/chosen": -110.96354675292969, "logps/rejected": -63.65674591064453, "loss": 0.0976, "rewards/accuracies": 1.0, "rewards/chosen": -2.3237481117248535, "rewards/margins": 1.8308806419372559, "rewards/rejected": -4.154628753662109, "step": 4690 }, { "epoch": 1.04, "learning_rate": 9.366982812255764e-06, "logits/chosen": -1.1209838390350342, "logits/rejected": -1.1175005435943604, "logps/chosen": -102.0171127319336, "logps/rejected": -128.58218383789062, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": 1.3087379932403564, "rewards/margins": 3.7572107315063477, "rewards/rejected": -2.448472738265991, "step": 4691 }, { "epoch": 1.04, "learning_rate": 9.366109650547798e-06, "logits/chosen": -0.8792722225189209, "logits/rejected": -0.8792722225189209, "logps/chosen": -139.29833984375, "logps/rejected": -139.29833984375, "loss": 0.35, "rewards/accuracies": 0.0, "rewards/chosen": 0.45787355303764343, "rewards/margins": 0.0, "rewards/rejected": 0.45787355303764343, "step": 4692 }, { "epoch": 1.04, "learning_rate": 9.365235927805237e-06, "logits/chosen": -1.3638947010040283, "logits/rejected": -1.106723666191101, "logps/chosen": -99.43278503417969, "logps/rejected": -873.5321044921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.467132568359375, "rewards/margins": 79.51705169677734, "rewards/rejected": -81.98418426513672, "step": 4693 }, { "epoch": 1.04, "learning_rate": 9.364361644140353e-06, "logits/chosen": -1.0541695356369019, "logits/rejected": -0.5661636590957642, "logps/chosen": -200.9142608642578, "logps/rejected": -1114.4749755859375, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -2.347059726715088, "rewards/margins": 99.2882308959961, "rewards/rejected": -101.63529205322266, "step": 4694 }, { "epoch": 1.04, "learning_rate": 9.36348679966549e-06, "logits/chosen": -1.591371774673462, "logits/rejected": -1.591371774673462, "logps/chosen": -150.7239532470703, "logps/rejected": -150.7239532470703, "loss": 0.3877, "rewards/accuracies": 0.0, "rewards/chosen": -2.8999733924865723, "rewards/margins": 0.0, "rewards/rejected": -2.8999733924865723, "step": 4695 }, { "epoch": 1.04, "learning_rate": 9.362611394493063e-06, "logits/chosen": -1.4423388242721558, "logits/rejected": -1.3912781476974487, "logps/chosen": -119.2364501953125, "logps/rejected": -216.33245849609375, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -2.514897108078003, "rewards/margins": 2.944169759750366, "rewards/rejected": -5.459066867828369, "step": 4696 }, { "epoch": 1.04, "learning_rate": 9.361735428735558e-06, "logits/chosen": -0.8639889359474182, "logits/rejected": -0.799555778503418, "logps/chosen": -152.79953002929688, "logps/rejected": -142.77549743652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.563374400138855, "rewards/margins": 10.438608169555664, "rewards/rejected": -8.87523365020752, "step": 4697 }, { "epoch": 1.04, "learning_rate": 9.360858902505539e-06, "logits/chosen": -1.2458000183105469, "logits/rejected": -1.1582602262496948, "logps/chosen": -87.685546875, "logps/rejected": -180.5900115966797, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.34807586669921875, "rewards/margins": 5.206923007965088, "rewards/rejected": -5.554998874664307, "step": 4698 }, { "epoch": 1.04, "learning_rate": 9.359981815915632e-06, "logits/chosen": -1.2947765588760376, "logits/rejected": -1.2965141534805298, "logps/chosen": -60.60956573486328, "logps/rejected": -162.57473754882812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.29583436250686646, "rewards/margins": 9.041994094848633, "rewards/rejected": -8.746159553527832, "step": 4699 }, { "epoch": 1.04, "learning_rate": 9.359104169078541e-06, "logits/chosen": -1.1401500701904297, "logits/rejected": -1.1401500701904297, "logps/chosen": -108.73936462402344, "logps/rejected": -108.73936462402344, "loss": 0.3488, "rewards/accuracies": 0.0, "rewards/chosen": -3.896379232406616, "rewards/margins": 0.0, "rewards/rejected": -3.896379232406616, "step": 4700 }, { "epoch": 1.04, "learning_rate": 9.358225962107047e-06, "logits/chosen": -1.289674997329712, "logits/rejected": -1.2775874137878418, "logps/chosen": -129.8878936767578, "logps/rejected": -174.2886505126953, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -5.3134050369262695, "rewards/margins": 3.7301807403564453, "rewards/rejected": -9.043585777282715, "step": 4701 }, { "epoch": 1.04, "learning_rate": 9.35734719511399e-06, "logits/chosen": -0.9278123378753662, "logits/rejected": -0.9264957308769226, "logps/chosen": -167.7066650390625, "logps/rejected": -237.78982543945312, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 2.641918897628784, "rewards/margins": 5.3789567947387695, "rewards/rejected": -2.7370376586914062, "step": 4702 }, { "epoch": 1.04, "learning_rate": 9.356467868212295e-06, "logits/chosen": -1.0379295349121094, "logits/rejected": -1.0623563528060913, "logps/chosen": -108.65605163574219, "logps/rejected": -91.7052230834961, "loss": 0.1281, "rewards/accuracies": 1.0, "rewards/chosen": -0.2630867063999176, "rewards/margins": 1.35736083984375, "rewards/rejected": -1.6204475164413452, "step": 4703 }, { "epoch": 1.04, "learning_rate": 9.35558798151495e-06, "logits/chosen": -1.1388046741485596, "logits/rejected": -1.1415802240371704, "logps/chosen": -129.46585083007812, "logps/rejected": -158.65914916992188, "loss": 0.0424, "rewards/accuracies": 1.0, "rewards/chosen": -3.330740451812744, "rewards/margins": 2.4284934997558594, "rewards/rejected": -5.7592339515686035, "step": 4704 }, { "epoch": 1.04, "learning_rate": 9.354707535135022e-06, "logits/chosen": -1.0205227136611938, "logits/rejected": -1.0205227136611938, "logps/chosen": -118.34017181396484, "logps/rejected": -118.34017181396484, "loss": 0.3648, "rewards/accuracies": 0.0, "rewards/chosen": -3.6068687438964844, "rewards/margins": 0.0, "rewards/rejected": -3.6068687438964844, "step": 4705 }, { "epoch": 1.04, "learning_rate": 9.353826529185644e-06, "logits/chosen": -1.0805028676986694, "logits/rejected": -1.0805028676986694, "logps/chosen": -124.69860076904297, "logps/rejected": -124.69860076904297, "loss": 0.3529, "rewards/accuracies": 0.0, "rewards/chosen": -4.201549530029297, "rewards/margins": 0.0, "rewards/rejected": -4.201549530029297, "step": 4706 }, { "epoch": 1.04, "learning_rate": 9.352944963780024e-06, "logits/chosen": -1.209027886390686, "logits/rejected": -1.2393356561660767, "logps/chosen": -126.40440368652344, "logps/rejected": -131.19210815429688, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": -0.8360733389854431, "rewards/margins": 6.449664115905762, "rewards/rejected": -7.28573751449585, "step": 4707 }, { "epoch": 1.04, "learning_rate": 9.352062839031438e-06, "logits/chosen": -0.9622542858123779, "logits/rejected": -0.776237964630127, "logps/chosen": -276.31890869140625, "logps/rejected": -406.34967041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.750276565551758, "rewards/margins": 18.22632598876953, "rewards/rejected": -26.97660255432129, "step": 4708 }, { "epoch": 1.04, "learning_rate": 9.351180155053242e-06, "logits/chosen": -1.065871238708496, "logits/rejected": -1.0732771158218384, "logps/chosen": -79.93708801269531, "logps/rejected": -73.69206237792969, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.266531378030777, "rewards/margins": 4.720071792602539, "rewards/rejected": -4.986603260040283, "step": 4709 }, { "epoch": 1.04, "learning_rate": 9.350296911958854e-06, "logits/chosen": -1.042634129524231, "logits/rejected": -1.1048802137374878, "logps/chosen": -114.86336517333984, "logps/rejected": -127.10332489013672, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.20379638671875, "rewards/margins": 8.044975280761719, "rewards/rejected": -9.248771667480469, "step": 4710 }, { "epoch": 1.04, "learning_rate": 9.34941310986177e-06, "logits/chosen": -1.0632271766662598, "logits/rejected": -1.1089646816253662, "logps/chosen": -214.83004760742188, "logps/rejected": -126.63639068603516, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.0272308588027954, "rewards/margins": 4.010534763336182, "rewards/rejected": -5.0377655029296875, "step": 4711 }, { "epoch": 1.04, "learning_rate": 9.348528748875558e-06, "logits/chosen": -1.0893503427505493, "logits/rejected": -1.1213274002075195, "logps/chosen": -168.7078857421875, "logps/rejected": -119.31385040283203, "loss": 0.0359, "rewards/accuracies": 1.0, "rewards/chosen": -1.4759124517440796, "rewards/margins": 2.597609043121338, "rewards/rejected": -4.073521614074707, "step": 4712 }, { "epoch": 1.04, "learning_rate": 9.347643829113856e-06, "logits/chosen": -1.3561357259750366, "logits/rejected": -1.3751875162124634, "logps/chosen": -77.19590759277344, "logps/rejected": -110.6806640625, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.8197494745254517, "rewards/margins": 7.127575874328613, "rewards/rejected": -7.947325229644775, "step": 4713 }, { "epoch": 1.04, "learning_rate": 9.346758350690373e-06, "logits/chosen": -1.0542125701904297, "logits/rejected": -1.1125482320785522, "logps/chosen": -193.62457275390625, "logps/rejected": -128.40487670898438, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.24928437173366547, "rewards/margins": 7.795812129974365, "rewards/rejected": -8.045096397399902, "step": 4714 }, { "epoch": 1.04, "learning_rate": 9.34587231371889e-06, "logits/chosen": -1.0190571546554565, "logits/rejected": -1.0093448162078857, "logps/chosen": -118.89700317382812, "logps/rejected": -98.1634750366211, "loss": 0.0874, "rewards/accuracies": 1.0, "rewards/chosen": -1.6079224348068237, "rewards/margins": 1.6605881452560425, "rewards/rejected": -3.268510580062866, "step": 4715 }, { "epoch": 1.04, "learning_rate": 9.344985718313264e-06, "logits/chosen": -0.6752483248710632, "logits/rejected": -0.4845671057701111, "logps/chosen": -224.83897399902344, "logps/rejected": -911.5174560546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2522140443325043, "rewards/margins": 77.82449340820312, "rewards/rejected": -77.57228088378906, "step": 4716 }, { "epoch": 1.04, "learning_rate": 9.344098564587418e-06, "logits/chosen": -1.2418628931045532, "logits/rejected": -1.1269876956939697, "logps/chosen": -194.8656768798828, "logps/rejected": -274.7246398925781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2219085693359375, "rewards/margins": 8.774456977844238, "rewards/rejected": -8.5525484085083, "step": 4717 }, { "epoch": 1.04, "learning_rate": 9.343210852655348e-06, "logits/chosen": -1.0880610942840576, "logits/rejected": -0.9360173344612122, "logps/chosen": -109.5548095703125, "logps/rejected": -296.3883361816406, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.6238670349121094, "rewards/margins": 11.644713401794434, "rewards/rejected": -12.268580436706543, "step": 4718 }, { "epoch": 1.04, "learning_rate": 9.342322582631125e-06, "logits/chosen": -1.4678510427474976, "logits/rejected": -1.586120843887329, "logps/chosen": -171.5139617919922, "logps/rejected": -268.98687744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.7022873163223267, "rewards/margins": 15.429083824157715, "rewards/rejected": -13.72679615020752, "step": 4719 }, { "epoch": 1.04, "learning_rate": 9.341433754628888e-06, "logits/chosen": -0.9642672538757324, "logits/rejected": -0.8740307688713074, "logps/chosen": -92.85633850097656, "logps/rejected": -234.90660095214844, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": 0.4744415283203125, "rewards/margins": 5.251391887664795, "rewards/rejected": -4.776950359344482, "step": 4720 }, { "epoch": 1.04, "learning_rate": 9.340544368762851e-06, "logits/chosen": -1.2222870588302612, "logits/rejected": -1.2398189306259155, "logps/chosen": -58.45865249633789, "logps/rejected": -88.81177520751953, "loss": 0.1275, "rewards/accuracies": 1.0, "rewards/chosen": -0.554290771484375, "rewards/margins": 1.2360390424728394, "rewards/rejected": -1.7903298139572144, "step": 4721 }, { "epoch": 1.05, "learning_rate": 9.339654425147297e-06, "logits/chosen": -1.2485439777374268, "logits/rejected": -1.310149073600769, "logps/chosen": -205.19635009765625, "logps/rejected": -141.6404571533203, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9590255618095398, "rewards/margins": 9.538814544677734, "rewards/rejected": -10.49783992767334, "step": 4722 }, { "epoch": 1.05, "learning_rate": 9.338763923896583e-06, "logits/chosen": -0.7512615323066711, "logits/rejected": -0.6281927227973938, "logps/chosen": -85.6761245727539, "logps/rejected": -240.7796630859375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.706628441810608, "rewards/margins": 6.589892864227295, "rewards/rejected": -8.296521186828613, "step": 4723 }, { "epoch": 1.05, "learning_rate": 9.337872865125133e-06, "logits/chosen": -1.0780861377716064, "logits/rejected": -1.0839115381240845, "logps/chosen": -152.17010498046875, "logps/rejected": -163.281982421875, "loss": 0.1012, "rewards/accuracies": 1.0, "rewards/chosen": -3.731823682785034, "rewards/margins": 2.517176389694214, "rewards/rejected": -6.249000072479248, "step": 4724 }, { "epoch": 1.05, "learning_rate": 9.336981248947447e-06, "logits/chosen": -1.1509182453155518, "logits/rejected": -1.1165199279785156, "logps/chosen": -79.59693908691406, "logps/rejected": -100.28074645996094, "loss": 0.0998, "rewards/accuracies": 1.0, "rewards/chosen": -1.071431040763855, "rewards/margins": 1.6611555814743042, "rewards/rejected": -2.732586622238159, "step": 4725 }, { "epoch": 1.05, "learning_rate": 9.336089075478098e-06, "logits/chosen": -1.0742369890213013, "logits/rejected": -1.0871418714523315, "logps/chosen": -174.71865844726562, "logps/rejected": -180.10113525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.584665060043335, "rewards/margins": 9.5015869140625, "rewards/rejected": -5.916922092437744, "step": 4726 }, { "epoch": 1.05, "learning_rate": 9.335196344831727e-06, "logits/chosen": -1.4379345178604126, "logits/rejected": -1.3803284168243408, "logps/chosen": -123.7379150390625, "logps/rejected": -215.71414184570312, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": 2.39664626121521, "rewards/margins": 4.660285949707031, "rewards/rejected": -2.2636399269104004, "step": 4727 }, { "epoch": 1.05, "learning_rate": 9.334303057123044e-06, "logits/chosen": -1.1498119831085205, "logits/rejected": -1.1498119831085205, "logps/chosen": -93.77467346191406, "logps/rejected": -93.77467346191406, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.854346752166748, "rewards/margins": 0.0, "rewards/rejected": -4.854346752166748, "step": 4728 }, { "epoch": 1.05, "learning_rate": 9.33340921246684e-06, "logits/chosen": -1.422121286392212, "logits/rejected": -1.3630318641662598, "logps/chosen": -192.845458984375, "logps/rejected": -293.1171875, "loss": 1.1741, "rewards/accuracies": 0.0, "rewards/chosen": -6.2128753662109375, "rewards/margins": -2.247755289077759, "rewards/rejected": -3.9651200771331787, "step": 4729 }, { "epoch": 1.05, "learning_rate": 9.332514810977969e-06, "logits/chosen": -1.3803517818450928, "logits/rejected": -1.5059071779251099, "logps/chosen": -222.9649658203125, "logps/rejected": -308.2569274902344, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.511810302734375, "rewards/margins": 20.913354873657227, "rewards/rejected": -23.4251651763916, "step": 4730 }, { "epoch": 1.05, "learning_rate": 9.331619852771361e-06, "logits/chosen": -0.9827865362167358, "logits/rejected": -0.982487678527832, "logps/chosen": -115.54022216796875, "logps/rejected": -105.17410278320312, "loss": 0.2171, "rewards/accuracies": 1.0, "rewards/chosen": -2.807673692703247, "rewards/margins": 0.6446852684020996, "rewards/rejected": -3.4523589611053467, "step": 4731 }, { "epoch": 1.05, "learning_rate": 9.330724337962013e-06, "logits/chosen": -1.4520925283432007, "logits/rejected": -1.419083595275879, "logps/chosen": -75.04486083984375, "logps/rejected": -132.903076171875, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": -0.9311310052871704, "rewards/margins": 2.536381721496582, "rewards/rejected": -3.467512607574463, "step": 4732 }, { "epoch": 1.05, "learning_rate": 9.329828266665e-06, "logits/chosen": -1.1200600862503052, "logits/rejected": -1.0845468044281006, "logps/chosen": -111.82049560546875, "logps/rejected": -220.55982971191406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5253319144248962, "rewards/margins": 8.02929973602295, "rewards/rejected": -8.554631233215332, "step": 4733 }, { "epoch": 1.05, "learning_rate": 9.328931638995461e-06, "logits/chosen": -1.2202770709991455, "logits/rejected": -1.0511970520019531, "logps/chosen": -241.3313446044922, "logps/rejected": -894.4891357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.8241043090820312, "rewards/margins": 68.04226684570312, "rewards/rejected": -66.2181625366211, "step": 4734 }, { "epoch": 1.05, "learning_rate": 9.328034455068616e-06, "logits/chosen": -1.2921264171600342, "logits/rejected": -1.257340669631958, "logps/chosen": -138.1732177734375, "logps/rejected": -253.31094360351562, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.7634201049804688, "rewards/margins": 7.405644416809082, "rewards/rejected": -11.16906452178955, "step": 4735 }, { "epoch": 1.05, "learning_rate": 9.327136714999745e-06, "logits/chosen": -1.0795903205871582, "logits/rejected": -1.0795903205871582, "logps/chosen": -107.38011169433594, "logps/rejected": -107.38011169433594, "loss": 0.3476, "rewards/accuracies": 0.0, "rewards/chosen": -2.7094993591308594, "rewards/margins": 0.0, "rewards/rejected": -2.7094993591308594, "step": 4736 }, { "epoch": 1.05, "learning_rate": 9.32623841890421e-06, "logits/chosen": -1.3014711141586304, "logits/rejected": -1.381137728691101, "logps/chosen": -244.83770751953125, "logps/rejected": -171.2295379638672, "loss": 0.0873, "rewards/accuracies": 1.0, "rewards/chosen": -0.60491943359375, "rewards/margins": 1.907196044921875, "rewards/rejected": -2.512115478515625, "step": 4737 }, { "epoch": 1.05, "learning_rate": 9.325339566897437e-06, "logits/chosen": -1.2837258577346802, "logits/rejected": -1.3821215629577637, "logps/chosen": -190.06680297851562, "logps/rejected": -161.97015380859375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -2.5312697887420654, "rewards/margins": 4.522604942321777, "rewards/rejected": -7.053874969482422, "step": 4738 }, { "epoch": 1.05, "learning_rate": 9.324440159094927e-06, "logits/chosen": -0.9387958645820618, "logits/rejected": -0.8443081974983215, "logps/chosen": -77.34335327148438, "logps/rejected": -161.62046813964844, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.4194908142089844, "rewards/margins": 6.478785991668701, "rewards/rejected": -6.8982768058776855, "step": 4739 }, { "epoch": 1.05, "learning_rate": 9.323540195612255e-06, "logits/chosen": -1.0812410116195679, "logits/rejected": -1.0471675395965576, "logps/chosen": -169.1190185546875, "logps/rejected": -272.76861572265625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.185565233230591, "rewards/margins": 6.358890533447266, "rewards/rejected": -8.544455528259277, "step": 4740 }, { "epoch": 1.05, "learning_rate": 9.322639676565059e-06, "logits/chosen": -1.0665898323059082, "logits/rejected": -1.1167856454849243, "logps/chosen": -142.97718811035156, "logps/rejected": -115.53465270996094, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -1.7266358137130737, "rewards/margins": 3.674069404602051, "rewards/rejected": -5.400705337524414, "step": 4741 }, { "epoch": 1.05, "learning_rate": 9.321738602069057e-06, "logits/chosen": -1.551680564880371, "logits/rejected": -1.4893800020217896, "logps/chosen": -100.6743392944336, "logps/rejected": -255.51431274414062, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.21233749389648438, "rewards/margins": 5.339873790740967, "rewards/rejected": -5.127536296844482, "step": 4742 }, { "epoch": 1.05, "learning_rate": 9.320836972240034e-06, "logits/chosen": -1.2294015884399414, "logits/rejected": -1.1319334506988525, "logps/chosen": -145.9066162109375, "logps/rejected": -329.3828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8464080691337585, "rewards/margins": 10.576448440551758, "rewards/rejected": -11.422856330871582, "step": 4743 }, { "epoch": 1.05, "learning_rate": 9.319934787193846e-06, "logits/chosen": -1.2787258625030518, "logits/rejected": -1.1691336631774902, "logps/chosen": -131.9823760986328, "logps/rejected": -215.22113037109375, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -0.2397110015153885, "rewards/margins": 3.144671678543091, "rewards/rejected": -3.384382724761963, "step": 4744 }, { "epoch": 1.05, "learning_rate": 9.319032047046422e-06, "logits/chosen": -1.365714192390442, "logits/rejected": -1.4865806102752686, "logps/chosen": -206.57382202148438, "logps/rejected": -127.07438659667969, "loss": 0.1919, "rewards/accuracies": 1.0, "rewards/chosen": -1.8325592279434204, "rewards/margins": 0.7596343755722046, "rewards/rejected": -2.592193603515625, "step": 4745 }, { "epoch": 1.05, "learning_rate": 9.318128751913764e-06, "logits/chosen": -1.086604356765747, "logits/rejected": -1.0980408191680908, "logps/chosen": -170.94863891601562, "logps/rejected": -117.1966552734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.7451248168945312, "rewards/margins": 9.358123779296875, "rewards/rejected": -7.612998962402344, "step": 4746 }, { "epoch": 1.05, "learning_rate": 9.317224901911941e-06, "logits/chosen": -1.4037435054779053, "logits/rejected": -1.4070268869400024, "logps/chosen": -82.27259063720703, "logps/rejected": -115.29894256591797, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": 0.8487457633018494, "rewards/margins": 3.819661855697632, "rewards/rejected": -2.9709160327911377, "step": 4747 }, { "epoch": 1.05, "learning_rate": 9.316320497157097e-06, "logits/chosen": -1.1118444204330444, "logits/rejected": -1.0181829929351807, "logps/chosen": -176.09274291992188, "logps/rejected": -239.2982940673828, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8129761219024658, "rewards/margins": 10.656123161315918, "rewards/rejected": -12.469099044799805, "step": 4748 }, { "epoch": 1.05, "learning_rate": 9.315415537765446e-06, "logits/chosen": -0.8602918386459351, "logits/rejected": -0.8858339786529541, "logps/chosen": -275.9559326171875, "logps/rejected": -167.3052978515625, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -3.5698533058166504, "rewards/margins": 2.835444450378418, "rewards/rejected": -6.405297756195068, "step": 4749 }, { "epoch": 1.05, "learning_rate": 9.314510023853272e-06, "logits/chosen": -0.9227886199951172, "logits/rejected": -0.9598615765571594, "logps/chosen": -166.56973266601562, "logps/rejected": -221.29310607910156, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6930481195449829, "rewards/margins": 8.681772232055664, "rewards/rejected": -9.374820709228516, "step": 4750 }, { "epoch": 1.05, "learning_rate": 9.313603955536931e-06, "logits/chosen": -0.8837896585464478, "logits/rejected": -0.8509042263031006, "logps/chosen": -90.73204040527344, "logps/rejected": -173.83493041992188, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -1.9566940069198608, "rewards/margins": 3.3085098266601562, "rewards/rejected": -5.265203952789307, "step": 4751 }, { "epoch": 1.05, "learning_rate": 9.312697332932852e-06, "logits/chosen": -0.9456909894943237, "logits/rejected": -0.9549131989479065, "logps/chosen": -173.5312042236328, "logps/rejected": -149.62261962890625, "loss": 0.2831, "rewards/accuracies": 1.0, "rewards/chosen": -5.701470375061035, "rewards/margins": 0.27239131927490234, "rewards/rejected": -5.9738616943359375, "step": 4752 }, { "epoch": 1.05, "learning_rate": 9.311790156157533e-06, "logits/chosen": -1.3042672872543335, "logits/rejected": -1.2630631923675537, "logps/chosen": -102.30338287353516, "logps/rejected": -246.92330932617188, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.5626229047775269, "rewards/margins": 5.251912593841553, "rewards/rejected": -6.814535617828369, "step": 4753 }, { "epoch": 1.05, "learning_rate": 9.310882425327544e-06, "logits/chosen": -1.2574011087417603, "logits/rejected": -1.2720800638198853, "logps/chosen": -103.56309509277344, "logps/rejected": -87.99419403076172, "loss": 0.2137, "rewards/accuracies": 1.0, "rewards/chosen": -2.042386770248413, "rewards/margins": 1.4976844787597656, "rewards/rejected": -3.5400712490081787, "step": 4754 }, { "epoch": 1.05, "learning_rate": 9.309974140559525e-06, "logits/chosen": -0.8251798152923584, "logits/rejected": -0.827155590057373, "logps/chosen": -217.1844482421875, "logps/rejected": -246.17662048339844, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 0.5307693481445312, "rewards/margins": 11.307252883911133, "rewards/rejected": -10.776483535766602, "step": 4755 }, { "epoch": 1.05, "learning_rate": 9.309065301970193e-06, "logits/chosen": -1.1496509313583374, "logits/rejected": -1.0403871536254883, "logps/chosen": -120.8730697631836, "logps/rejected": -316.4600524902344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.28022462129592896, "rewards/margins": 7.382967948913574, "rewards/rejected": -7.6631927490234375, "step": 4756 }, { "epoch": 1.05, "learning_rate": 9.308155909676326e-06, "logits/chosen": -0.9892531037330627, "logits/rejected": -0.9686471223831177, "logps/chosen": -130.1060028076172, "logps/rejected": -156.74752807617188, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.71379554271698, "rewards/margins": 4.284544467926025, "rewards/rejected": -5.998340129852295, "step": 4757 }, { "epoch": 1.05, "learning_rate": 9.307245963794782e-06, "logits/chosen": -0.9091593623161316, "logits/rejected": -0.8663540482521057, "logps/chosen": -120.25057220458984, "logps/rejected": -126.48511505126953, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.2101325988769531, "rewards/margins": 4.31244421005249, "rewards/rejected": -5.522576808929443, "step": 4758 }, { "epoch": 1.05, "learning_rate": 9.306335464442485e-06, "logits/chosen": -1.5039957761764526, "logits/rejected": -1.498217225074768, "logps/chosen": -84.57058715820312, "logps/rejected": -88.10189056396484, "loss": 0.2342, "rewards/accuracies": 1.0, "rewards/chosen": -1.1697548627853394, "rewards/margins": 0.5149955749511719, "rewards/rejected": -1.6847504377365112, "step": 4759 }, { "epoch": 1.05, "learning_rate": 9.305424411736434e-06, "logits/chosen": -1.1055421829223633, "logits/rejected": -1.1428296566009521, "logps/chosen": -221.6743927001953, "logps/rejected": -263.86334228515625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.7445816397666931, "rewards/margins": 5.489439487457275, "rewards/rejected": -6.234021186828613, "step": 4760 }, { "epoch": 1.05, "learning_rate": 9.304512805793696e-06, "logits/chosen": -1.0876027345657349, "logits/rejected": -1.0474543571472168, "logps/chosen": -231.81561279296875, "logps/rejected": -251.6128387451172, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.5904266834259033, "rewards/margins": 8.728151321411133, "rewards/rejected": -10.318577766418457, "step": 4761 }, { "epoch": 1.05, "learning_rate": 9.30360064673141e-06, "logits/chosen": -1.4091107845306396, "logits/rejected": -1.579774022102356, "logps/chosen": -167.62606811523438, "logps/rejected": -105.27494049072266, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.22918395698070526, "rewards/margins": 5.393016815185547, "rewards/rejected": -5.622200965881348, "step": 4762 }, { "epoch": 1.05, "learning_rate": 9.302687934666787e-06, "logits/chosen": -1.1470431089401245, "logits/rejected": -1.165964126586914, "logps/chosen": -212.48204040527344, "logps/rejected": -112.89933776855469, "loss": 0.1038, "rewards/accuracies": 1.0, "rewards/chosen": -5.741023540496826, "rewards/margins": 1.4668593406677246, "rewards/rejected": -7.207882881164551, "step": 4763 }, { "epoch": 1.05, "learning_rate": 9.301774669717108e-06, "logits/chosen": -1.2975949048995972, "logits/rejected": -1.2990602254867554, "logps/chosen": -116.66436767578125, "logps/rejected": -154.00779724121094, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.776763916015625, "rewards/margins": 6.001489162445068, "rewards/rejected": -5.224725246429443, "step": 4764 }, { "epoch": 1.05, "learning_rate": 9.300860851999723e-06, "logits/chosen": -1.199589729309082, "logits/rejected": -1.2202636003494263, "logps/chosen": -158.42556762695312, "logps/rejected": -147.70761108398438, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -3.107128858566284, "rewards/margins": 3.328045606613159, "rewards/rejected": -6.435174465179443, "step": 4765 }, { "epoch": 1.05, "learning_rate": 9.299946481632058e-06, "logits/chosen": -0.8204196095466614, "logits/rejected": -0.8317416310310364, "logps/chosen": -185.00494384765625, "logps/rejected": -157.79083251953125, "loss": 0.0497, "rewards/accuracies": 1.0, "rewards/chosen": 0.8629806637763977, "rewards/margins": 6.462124824523926, "rewards/rejected": -5.599143981933594, "step": 4766 }, { "epoch": 1.06, "learning_rate": 9.299031558731608e-06, "logits/chosen": -0.9165010452270508, "logits/rejected": 0.18760937452316284, "logps/chosen": -76.11498260498047, "logps/rejected": -149.16259765625, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.5282135009765625, "rewards/margins": 8.372673034667969, "rewards/rejected": -9.900886535644531, "step": 4767 }, { "epoch": 1.06, "learning_rate": 9.298116083415937e-06, "logits/chosen": -1.0483499765396118, "logits/rejected": -1.0549403429031372, "logps/chosen": -97.48728942871094, "logps/rejected": -103.39495086669922, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -1.9967682361602783, "rewards/margins": 3.6219632625579834, "rewards/rejected": -5.618731498718262, "step": 4768 }, { "epoch": 1.06, "learning_rate": 9.297200055802683e-06, "logits/chosen": -1.1320208311080933, "logits/rejected": -1.199797511100769, "logps/chosen": -123.5853500366211, "logps/rejected": -116.69281005859375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 2.588233232498169, "rewards/margins": 7.287408828735352, "rewards/rejected": -4.6991753578186035, "step": 4769 }, { "epoch": 1.06, "learning_rate": 9.296283476009551e-06, "logits/chosen": -1.2428499460220337, "logits/rejected": -1.0707666873931885, "logps/chosen": -212.27740478515625, "logps/rejected": -1038.683349609375, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/chosen": -2.6837921142578125, "rewards/margins": 90.49765014648438, "rewards/rejected": -93.18144226074219, "step": 4770 }, { "epoch": 1.06, "learning_rate": 9.295366344154319e-06, "logits/chosen": -1.2598381042480469, "logits/rejected": -1.3024442195892334, "logps/chosen": -106.39900207519531, "logps/rejected": -142.8274383544922, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.0519752502441406, "rewards/margins": 6.452981948852539, "rewards/rejected": -9.50495719909668, "step": 4771 }, { "epoch": 1.06, "learning_rate": 9.29444866035484e-06, "logits/chosen": -0.7635390758514404, "logits/rejected": -0.768273651599884, "logps/chosen": -256.95556640625, "logps/rejected": -162.0267333984375, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.847644031047821, "rewards/margins": 4.349466323852539, "rewards/rejected": -5.197110176086426, "step": 4772 }, { "epoch": 1.06, "learning_rate": 9.293530424729029e-06, "logits/chosen": -1.0463842153549194, "logits/rejected": -1.1000632047653198, "logps/chosen": -247.39755249023438, "logps/rejected": -116.06031799316406, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 1.6302398443222046, "rewards/margins": 9.300307273864746, "rewards/rejected": -7.67006778717041, "step": 4773 }, { "epoch": 1.06, "learning_rate": 9.292611637394881e-06, "logits/chosen": -1.2235580682754517, "logits/rejected": -0.6211689114570618, "logps/chosen": -142.43319702148438, "logps/rejected": -250.31097412109375, "loss": 0.0891, "rewards/accuracies": 1.0, "rewards/chosen": -5.197152614593506, "rewards/margins": 1.6399383544921875, "rewards/rejected": -6.837090969085693, "step": 4774 }, { "epoch": 1.06, "learning_rate": 9.291692298470457e-06, "logits/chosen": -1.1236480474472046, "logits/rejected": -1.1236480474472046, "logps/chosen": -127.75533294677734, "logps/rejected": -127.75533294677734, "loss": 0.3927, "rewards/accuracies": 0.0, "rewards/chosen": -5.966353893280029, "rewards/margins": 0.0, "rewards/rejected": -5.966353893280029, "step": 4775 }, { "epoch": 1.06, "learning_rate": 9.29077240807389e-06, "logits/chosen": -1.1674100160598755, "logits/rejected": -1.1902035474777222, "logps/chosen": -125.35265350341797, "logps/rejected": -95.12535858154297, "loss": 0.1276, "rewards/accuracies": 1.0, "rewards/chosen": -2.800342559814453, "rewards/margins": 1.2426772117614746, "rewards/rejected": -4.043019771575928, "step": 4776 }, { "epoch": 1.06, "learning_rate": 9.289851966323382e-06, "logits/chosen": -1.175325870513916, "logits/rejected": -1.175325870513916, "logps/chosen": -94.86471557617188, "logps/rejected": -94.86471557617188, "loss": 0.3537, "rewards/accuracies": 0.0, "rewards/chosen": -6.267554759979248, "rewards/margins": 0.0, "rewards/rejected": -6.267554759979248, "step": 4777 }, { "epoch": 1.06, "learning_rate": 9.288930973337212e-06, "logits/chosen": -1.533990502357483, "logits/rejected": -1.6162813901901245, "logps/chosen": -132.20388793945312, "logps/rejected": -207.2196502685547, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": 1.09862220287323, "rewards/margins": 10.09345531463623, "rewards/rejected": -8.994832992553711, "step": 4778 }, { "epoch": 1.06, "learning_rate": 9.288009429233717e-06, "logits/chosen": -0.8756727576255798, "logits/rejected": -0.5870291590690613, "logps/chosen": -128.74822998046875, "logps/rejected": -1199.9227294921875, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": 1.9733794927597046, "rewards/margins": 101.21714782714844, "rewards/rejected": -99.24376678466797, "step": 4779 }, { "epoch": 1.06, "learning_rate": 9.287087334131322e-06, "logits/chosen": -1.0117729902267456, "logits/rejected": -0.9988918900489807, "logps/chosen": -113.24361419677734, "logps/rejected": -169.80764770507812, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": 0.4629928767681122, "rewards/margins": 3.9094772338867188, "rewards/rejected": -3.446484327316284, "step": 4780 }, { "epoch": 1.06, "learning_rate": 9.28616468814851e-06, "logits/chosen": -1.1665773391723633, "logits/rejected": -1.2425036430358887, "logps/chosen": -182.41371154785156, "logps/rejected": -220.8800048828125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.30728912353515625, "rewards/margins": 10.364710807800293, "rewards/rejected": -10.67199993133545, "step": 4781 }, { "epoch": 1.06, "learning_rate": 9.28524149140384e-06, "logits/chosen": -0.899467408657074, "logits/rejected": -0.8187959790229797, "logps/chosen": -136.907958984375, "logps/rejected": -169.88668823242188, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.4592056274414062, "rewards/margins": 4.502723217010498, "rewards/rejected": -5.961928844451904, "step": 4782 }, { "epoch": 1.06, "learning_rate": 9.284317744015938e-06, "logits/chosen": -1.2870938777923584, "logits/rejected": -1.2896337509155273, "logps/chosen": -144.50123596191406, "logps/rejected": -158.18324279785156, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.1715133637189865, "rewards/margins": 6.608105659484863, "rewards/rejected": -6.436592102050781, "step": 4783 }, { "epoch": 1.06, "learning_rate": 9.283393446103506e-06, "logits/chosen": -0.7788021564483643, "logits/rejected": -0.7532666325569153, "logps/chosen": -95.8353271484375, "logps/rejected": -179.3278350830078, "loss": 0.0496, "rewards/accuracies": 1.0, "rewards/chosen": -2.5041069984436035, "rewards/margins": 3.7039504051208496, "rewards/rejected": -6.208057403564453, "step": 4784 }, { "epoch": 1.06, "learning_rate": 9.282468597785312e-06, "logits/chosen": -0.9819914698600769, "logits/rejected": -1.0821808576583862, "logps/chosen": -128.98184204101562, "logps/rejected": -127.33580017089844, "loss": 0.0501, "rewards/accuracies": 1.0, "rewards/chosen": -1.284997582435608, "rewards/margins": 2.2494583129882812, "rewards/rejected": -3.5344560146331787, "step": 4785 }, { "epoch": 1.06, "learning_rate": 9.2815431991802e-06, "logits/chosen": -1.4101976156234741, "logits/rejected": -1.5788044929504395, "logps/chosen": -195.95437622070312, "logps/rejected": -164.57472229003906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.394667148590088, "rewards/margins": 11.459220886230469, "rewards/rejected": -9.064554214477539, "step": 4786 }, { "epoch": 1.06, "learning_rate": 9.280617250407078e-06, "logits/chosen": -1.0106117725372314, "logits/rejected": -0.9657726287841797, "logps/chosen": -117.78118133544922, "logps/rejected": -221.76303100585938, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -2.2059686183929443, "rewards/margins": 7.580136299133301, "rewards/rejected": -9.786105155944824, "step": 4787 }, { "epoch": 1.06, "learning_rate": 9.27969075158493e-06, "logits/chosen": -1.4066095352172852, "logits/rejected": -1.3140419721603394, "logps/chosen": -70.948486328125, "logps/rejected": -121.62637329101562, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": 1.3077529668807983, "rewards/margins": 3.2103424072265625, "rewards/rejected": -1.9025894403457642, "step": 4788 }, { "epoch": 1.06, "learning_rate": 9.278763702832809e-06, "logits/chosen": -1.1071451902389526, "logits/rejected": -1.1054856777191162, "logps/chosen": -79.89669799804688, "logps/rejected": -146.298095703125, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.2330581694841385, "rewards/margins": 5.225313663482666, "rewards/rejected": -5.458371639251709, "step": 4789 }, { "epoch": 1.06, "learning_rate": 9.277836104269837e-06, "logits/chosen": -1.4880484342575073, "logits/rejected": -1.4681065082550049, "logps/chosen": -124.31550598144531, "logps/rejected": -222.27059936523438, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -3.1834259033203125, "rewards/margins": 5.089022636413574, "rewards/rejected": -8.272448539733887, "step": 4790 }, { "epoch": 1.06, "learning_rate": 9.276907956015212e-06, "logits/chosen": -1.5036170482635498, "logits/rejected": -1.5240659713745117, "logps/chosen": -118.89067077636719, "logps/rejected": -169.64694213867188, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 2.4045517444610596, "rewards/margins": 5.147821426391602, "rewards/rejected": -2.743269443511963, "step": 4791 }, { "epoch": 1.06, "learning_rate": 9.275979258188192e-06, "logits/chosen": -1.3628573417663574, "logits/rejected": -1.3465112447738647, "logps/chosen": -158.08200073242188, "logps/rejected": -149.87840270996094, "loss": 0.3565, "rewards/accuracies": 1.0, "rewards/chosen": -2.9336655139923096, "rewards/margins": 3.9056785106658936, "rewards/rejected": -6.839344024658203, "step": 4792 }, { "epoch": 1.06, "learning_rate": 9.275050010908118e-06, "logits/chosen": -1.138661503791809, "logits/rejected": -1.138661503791809, "logps/chosen": -157.91140747070312, "logps/rejected": -157.91140747070312, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.540452480316162, "rewards/margins": 0.0, "rewards/rejected": -5.540452480316162, "step": 4793 }, { "epoch": 1.06, "learning_rate": 9.274120214294395e-06, "logits/chosen": -1.6148701906204224, "logits/rejected": -1.8343274593353271, "logps/chosen": -260.9553527832031, "logps/rejected": -120.2762680053711, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.315901279449463, "rewards/margins": 5.421957969665527, "rewards/rejected": -7.73785924911499, "step": 4794 }, { "epoch": 1.06, "learning_rate": 9.273189868466499e-06, "logits/chosen": -1.6728715896606445, "logits/rejected": -1.6045098304748535, "logps/chosen": -86.1488037109375, "logps/rejected": -93.50724792480469, "loss": 0.0495, "rewards/accuracies": 1.0, "rewards/chosen": -1.1513603925704956, "rewards/margins": 2.347426414489746, "rewards/rejected": -3.4987869262695312, "step": 4795 }, { "epoch": 1.06, "learning_rate": 9.272258973543977e-06, "logits/chosen": -1.392459511756897, "logits/rejected": -1.3649123907089233, "logps/chosen": -84.29371643066406, "logps/rejected": -124.01687622070312, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -1.407293677330017, "rewards/margins": 3.0917510986328125, "rewards/rejected": -4.499044895172119, "step": 4796 }, { "epoch": 1.06, "learning_rate": 9.271327529646447e-06, "logits/chosen": -1.0905033349990845, "logits/rejected": -1.123931646347046, "logps/chosen": -220.92770385742188, "logps/rejected": -250.9026336669922, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.7319977283477783, "rewards/margins": 14.238733291625977, "rewards/rejected": -12.506735801696777, "step": 4797 }, { "epoch": 1.06, "learning_rate": 9.270395536893599e-06, "logits/chosen": -1.1875571012496948, "logits/rejected": -1.1621507406234741, "logps/chosen": -78.67179870605469, "logps/rejected": -212.77853393554688, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": 0.6392288208007812, "rewards/margins": 3.4418532848358154, "rewards/rejected": -2.802624464035034, "step": 4798 }, { "epoch": 1.06, "learning_rate": 9.269462995405189e-06, "logits/chosen": -1.3528724908828735, "logits/rejected": -1.3508151769638062, "logps/chosen": -101.06798553466797, "logps/rejected": -110.88478088378906, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": -0.8124931454658508, "rewards/margins": 2.1265716552734375, "rewards/rejected": -2.9390647411346436, "step": 4799 }, { "epoch": 1.06, "learning_rate": 9.268529905301049e-06, "logits/chosen": -1.2695001363754272, "logits/rejected": -1.3457717895507812, "logps/chosen": -185.32699584960938, "logps/rejected": -94.96701049804688, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -1.2918274402618408, "rewards/margins": 2.6804111003875732, "rewards/rejected": -3.972238540649414, "step": 4800 }, { "epoch": 1.06, "learning_rate": 9.267596266701076e-06, "logits/chosen": -1.6529079675674438, "logits/rejected": -1.608583927154541, "logps/chosen": -80.22259521484375, "logps/rejected": -189.40541076660156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9451133608818054, "rewards/margins": 10.07404899597168, "rewards/rejected": -11.01916217803955, "step": 4801 }, { "epoch": 1.06, "learning_rate": 9.266662079725241e-06, "logits/chosen": -1.3777354955673218, "logits/rejected": -0.9304041266441345, "logps/chosen": -189.21856689453125, "logps/rejected": -1435.544677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0669525861740112, "rewards/margins": 131.71702575683594, "rewards/rejected": -132.7839813232422, "step": 4802 }, { "epoch": 1.06, "learning_rate": 9.265727344493587e-06, "logits/chosen": -1.351318359375, "logits/rejected": -1.3250021934509277, "logps/chosen": -107.09309387207031, "logps/rejected": -191.2437286376953, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3739601373672485, "rewards/margins": 8.485523223876953, "rewards/rejected": -9.85948371887207, "step": 4803 }, { "epoch": 1.06, "learning_rate": 9.264792061126224e-06, "logits/chosen": -1.005020022392273, "logits/rejected": -0.336342453956604, "logps/chosen": -120.20928955078125, "logps/rejected": -384.7467956542969, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -4.942022800445557, "rewards/margins": 24.540069580078125, "rewards/rejected": -29.482091903686523, "step": 4804 }, { "epoch": 1.06, "learning_rate": 9.263856229743334e-06, "logits/chosen": -1.0523834228515625, "logits/rejected": -1.0342704057693481, "logps/chosen": -60.72468948364258, "logps/rejected": -127.48736572265625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.48388099670410156, "rewards/margins": 5.422765254974365, "rewards/rejected": -5.906646251678467, "step": 4805 }, { "epoch": 1.06, "learning_rate": 9.262919850465166e-06, "logits/chosen": -1.2036670446395874, "logits/rejected": -1.194828987121582, "logps/chosen": -108.68252563476562, "logps/rejected": -175.02279663085938, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.6898094415664673, "rewards/margins": 7.5891242027282715, "rewards/rejected": -8.27893352508545, "step": 4806 }, { "epoch": 1.06, "learning_rate": 9.261982923412046e-06, "logits/chosen": -1.559023141860962, "logits/rejected": -1.5677671432495117, "logps/chosen": -92.16020202636719, "logps/rejected": -156.8045196533203, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -4.873316287994385, "rewards/margins": 6.405441761016846, "rewards/rejected": -11.27875804901123, "step": 4807 }, { "epoch": 1.06, "learning_rate": 9.261045448704367e-06, "logits/chosen": -1.00981605052948, "logits/rejected": -0.9836798310279846, "logps/chosen": -181.81674194335938, "logps/rejected": -295.9691162109375, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 0.8224868774414062, "rewards/margins": 5.377586364746094, "rewards/rejected": -4.5550994873046875, "step": 4808 }, { "epoch": 1.06, "learning_rate": 9.26010742646259e-06, "logits/chosen": -0.8095510601997375, "logits/rejected": -0.7939541935920715, "logps/chosen": -178.230712890625, "logps/rejected": -166.71029663085938, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.5829437375068665, "rewards/margins": 5.239081382751465, "rewards/rejected": -5.822025299072266, "step": 4809 }, { "epoch": 1.06, "learning_rate": 9.259168856807249e-06, "logits/chosen": -1.047403335571289, "logits/rejected": -1.1019257307052612, "logps/chosen": -207.65924072265625, "logps/rejected": -118.90471649169922, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.5072693228721619, "rewards/margins": 9.257481575012207, "rewards/rejected": -8.750212669372559, "step": 4810 }, { "epoch": 1.06, "learning_rate": 9.25822973985895e-06, "logits/chosen": -1.3264867067337036, "logits/rejected": -1.360913634300232, "logps/chosen": -171.28042602539062, "logps/rejected": -99.94285583496094, "loss": 0.3467, "rewards/accuracies": 1.0, "rewards/chosen": 3.249957323074341, "rewards/margins": 8.773375511169434, "rewards/rejected": -5.523417949676514, "step": 4811 }, { "epoch": 1.07, "learning_rate": 9.257290075738365e-06, "logits/chosen": -0.7894576191902161, "logits/rejected": -0.7894576191902161, "logps/chosen": -124.01309967041016, "logps/rejected": -124.01309967041016, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -6.32797384262085, "rewards/margins": 0.0, "rewards/rejected": -6.32797384262085, "step": 4812 }, { "epoch": 1.07, "learning_rate": 9.25634986456624e-06, "logits/chosen": -0.9255576729774475, "logits/rejected": -0.947864830493927, "logps/chosen": -146.37564086914062, "logps/rejected": -158.5887908935547, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.3756210505962372, "rewards/margins": 6.036766529083252, "rewards/rejected": -6.412387371063232, "step": 4813 }, { "epoch": 1.07, "learning_rate": 9.25540910646339e-06, "logits/chosen": -1.2535897493362427, "logits/rejected": -1.2697893381118774, "logps/chosen": -79.40580749511719, "logps/rejected": -146.7842559814453, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -2.1933751106262207, "rewards/margins": 6.031022548675537, "rewards/rejected": -8.224397659301758, "step": 4814 }, { "epoch": 1.07, "learning_rate": 9.254467801550699e-06, "logits/chosen": -1.501932978630066, "logits/rejected": -1.4850049018859863, "logps/chosen": -104.97541809082031, "logps/rejected": -127.61717987060547, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -0.5640930533409119, "rewards/margins": 4.116168022155762, "rewards/rejected": -4.680261135101318, "step": 4815 }, { "epoch": 1.07, "learning_rate": 9.253525949949123e-06, "logits/chosen": -1.0381314754486084, "logits/rejected": -1.0425286293029785, "logps/chosen": -93.66592407226562, "logps/rejected": -81.71327209472656, "loss": 0.3362, "rewards/accuracies": 1.0, "rewards/chosen": -2.8982903957366943, "rewards/margins": 0.043302059173583984, "rewards/rejected": -2.9415924549102783, "step": 4816 }, { "epoch": 1.07, "learning_rate": 9.252583551779687e-06, "logits/chosen": -0.9658272862434387, "logits/rejected": -0.9079230427742004, "logps/chosen": -145.65121459960938, "logps/rejected": -233.99185180664062, "loss": 0.5013, "rewards/accuracies": 1.0, "rewards/chosen": 1.0120590925216675, "rewards/margins": 3.020394802093506, "rewards/rejected": -2.008335828781128, "step": 4817 }, { "epoch": 1.07, "learning_rate": 9.251640607163488e-06, "logits/chosen": -1.333232045173645, "logits/rejected": -1.265242099761963, "logps/chosen": -149.8795928955078, "logps/rejected": -167.79269409179688, "loss": 0.3377, "rewards/accuracies": 1.0, "rewards/chosen": -7.797667026519775, "rewards/margins": 0.5130267143249512, "rewards/rejected": -8.310693740844727, "step": 4818 }, { "epoch": 1.07, "learning_rate": 9.250697116221692e-06, "logits/chosen": -1.0588879585266113, "logits/rejected": -1.0120844841003418, "logps/chosen": -85.82852172851562, "logps/rejected": -161.71728515625, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": 0.5659149289131165, "rewards/margins": 2.826556444168091, "rewards/rejected": -2.260641574859619, "step": 4819 }, { "epoch": 1.07, "learning_rate": 9.249753079075534e-06, "logits/chosen": -1.1837632656097412, "logits/rejected": -1.131617784500122, "logps/chosen": -142.42601013183594, "logps/rejected": -137.80072021484375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 0.15502320230007172, "rewards/margins": 4.482531547546387, "rewards/rejected": -4.327508449554443, "step": 4820 }, { "epoch": 1.07, "learning_rate": 9.248808495846322e-06, "logits/chosen": -1.6117620468139648, "logits/rejected": -1.685811161994934, "logps/chosen": -146.0528564453125, "logps/rejected": -179.23196411132812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.904003918170929, "rewards/margins": 8.35399055480957, "rewards/rejected": -9.257994651794434, "step": 4821 }, { "epoch": 1.07, "learning_rate": 9.247863366655434e-06, "logits/chosen": -1.1584359407424927, "logits/rejected": -1.1584359407424927, "logps/chosen": -172.78695678710938, "logps/rejected": -172.78695678710938, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -8.669459342956543, "rewards/margins": 0.0, "rewards/rejected": -8.669459342956543, "step": 4822 }, { "epoch": 1.07, "learning_rate": 9.246917691624314e-06, "logits/chosen": -1.3943499326705933, "logits/rejected": -1.3904598951339722, "logps/chosen": -104.07394409179688, "logps/rejected": -174.54452514648438, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4461326599121094, "rewards/margins": 6.5890212059021, "rewards/rejected": -7.035153865814209, "step": 4823 }, { "epoch": 1.07, "learning_rate": 9.245971470874477e-06, "logits/chosen": -1.2987067699432373, "logits/rejected": -0.7602558732032776, "logps/chosen": -199.31509399414062, "logps/rejected": -671.7476806640625, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -6.840887546539307, "rewards/margins": 54.15788650512695, "rewards/rejected": -60.998775482177734, "step": 4824 }, { "epoch": 1.07, "learning_rate": 9.245024704527517e-06, "logits/chosen": -1.0867334604263306, "logits/rejected": -1.0867334604263306, "logps/chosen": -101.75945281982422, "logps/rejected": -101.75945281982422, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -3.9946744441986084, "rewards/margins": 0.0, "rewards/rejected": -3.9946744441986084, "step": 4825 }, { "epoch": 1.07, "learning_rate": 9.244077392705085e-06, "logits/chosen": -1.3960964679718018, "logits/rejected": -1.8083018064498901, "logps/chosen": -335.3921203613281, "logps/rejected": -119.73448944091797, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 4.505459785461426, "rewards/margins": 13.338861465454102, "rewards/rejected": -8.833401679992676, "step": 4826 }, { "epoch": 1.07, "learning_rate": 9.243129535528909e-06, "logits/chosen": -1.6697438955307007, "logits/rejected": -1.758931040763855, "logps/chosen": -133.5797576904297, "logps/rejected": -135.8232879638672, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.096522569656372, "rewards/margins": 8.768362998962402, "rewards/rejected": -9.864885330200195, "step": 4827 }, { "epoch": 1.07, "learning_rate": 9.242181133120791e-06, "logits/chosen": -1.0933467149734497, "logits/rejected": -1.098415493965149, "logps/chosen": -123.89073181152344, "logps/rejected": -104.2861557006836, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -2.7852447032928467, "rewards/margins": 3.2366416454315186, "rewards/rejected": -6.021886348724365, "step": 4828 }, { "epoch": 1.07, "learning_rate": 9.241232185602594e-06, "logits/chosen": -1.248900294303894, "logits/rejected": -1.2460954189300537, "logps/chosen": -93.26461791992188, "logps/rejected": -136.92239379882812, "loss": 0.0756, "rewards/accuracies": 1.0, "rewards/chosen": -0.7746567130088806, "rewards/margins": 1.8131988048553467, "rewards/rejected": -2.587855577468872, "step": 4829 }, { "epoch": 1.07, "learning_rate": 9.240282693096257e-06, "logits/chosen": -1.3822388648986816, "logits/rejected": -1.3822388648986816, "logps/chosen": -161.60240173339844, "logps/rejected": -161.60240173339844, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -4.057286739349365, "rewards/margins": 0.0, "rewards/rejected": -4.057286739349365, "step": 4830 }, { "epoch": 1.07, "learning_rate": 9.239332655723787e-06, "logits/chosen": -1.2686151266098022, "logits/rejected": -1.2726699113845825, "logps/chosen": -121.89411926269531, "logps/rejected": -169.21157836914062, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.2862380743026733, "rewards/margins": 4.807586669921875, "rewards/rejected": -6.093824863433838, "step": 4831 }, { "epoch": 1.07, "learning_rate": 9.238382073607262e-06, "logits/chosen": -1.510196328163147, "logits/rejected": -1.5413873195648193, "logps/chosen": -98.36729431152344, "logps/rejected": -74.58818817138672, "loss": 0.1518, "rewards/accuracies": 1.0, "rewards/chosen": -0.5742790102958679, "rewards/margins": 1.0470027923583984, "rewards/rejected": -1.6212818622589111, "step": 4832 }, { "epoch": 1.07, "learning_rate": 9.237430946868829e-06, "logits/chosen": -1.2457836866378784, "logits/rejected": -1.2388215065002441, "logps/chosen": -146.52737426757812, "logps/rejected": -155.17388916015625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.7125381827354431, "rewards/margins": 4.410984992980957, "rewards/rejected": -5.123523235321045, "step": 4833 }, { "epoch": 1.07, "learning_rate": 9.236479275630707e-06, "logits/chosen": -1.0015296936035156, "logits/rejected": -0.9565207958221436, "logps/chosen": -65.37590026855469, "logps/rejected": -110.65648651123047, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.204545259475708, "rewards/margins": 4.811871528625488, "rewards/rejected": -6.016416549682617, "step": 4834 }, { "epoch": 1.07, "learning_rate": 9.235527060015182e-06, "logits/chosen": -1.5196903944015503, "logits/rejected": -1.4811469316482544, "logps/chosen": -183.55459594726562, "logps/rejected": -257.4115905761719, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.337371826171875, "rewards/margins": 8.936103820800781, "rewards/rejected": -8.598731994628906, "step": 4835 }, { "epoch": 1.07, "learning_rate": 9.23457430014461e-06, "logits/chosen": -1.3576933145523071, "logits/rejected": -1.2791520357131958, "logps/chosen": -100.44612884521484, "logps/rejected": -230.71311950683594, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -0.1410072296857834, "rewards/margins": 2.6636650562286377, "rewards/rejected": -2.8046722412109375, "step": 4836 }, { "epoch": 1.07, "learning_rate": 9.233620996141421e-06, "logits/chosen": -1.5558322668075562, "logits/rejected": -1.5096458196640015, "logps/chosen": -93.72828674316406, "logps/rejected": -153.27096557617188, "loss": 0.0789, "rewards/accuracies": 1.0, "rewards/chosen": -1.5396636724472046, "rewards/margins": 1.7663384675979614, "rewards/rejected": -3.306002140045166, "step": 4837 }, { "epoch": 1.07, "learning_rate": 9.232667148128112e-06, "logits/chosen": -1.0029813051223755, "logits/rejected": -0.959509015083313, "logps/chosen": -179.55865478515625, "logps/rejected": -284.3196105957031, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.2491455078125, "rewards/margins": 10.909384727478027, "rewards/rejected": -11.158530235290527, "step": 4838 }, { "epoch": 1.07, "learning_rate": 9.231712756227249e-06, "logits/chosen": -1.4022572040557861, "logits/rejected": -1.4022572040557861, "logps/chosen": -227.58966064453125, "logps/rejected": -227.58966064453125, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.193554878234863, "rewards/margins": 0.0, "rewards/rejected": -5.193554878234863, "step": 4839 }, { "epoch": 1.07, "learning_rate": 9.23075782056147e-06, "logits/chosen": -1.50581955909729, "logits/rejected": -1.4355686902999878, "logps/chosen": -124.86444854736328, "logps/rejected": -217.69552612304688, "loss": 0.0678, "rewards/accuracies": 1.0, "rewards/chosen": -1.5343360900878906, "rewards/margins": 5.774729251861572, "rewards/rejected": -7.309065341949463, "step": 4840 }, { "epoch": 1.07, "learning_rate": 9.229802341253482e-06, "logits/chosen": -1.5683503150939941, "logits/rejected": -1.5351815223693848, "logps/chosen": -64.9635238647461, "logps/rejected": -153.9967803955078, "loss": 1.9216, "rewards/accuracies": 1.0, "rewards/chosen": 0.46001434326171875, "rewards/margins": 6.503387451171875, "rewards/rejected": -6.043373107910156, "step": 4841 }, { "epoch": 1.07, "learning_rate": 9.22884631842606e-06, "logits/chosen": -1.4710482358932495, "logits/rejected": -1.3955585956573486, "logps/chosen": -81.71952819824219, "logps/rejected": -136.26986694335938, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": 0.13677826523780823, "rewards/margins": 2.7904863357543945, "rewards/rejected": -2.653707981109619, "step": 4842 }, { "epoch": 1.07, "learning_rate": 9.227889752202052e-06, "logits/chosen": -1.5928882360458374, "logits/rejected": -1.569698452949524, "logps/chosen": -133.0227508544922, "logps/rejected": -197.11846923828125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.18162231147289276, "rewards/margins": 7.696407318115234, "rewards/rejected": -7.878029823303223, "step": 4843 }, { "epoch": 1.07, "learning_rate": 9.226932642704376e-06, "logits/chosen": -1.6757131814956665, "logits/rejected": -1.738222599029541, "logps/chosen": -91.17131042480469, "logps/rejected": -89.359375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7422493100166321, "rewards/margins": 5.646182537078857, "rewards/rejected": -6.388432025909424, "step": 4844 }, { "epoch": 1.07, "learning_rate": 9.225974990056016e-06, "logits/chosen": -1.3399121761322021, "logits/rejected": -1.3399121761322021, "logps/chosen": -74.02845001220703, "logps/rejected": -74.02845001220703, "loss": 0.3534, "rewards/accuracies": 0.0, "rewards/chosen": -1.4691600799560547, "rewards/margins": 0.0, "rewards/rejected": -1.4691600799560547, "step": 4845 }, { "epoch": 1.07, "learning_rate": 9.225016794380027e-06, "logits/chosen": -1.3647812604904175, "logits/rejected": -1.3896870613098145, "logps/chosen": -120.63105010986328, "logps/rejected": -151.22471618652344, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": 0.5562454462051392, "rewards/margins": 3.7465548515319824, "rewards/rejected": -3.1903092861175537, "step": 4846 }, { "epoch": 1.07, "learning_rate": 9.22405805579954e-06, "logits/chosen": -1.502752661705017, "logits/rejected": -1.4877270460128784, "logps/chosen": -150.37356567382812, "logps/rejected": -250.61868286132812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 1.470851182937622, "rewards/margins": 8.84183406829834, "rewards/rejected": -7.370982646942139, "step": 4847 }, { "epoch": 1.07, "learning_rate": 9.223098774437744e-06, "logits/chosen": -1.038651943206787, "logits/rejected": -0.8554325699806213, "logps/chosen": -134.6973114013672, "logps/rejected": -263.101806640625, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -2.2218613624572754, "rewards/margins": 4.074525356292725, "rewards/rejected": -6.29638671875, "step": 4848 }, { "epoch": 1.07, "learning_rate": 9.222138950417908e-06, "logits/chosen": -1.3539116382598877, "logits/rejected": -1.2738239765167236, "logps/chosen": -158.58322143554688, "logps/rejected": -298.609375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 1.8732620477676392, "rewards/margins": 7.050799369812012, "rewards/rejected": -5.177537441253662, "step": 4849 }, { "epoch": 1.07, "learning_rate": 9.221178583863367e-06, "logits/chosen": -1.4255468845367432, "logits/rejected": -1.4061121940612793, "logps/chosen": -125.57792663574219, "logps/rejected": -191.808837890625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -2.698767900466919, "rewards/margins": 5.121248245239258, "rewards/rejected": -7.820016384124756, "step": 4850 }, { "epoch": 1.07, "learning_rate": 9.220217674897524e-06, "logits/chosen": -1.1731592416763306, "logits/rejected": -0.8073574304580688, "logps/chosen": -119.09559631347656, "logps/rejected": -573.1010131835938, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 0.5104400515556335, "rewards/margins": 47.49821090698242, "rewards/rejected": -46.987770080566406, "step": 4851 }, { "epoch": 1.07, "learning_rate": 9.219256223643857e-06, "logits/chosen": -1.4182732105255127, "logits/rejected": -1.3920140266418457, "logps/chosen": -231.20741271972656, "logps/rejected": -260.2021179199219, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.8591018915176392, "rewards/margins": 6.189428806304932, "rewards/rejected": -5.330327033996582, "step": 4852 }, { "epoch": 1.07, "learning_rate": 9.218294230225908e-06, "logits/chosen": -1.7660539150238037, "logits/rejected": -1.7660539150238037, "logps/chosen": -107.54159545898438, "logps/rejected": -107.54159545898438, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.866774082183838, "rewards/margins": 0.0, "rewards/rejected": -4.866774082183838, "step": 4853 }, { "epoch": 1.07, "learning_rate": 9.217331694767291e-06, "logits/chosen": -1.8066993951797485, "logits/rejected": -1.7584583759307861, "logps/chosen": -125.5981216430664, "logps/rejected": -198.3743896484375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.1293396949768066, "rewards/margins": 5.452463626861572, "rewards/rejected": -7.581803321838379, "step": 4854 }, { "epoch": 1.07, "learning_rate": 9.21636861739169e-06, "logits/chosen": -1.5532609224319458, "logits/rejected": -1.5749973058700562, "logps/chosen": -219.09161376953125, "logps/rejected": -137.9676055908203, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 2.428680419921875, "rewards/margins": 6.292060852050781, "rewards/rejected": -3.8633804321289062, "step": 4855 }, { "epoch": 1.07, "learning_rate": 9.215404998222856e-06, "logits/chosen": -1.3349109888076782, "logits/rejected": -1.2638546228408813, "logps/chosen": -113.30467987060547, "logps/rejected": -182.8738250732422, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -0.7541908621788025, "rewards/margins": 3.507169246673584, "rewards/rejected": -4.261360168457031, "step": 4856 }, { "epoch": 1.08, "learning_rate": 9.214440837384612e-06, "logits/chosen": -1.3634631633758545, "logits/rejected": -0.9964481592178345, "logps/chosen": -132.49270629882812, "logps/rejected": -1216.04443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3297981023788452, "rewards/margins": 104.81753540039062, "rewards/rejected": -106.14733123779297, "step": 4857 }, { "epoch": 1.08, "learning_rate": 9.213476135000853e-06, "logits/chosen": -1.3334625959396362, "logits/rejected": -1.337234377861023, "logps/chosen": -127.55426025390625, "logps/rejected": -161.70069885253906, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -3.377854108810425, "rewards/margins": 5.040961265563965, "rewards/rejected": -8.418815612792969, "step": 4858 }, { "epoch": 1.08, "learning_rate": 9.21251089119554e-06, "logits/chosen": -1.1849861145019531, "logits/rejected": -1.2363991737365723, "logps/chosen": -238.07192993164062, "logps/rejected": -169.46917724609375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.6715423464775085, "rewards/margins": 6.693026065826416, "rewards/rejected": -6.021483898162842, "step": 4859 }, { "epoch": 1.08, "learning_rate": 9.211545106092706e-06, "logits/chosen": -1.4553956985473633, "logits/rejected": -1.4553956985473633, "logps/chosen": -216.48812866210938, "logps/rejected": -216.48812866210938, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -6.4594407081604, "rewards/margins": 0.0, "rewards/rejected": -6.4594407081604, "step": 4860 }, { "epoch": 1.08, "learning_rate": 9.210578779816449e-06, "logits/chosen": -1.4584068059921265, "logits/rejected": -1.5384970903396606, "logps/chosen": -178.5609130859375, "logps/rejected": -172.4431915283203, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -6.0015869140625, "rewards/margins": 7.678318977355957, "rewards/rejected": -13.679905891418457, "step": 4861 }, { "epoch": 1.08, "learning_rate": 9.20961191249094e-06, "logits/chosen": -1.1504933834075928, "logits/rejected": -1.1585323810577393, "logps/chosen": -55.161643981933594, "logps/rejected": -73.72653198242188, "loss": 0.2217, "rewards/accuracies": 1.0, "rewards/chosen": -4.182338237762451, "rewards/margins": 0.5902318954467773, "rewards/rejected": -4.7725701332092285, "step": 4862 }, { "epoch": 1.08, "learning_rate": 9.208644504240418e-06, "logits/chosen": -1.350792407989502, "logits/rejected": -1.350792407989502, "logps/chosen": -170.70501708984375, "logps/rejected": -170.70501708984375, "loss": 0.349, "rewards/accuracies": 0.0, "rewards/chosen": -2.3072235584259033, "rewards/margins": 0.0, "rewards/rejected": -2.3072235584259033, "step": 4863 }, { "epoch": 1.08, "learning_rate": 9.207676555189196e-06, "logits/chosen": -1.433465600013733, "logits/rejected": -1.4269479513168335, "logps/chosen": -80.85115814208984, "logps/rejected": -120.80728149414062, "loss": 0.4012, "rewards/accuracies": 1.0, "rewards/chosen": -0.39605483412742615, "rewards/margins": 2.1595635414123535, "rewards/rejected": -2.5556182861328125, "step": 4864 }, { "epoch": 1.08, "learning_rate": 9.206708065461652e-06, "logits/chosen": -1.5602947473526, "logits/rejected": -1.6222310066223145, "logps/chosen": -99.56517028808594, "logps/rejected": -110.68064880371094, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.0409226417541504, "rewards/margins": 5.911206245422363, "rewards/rejected": -7.952128887176514, "step": 4865 }, { "epoch": 1.08, "learning_rate": 9.205739035182236e-06, "logits/chosen": -1.1737993955612183, "logits/rejected": -1.0568040609359741, "logps/chosen": -51.157684326171875, "logps/rejected": -154.0146484375, "loss": 1.1977, "rewards/accuracies": 0.0, "rewards/chosen": -4.052064418792725, "rewards/margins": -1.620417594909668, "rewards/rejected": -2.4316468238830566, "step": 4866 }, { "epoch": 1.08, "learning_rate": 9.204769464475462e-06, "logits/chosen": -1.0769060850143433, "logits/rejected": -0.9447060823440552, "logps/chosen": -119.50907897949219, "logps/rejected": -336.0135192871094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4073532819747925, "rewards/margins": 9.722147941589355, "rewards/rejected": -11.129501342773438, "step": 4867 }, { "epoch": 1.08, "learning_rate": 9.20379935346592e-06, "logits/chosen": -1.3267170190811157, "logits/rejected": -1.214482069015503, "logps/chosen": -94.34010314941406, "logps/rejected": -234.01588439941406, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.8420089483261108, "rewards/margins": 4.025042533874512, "rewards/rejected": -5.867051601409912, "step": 4868 }, { "epoch": 1.08, "learning_rate": 9.202828702278265e-06, "logits/chosen": -1.4714622497558594, "logits/rejected": -1.5554717779159546, "logps/chosen": -182.0367431640625, "logps/rejected": -123.60018920898438, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": 3.7554931640625, "rewards/margins": 2.608107089996338, "rewards/rejected": 1.1473861932754517, "step": 4869 }, { "epoch": 1.08, "learning_rate": 9.201857511037228e-06, "logits/chosen": -1.5044152736663818, "logits/rejected": -1.393216609954834, "logps/chosen": -88.62571716308594, "logps/rejected": -227.85214233398438, "loss": 0.2906, "rewards/accuracies": 1.0, "rewards/chosen": -0.0244598388671875, "rewards/margins": 9.972708702087402, "rewards/rejected": -9.99716854095459, "step": 4870 }, { "epoch": 1.08, "learning_rate": 9.200885779867601e-06, "logits/chosen": -1.5823160409927368, "logits/rejected": -1.6122187376022339, "logps/chosen": -102.51290893554688, "logps/rejected": -81.10260009765625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.8683158755302429, "rewards/margins": 6.768503665924072, "rewards/rejected": -5.900187969207764, "step": 4871 }, { "epoch": 1.08, "learning_rate": 9.199913508894251e-06, "logits/chosen": -1.3164825439453125, "logits/rejected": -0.47578534483909607, "logps/chosen": -90.930908203125, "logps/rejected": -404.3019104003906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.08513259887695312, "rewards/margins": 28.199586868286133, "rewards/rejected": -28.11445426940918, "step": 4872 }, { "epoch": 1.08, "learning_rate": 9.198940698242108e-06, "logits/chosen": -1.0344657897949219, "logits/rejected": -0.47104161977767944, "logps/chosen": -83.66325378417969, "logps/rejected": -633.9841918945312, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.2903076112270355, "rewards/margins": 49.60419464111328, "rewards/rejected": -49.894500732421875, "step": 4873 }, { "epoch": 1.08, "learning_rate": 9.197967348036182e-06, "logits/chosen": -1.3077399730682373, "logits/rejected": -1.2626625299453735, "logps/chosen": -108.37779998779297, "logps/rejected": -206.17276000976562, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.3115799129009247, "rewards/margins": 5.2935380935668945, "rewards/rejected": -5.6051177978515625, "step": 4874 }, { "epoch": 1.08, "learning_rate": 9.196993458401544e-06, "logits/chosen": -1.3828656673431396, "logits/rejected": -1.4165419340133667, "logps/chosen": -43.15123748779297, "logps/rejected": -19.654714584350586, "loss": 0.2973, "rewards/accuracies": 1.0, "rewards/chosen": -1.1908706426620483, "rewards/margins": 0.2199859619140625, "rewards/rejected": -1.4108566045761108, "step": 4875 }, { "epoch": 1.08, "learning_rate": 9.196019029463335e-06, "logits/chosen": -1.038659930229187, "logits/rejected": -0.9752183556556702, "logps/chosen": -138.1148681640625, "logps/rejected": -255.45700073242188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 1.4904450178146362, "rewards/margins": 6.172481060028076, "rewards/rejected": -4.68203592300415, "step": 4876 }, { "epoch": 1.08, "learning_rate": 9.195044061346767e-06, "logits/chosen": -0.9259746074676514, "logits/rejected": -0.9259746074676514, "logps/chosen": -160.88986206054688, "logps/rejected": -160.88986206054688, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -7.1930975914001465, "rewards/margins": 0.0, "rewards/rejected": -7.1930975914001465, "step": 4877 }, { "epoch": 1.08, "learning_rate": 9.194068554177123e-06, "logits/chosen": -1.098675012588501, "logits/rejected": -1.0832716226577759, "logps/chosen": -67.13626098632812, "logps/rejected": -80.3838119506836, "loss": 0.1515, "rewards/accuracies": 1.0, "rewards/chosen": -1.439361572265625, "rewards/margins": 1.0442726612091064, "rewards/rejected": -2.4836342334747314, "step": 4878 }, { "epoch": 1.08, "learning_rate": 9.19309250807975e-06, "logits/chosen": -1.3292680978775024, "logits/rejected": -1.2632222175598145, "logps/chosen": -273.0953369140625, "logps/rejected": -354.21502685546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 1.2674407958984375, "rewards/margins": 9.75195026397705, "rewards/rejected": -8.484509468078613, "step": 4879 }, { "epoch": 1.08, "learning_rate": 9.192115923180071e-06, "logits/chosen": -1.2052412033081055, "logits/rejected": -1.1345138549804688, "logps/chosen": -174.41482543945312, "logps/rejected": -299.8959045410156, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.09714660793542862, "rewards/margins": 7.065664768218994, "rewards/rejected": -6.968518257141113, "step": 4880 }, { "epoch": 1.08, "learning_rate": 9.191138799603574e-06, "logits/chosen": -1.37080979347229, "logits/rejected": -1.3099887371063232, "logps/chosen": -53.051692962646484, "logps/rejected": -144.3710174560547, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.4477970600128174, "rewards/margins": 9.108220100402832, "rewards/rejected": -7.6604228019714355, "step": 4881 }, { "epoch": 1.08, "learning_rate": 9.190161137475814e-06, "logits/chosen": -1.3203420639038086, "logits/rejected": -1.3297374248504639, "logps/chosen": -95.00430297851562, "logps/rejected": -131.05104064941406, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.1319854259490967, "rewards/margins": 5.483004570007324, "rewards/rejected": -7.614990234375, "step": 4882 }, { "epoch": 1.08, "learning_rate": 9.189182936922424e-06, "logits/chosen": -1.658273458480835, "logits/rejected": -1.7310678958892822, "logps/chosen": -183.43890380859375, "logps/rejected": -88.09638214111328, "loss": 0.2121, "rewards/accuracies": 1.0, "rewards/chosen": -4.204158306121826, "rewards/margins": 1.5045042037963867, "rewards/rejected": -5.708662509918213, "step": 4883 }, { "epoch": 1.08, "learning_rate": 9.188204198069096e-06, "logits/chosen": -1.228831171989441, "logits/rejected": -1.1641725301742554, "logps/chosen": -96.73361206054688, "logps/rejected": -189.34552001953125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.12540435791015625, "rewards/margins": 7.075778484344482, "rewards/rejected": -7.201182842254639, "step": 4884 }, { "epoch": 1.08, "learning_rate": 9.187224921041595e-06, "logits/chosen": -1.2455039024353027, "logits/rejected": -1.2692822217941284, "logps/chosen": -225.34967041015625, "logps/rejected": -139.20199584960938, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": 1.401574730873108, "rewards/margins": 4.713810920715332, "rewards/rejected": -3.3122360706329346, "step": 4885 }, { "epoch": 1.08, "learning_rate": 9.186245105965758e-06, "logits/chosen": -1.3402456045150757, "logits/rejected": -1.3660070896148682, "logps/chosen": -124.57435607910156, "logps/rejected": -134.42855834960938, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 3.489431858062744, "rewards/margins": 5.157928466796875, "rewards/rejected": -1.6684967279434204, "step": 4886 }, { "epoch": 1.08, "learning_rate": 9.18526475296749e-06, "logits/chosen": -1.4659181833267212, "logits/rejected": -1.4264156818389893, "logps/chosen": -217.61929321289062, "logps/rejected": -191.22637939453125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 0.265829473733902, "rewards/margins": 5.121374607086182, "rewards/rejected": -4.8555450439453125, "step": 4887 }, { "epoch": 1.08, "learning_rate": 9.184283862172763e-06, "logits/chosen": -1.2085607051849365, "logits/rejected": -1.1498827934265137, "logps/chosen": -101.12216186523438, "logps/rejected": -181.44647216796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.0748672485351562, "rewards/margins": 9.752433776855469, "rewards/rejected": -8.677566528320312, "step": 4888 }, { "epoch": 1.08, "learning_rate": 9.183302433707616e-06, "logits/chosen": -1.3431861400604248, "logits/rejected": -1.285514235496521, "logps/chosen": -135.1626739501953, "logps/rejected": -193.06407165527344, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 4.1204423904418945, "rewards/margins": 5.797702312469482, "rewards/rejected": -1.6772598028182983, "step": 4889 }, { "epoch": 1.08, "learning_rate": 9.182320467698164e-06, "logits/chosen": -1.713312029838562, "logits/rejected": -1.713312029838562, "logps/chosen": -245.98162841796875, "logps/rejected": -245.98162841796875, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -9.57733154296875, "rewards/margins": 0.0, "rewards/rejected": -9.57733154296875, "step": 4890 }, { "epoch": 1.08, "learning_rate": 9.181337964270585e-06, "logits/chosen": -1.6242173910140991, "logits/rejected": -1.5050690174102783, "logps/chosen": -105.67167663574219, "logps/rejected": -239.456298828125, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.5123100280761719, "rewards/margins": 4.493016719818115, "rewards/rejected": -6.005326747894287, "step": 4891 }, { "epoch": 1.08, "learning_rate": 9.180354923551129e-06, "logits/chosen": -1.6286104917526245, "logits/rejected": -1.6286104917526245, "logps/chosen": -127.21593475341797, "logps/rejected": -127.21593475341797, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.6188790798187256, "rewards/margins": 0.0, "rewards/rejected": -3.6188790798187256, "step": 4892 }, { "epoch": 1.08, "learning_rate": 9.179371345666115e-06, "logits/chosen": -1.0476634502410889, "logits/rejected": -0.6265667676925659, "logps/chosen": -195.70237731933594, "logps/rejected": -943.05517578125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 4.028377056121826, "rewards/margins": 87.15924835205078, "rewards/rejected": -83.13087463378906, "step": 4893 }, { "epoch": 1.08, "learning_rate": 9.178387230741932e-06, "logits/chosen": -1.4110225439071655, "logits/rejected": -1.5130689144134521, "logps/chosen": -161.1171112060547, "logps/rejected": -164.8287353515625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.4389495849609375, "rewards/margins": 9.613609313964844, "rewards/rejected": -9.174659729003906, "step": 4894 }, { "epoch": 1.08, "learning_rate": 9.177402578905032e-06, "logits/chosen": -1.2872074842453003, "logits/rejected": -1.2343246936798096, "logps/chosen": -114.16593933105469, "logps/rejected": -83.92285919189453, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": -3.4971344470977783, "rewards/margins": 2.8625528812408447, "rewards/rejected": -6.359687328338623, "step": 4895 }, { "epoch": 1.08, "learning_rate": 9.176417390281944e-06, "logits/chosen": -1.530638337135315, "logits/rejected": -1.5391862392425537, "logps/chosen": -95.909423828125, "logps/rejected": -123.19995880126953, "loss": 0.0372, "rewards/accuracies": 1.0, "rewards/chosen": -0.08712921291589737, "rewards/margins": 2.8978264331817627, "rewards/rejected": -2.9849555492401123, "step": 4896 }, { "epoch": 1.08, "learning_rate": 9.17543166499926e-06, "logits/chosen": -1.2151328325271606, "logits/rejected": -1.1434073448181152, "logps/chosen": -140.38070678710938, "logps/rejected": -231.63662719726562, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": -1.1851364374160767, "rewards/margins": 2.4892654418945312, "rewards/rejected": -3.6744019985198975, "step": 4897 }, { "epoch": 1.08, "learning_rate": 9.174445403183645e-06, "logits/chosen": -1.315962553024292, "logits/rejected": -1.315962553024292, "logps/chosen": -206.87152099609375, "logps/rejected": -206.87152099609375, "loss": 0.4001, "rewards/accuracies": 0.0, "rewards/chosen": -11.363264083862305, "rewards/margins": 0.0, "rewards/rejected": -11.363264083862305, "step": 4898 }, { "epoch": 1.08, "learning_rate": 9.173458604961832e-06, "logits/chosen": -1.3841674327850342, "logits/rejected": -1.490290641784668, "logps/chosen": -291.2490539550781, "logps/rejected": -209.0480194091797, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.5933136343955994, "rewards/margins": 9.918911933898926, "rewards/rejected": -9.32559871673584, "step": 4899 }, { "epoch": 1.08, "learning_rate": 9.17247127046062e-06, "logits/chosen": -1.2672115564346313, "logits/rejected": -1.2393317222595215, "logps/chosen": -80.49259185791016, "logps/rejected": -124.61001586914062, "loss": 0.1088, "rewards/accuracies": 1.0, "rewards/chosen": -1.7082222700119019, "rewards/margins": 1.4158920049667358, "rewards/rejected": -3.1241142749786377, "step": 4900 }, { "epoch": 1.08, "learning_rate": 9.17148339980688e-06, "logits/chosen": -1.3297069072723389, "logits/rejected": -1.3297069072723389, "logps/chosen": -277.62933349609375, "logps/rejected": -277.62933349609375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -7.892834663391113, "rewards/margins": 0.0, "rewards/rejected": -7.892834663391113, "step": 4901 }, { "epoch": 1.08, "learning_rate": 9.170494993127552e-06, "logits/chosen": -1.2924153804779053, "logits/rejected": -1.2924153804779053, "logps/chosen": -187.92257690429688, "logps/rejected": -187.92257690429688, "loss": 0.3497, "rewards/accuracies": 0.0, "rewards/chosen": -3.6934616565704346, "rewards/margins": 0.0, "rewards/rejected": -3.6934616565704346, "step": 4902 }, { "epoch": 1.09, "learning_rate": 9.169506050549641e-06, "logits/chosen": -1.7026724815368652, "logits/rejected": -1.6888258457183838, "logps/chosen": -121.62525939941406, "logps/rejected": -224.61834716796875, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": -1.250078558921814, "rewards/margins": 7.8656535148620605, "rewards/rejected": -9.115732192993164, "step": 4903 }, { "epoch": 1.09, "learning_rate": 9.168516572200227e-06, "logits/chosen": -1.5247949361801147, "logits/rejected": -1.5067757368087769, "logps/chosen": -71.3653793334961, "logps/rejected": -96.60011291503906, "loss": 0.366, "rewards/accuracies": 1.0, "rewards/chosen": 1.2120980024337769, "rewards/margins": 3.230541229248047, "rewards/rejected": -2.0184433460235596, "step": 4904 }, { "epoch": 1.09, "learning_rate": 9.167526558206455e-06, "logits/chosen": -1.5628653764724731, "logits/rejected": -1.6193984746932983, "logps/chosen": -146.52877807617188, "logps/rejected": -107.47992706298828, "loss": 0.3678, "rewards/accuracies": 0.0, "rewards/chosen": -6.117057800292969, "rewards/margins": -0.08175039291381836, "rewards/rejected": -6.03530740737915, "step": 4905 }, { "epoch": 1.09, "learning_rate": 9.166536008695536e-06, "logits/chosen": -1.5299134254455566, "logits/rejected": -1.5875450372695923, "logps/chosen": -252.28253173828125, "logps/rejected": -156.10252380371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.162524461746216, "rewards/margins": 10.674308776855469, "rewards/rejected": -8.511784553527832, "step": 4906 }, { "epoch": 1.09, "learning_rate": 9.165544923794758e-06, "logits/chosen": -1.7671387195587158, "logits/rejected": -1.7364267110824585, "logps/chosen": -111.33021545410156, "logps/rejected": -162.79867553710938, "loss": 0.0724, "rewards/accuracies": 1.0, "rewards/chosen": 0.4302307069301605, "rewards/margins": 3.8138792514801025, "rewards/rejected": -3.383648633956909, "step": 4907 }, { "epoch": 1.09, "learning_rate": 9.164553303631472e-06, "logits/chosen": -1.1812137365341187, "logits/rejected": -1.2465240955352783, "logps/chosen": -211.909912109375, "logps/rejected": -194.8642578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.6173858642578125, "rewards/margins": 9.197261810302734, "rewards/rejected": -7.579875946044922, "step": 4908 }, { "epoch": 1.09, "learning_rate": 9.163561148333097e-06, "logits/chosen": -1.7783070802688599, "logits/rejected": -1.8392574787139893, "logps/chosen": -181.74525451660156, "logps/rejected": -231.76580810546875, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -1.6439727544784546, "rewards/margins": 5.901464939117432, "rewards/rejected": -7.545437812805176, "step": 4909 }, { "epoch": 1.09, "learning_rate": 9.162568458027122e-06, "logits/chosen": -1.5638587474822998, "logits/rejected": -1.6877940893173218, "logps/chosen": -298.1644287109375, "logps/rejected": -164.82415771484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.0714844465255737, "rewards/margins": 7.1150407791137695, "rewards/rejected": -8.186525344848633, "step": 4910 }, { "epoch": 1.09, "learning_rate": 9.16157523284111e-06, "logits/chosen": -1.042510986328125, "logits/rejected": -1.0487216711044312, "logps/chosen": -161.612548828125, "logps/rejected": -175.41989135742188, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": 2.963388204574585, "rewards/margins": 3.9249117374420166, "rewards/rejected": -0.9615234732627869, "step": 4911 }, { "epoch": 1.09, "learning_rate": 9.16058147290268e-06, "logits/chosen": -1.0554612874984741, "logits/rejected": -0.9884810447692871, "logps/chosen": -94.14016723632812, "logps/rejected": -103.21798706054688, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -2.1361076831817627, "rewards/margins": 3.3526628017425537, "rewards/rejected": -5.488770484924316, "step": 4912 }, { "epoch": 1.09, "learning_rate": 9.159587178339535e-06, "logits/chosen": -1.33314847946167, "logits/rejected": -1.2568942308425903, "logps/chosen": -143.961669921875, "logps/rejected": -171.9481658935547, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 1.3780701160430908, "rewards/margins": 4.451684951782227, "rewards/rejected": -3.0736145973205566, "step": 4913 }, { "epoch": 1.09, "learning_rate": 9.158592349279439e-06, "logits/chosen": -1.4546595811843872, "logits/rejected": -1.4081268310546875, "logps/chosen": -74.83299255371094, "logps/rejected": -125.7206039428711, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 0.651263415813446, "rewards/margins": 4.5954108238220215, "rewards/rejected": -3.9441475868225098, "step": 4914 }, { "epoch": 1.09, "learning_rate": 9.157596985850218e-06, "logits/chosen": -0.910677433013916, "logits/rejected": -0.8001856207847595, "logps/chosen": -214.45068359375, "logps/rejected": -351.6119079589844, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.3850343227386475, "rewards/margins": 8.330700874328613, "rewards/rejected": -10.71573543548584, "step": 4915 }, { "epoch": 1.09, "learning_rate": 9.156601088179785e-06, "logits/chosen": -1.541535496711731, "logits/rejected": -1.5590900182724, "logps/chosen": -105.56379699707031, "logps/rejected": -144.09603881835938, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.326455682516098, "rewards/margins": 3.808173656463623, "rewards/rejected": -4.134629249572754, "step": 4916 }, { "epoch": 1.09, "learning_rate": 9.1556046563961e-06, "logits/chosen": -1.3438361883163452, "logits/rejected": -1.4713705778121948, "logps/chosen": -193.39817810058594, "logps/rejected": -99.39324951171875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 1.72991943359375, "rewards/margins": 4.526486396789551, "rewards/rejected": -2.7965667247772217, "step": 4917 }, { "epoch": 1.09, "learning_rate": 9.154607690627207e-06, "logits/chosen": -1.3818424940109253, "logits/rejected": -1.359405279159546, "logps/chosen": -87.28260803222656, "logps/rejected": -184.38101196289062, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.0088348388671875, "rewards/margins": 7.036547660827637, "rewards/rejected": -8.045382499694824, "step": 4918 }, { "epoch": 1.09, "learning_rate": 9.153610191001214e-06, "logits/chosen": -1.7275991439819336, "logits/rejected": -1.6883337497711182, "logps/chosen": -80.01904296875, "logps/rejected": -164.21778869628906, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.6006698608398438, "rewards/margins": 4.467230319976807, "rewards/rejected": -5.06790018081665, "step": 4919 }, { "epoch": 1.09, "learning_rate": 9.152612157646297e-06, "logits/chosen": -1.6298954486846924, "logits/rejected": -1.6273094415664673, "logps/chosen": -62.93187713623047, "logps/rejected": -91.88519287109375, "loss": 0.369, "rewards/accuracies": 1.0, "rewards/chosen": 0.0034835815895348787, "rewards/margins": 3.0808186531066895, "rewards/rejected": -3.0773351192474365, "step": 4920 }, { "epoch": 1.09, "learning_rate": 9.1516135906907e-06, "logits/chosen": -1.5598092079162598, "logits/rejected": -1.6061549186706543, "logps/chosen": -158.48866271972656, "logps/rejected": -171.4639129638672, "loss": 0.0938, "rewards/accuracies": 1.0, "rewards/chosen": -2.9651474952697754, "rewards/margins": 1.842238426208496, "rewards/rejected": -4.8073859214782715, "step": 4921 }, { "epoch": 1.09, "learning_rate": 9.150614490262736e-06, "logits/chosen": -1.5842318534851074, "logits/rejected": -1.5280547142028809, "logps/chosen": -149.87379455566406, "logps/rejected": -202.5594482421875, "loss": 0.5764, "rewards/accuracies": 0.0, "rewards/chosen": -4.119070529937744, "rewards/margins": -0.5471742153167725, "rewards/rejected": -3.5718963146209717, "step": 4922 }, { "epoch": 1.09, "learning_rate": 9.149614856490788e-06, "logits/chosen": -1.6394723653793335, "logits/rejected": -1.570604920387268, "logps/chosen": -98.64329528808594, "logps/rejected": -171.78964233398438, "loss": 0.0436, "rewards/accuracies": 1.0, "rewards/chosen": 0.20249024033546448, "rewards/margins": 3.258606195449829, "rewards/rejected": -3.0561158657073975, "step": 4923 }, { "epoch": 1.09, "learning_rate": 9.148614689503307e-06, "logits/chosen": -1.371775507926941, "logits/rejected": -1.371775507926941, "logps/chosen": -223.74542236328125, "logps/rejected": -223.74542236328125, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -8.647467613220215, "rewards/margins": 0.0, "rewards/rejected": -8.647467613220215, "step": 4924 }, { "epoch": 1.09, "learning_rate": 9.147613989428809e-06, "logits/chosen": -1.256111979484558, "logits/rejected": -1.1645333766937256, "logps/chosen": -73.88331604003906, "logps/rejected": -171.42713928222656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.6882247924804688, "rewards/margins": 8.637134552001953, "rewards/rejected": -7.948909282684326, "step": 4925 }, { "epoch": 1.09, "learning_rate": 9.146612756395888e-06, "logits/chosen": -1.394644021987915, "logits/rejected": -1.3865580558776855, "logps/chosen": -111.50537109375, "logps/rejected": -164.47715759277344, "loss": 0.2008, "rewards/accuracies": 1.0, "rewards/chosen": 0.0033111572265625, "rewards/margins": 1.5056060552597046, "rewards/rejected": -1.502294898033142, "step": 4926 }, { "epoch": 1.09, "learning_rate": 9.145610990533193e-06, "logits/chosen": -1.4461320638656616, "logits/rejected": -1.3864599466323853, "logps/chosen": -180.4434356689453, "logps/rejected": -270.7455749511719, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.264913946390152, "rewards/margins": 4.175250053405762, "rewards/rejected": -4.440164089202881, "step": 4927 }, { "epoch": 1.09, "learning_rate": 9.144608691969452e-06, "logits/chosen": -1.040998101234436, "logits/rejected": -0.986751139163971, "logps/chosen": -101.26258850097656, "logps/rejected": -316.6956787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 7.596853733062744, "rewards/margins": 15.575891494750977, "rewards/rejected": -7.979037761688232, "step": 4928 }, { "epoch": 1.09, "learning_rate": 9.143605860833459e-06, "logits/chosen": -1.1758780479431152, "logits/rejected": -0.8941201567649841, "logps/chosen": -128.5267791748047, "logps/rejected": -799.1209716796875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.8296043276786804, "rewards/margins": 68.16345977783203, "rewards/rejected": -68.9930648803711, "step": 4929 }, { "epoch": 1.09, "learning_rate": 9.142602497254071e-06, "logits/chosen": -1.3597172498703003, "logits/rejected": -1.5147624015808105, "logps/chosen": -165.7915802001953, "logps/rejected": -158.09291076660156, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.10325165092945099, "rewards/margins": 6.641038417816162, "rewards/rejected": -6.537786960601807, "step": 4930 }, { "epoch": 1.09, "learning_rate": 9.141598601360225e-06, "logits/chosen": -1.8393930196762085, "logits/rejected": -1.8722118139266968, "logps/chosen": -78.20091247558594, "logps/rejected": -162.0357666015625, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": 0.2779648005962372, "rewards/margins": 9.990448951721191, "rewards/rejected": -9.712484359741211, "step": 4931 }, { "epoch": 1.09, "learning_rate": 9.14059417328091e-06, "logits/chosen": -1.105533480644226, "logits/rejected": -1.0692883729934692, "logps/chosen": -89.44239807128906, "logps/rejected": -121.71220397949219, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 1.8536003828048706, "rewards/margins": 6.274160385131836, "rewards/rejected": -4.420559883117676, "step": 4932 }, { "epoch": 1.09, "learning_rate": 9.139589213145202e-06, "logits/chosen": -0.8081423044204712, "logits/rejected": -0.8204455971717834, "logps/chosen": -180.1768341064453, "logps/rejected": -206.33380126953125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -3.4029786586761475, "rewards/margins": 7.570657730102539, "rewards/rejected": -10.973636627197266, "step": 4933 }, { "epoch": 1.09, "learning_rate": 9.138583721082229e-06, "logits/chosen": -1.620147943496704, "logits/rejected": -1.6163638830184937, "logps/chosen": -92.78582763671875, "logps/rejected": -263.1042785644531, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": 0.04483642801642418, "rewards/margins": 10.231011390686035, "rewards/rejected": -10.186175346374512, "step": 4934 }, { "epoch": 1.09, "learning_rate": 9.137577697221195e-06, "logits/chosen": -1.292180061340332, "logits/rejected": -1.2721874713897705, "logps/chosen": -124.85350036621094, "logps/rejected": -174.06781005859375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5104324221611023, "rewards/margins": 5.826353073120117, "rewards/rejected": -6.336785316467285, "step": 4935 }, { "epoch": 1.09, "learning_rate": 9.136571141691376e-06, "logits/chosen": -1.469604730606079, "logits/rejected": -1.3521381616592407, "logps/chosen": -94.28221130371094, "logps/rejected": -148.59292602539062, "loss": 0.6784, "rewards/accuracies": 0.0, "rewards/chosen": -1.8802673816680908, "rewards/margins": -1.044886827468872, "rewards/rejected": -0.8353805541992188, "step": 4936 }, { "epoch": 1.09, "learning_rate": 9.135564054622108e-06, "logits/chosen": -1.3671928644180298, "logits/rejected": -1.3907103538513184, "logps/chosen": -120.46009063720703, "logps/rejected": -125.55364990234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.37294846773147583, "rewards/margins": 7.510206699371338, "rewards/rejected": -7.137258052825928, "step": 4937 }, { "epoch": 1.09, "learning_rate": 9.134556436142801e-06, "logits/chosen": -1.4598455429077148, "logits/rejected": -1.773347020149231, "logps/chosen": -272.38751220703125, "logps/rejected": -181.61538696289062, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -1.653497338294983, "rewards/margins": 5.833982467651367, "rewards/rejected": -7.4874796867370605, "step": 4938 }, { "epoch": 1.09, "learning_rate": 9.133548286382932e-06, "logits/chosen": -1.1741983890533447, "logits/rejected": -1.2376008033752441, "logps/chosen": -217.02723693847656, "logps/rejected": -105.37718200683594, "loss": 0.0391, "rewards/accuracies": 1.0, "rewards/chosen": 0.02626495435833931, "rewards/margins": 2.7607223987579346, "rewards/rejected": -2.734457492828369, "step": 4939 }, { "epoch": 1.09, "learning_rate": 9.132539605472044e-06, "logits/chosen": -1.122695803642273, "logits/rejected": -1.1464834213256836, "logps/chosen": -200.10498046875, "logps/rejected": -270.28472900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.3756027221679688, "rewards/margins": 12.172128677368164, "rewards/rejected": -10.796525955200195, "step": 4940 }, { "epoch": 1.09, "learning_rate": 9.131530393539752e-06, "logits/chosen": -1.3399631977081299, "logits/rejected": -1.391692876815796, "logps/chosen": -150.22332763671875, "logps/rejected": -234.6546173095703, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -2.2092225551605225, "rewards/margins": 4.946832656860352, "rewards/rejected": -7.156054973602295, "step": 4941 }, { "epoch": 1.09, "learning_rate": 9.130520650715735e-06, "logits/chosen": -1.4686622619628906, "logits/rejected": -1.4556572437286377, "logps/chosen": -133.6339111328125, "logps/rejected": -193.9486846923828, "loss": 0.3496, "rewards/accuracies": 1.0, "rewards/chosen": -1.5586906671524048, "rewards/margins": 5.0962982177734375, "rewards/rejected": -6.654988765716553, "step": 4942 }, { "epoch": 1.09, "learning_rate": 9.129510377129745e-06, "logits/chosen": -1.0970609188079834, "logits/rejected": -1.0791822671890259, "logps/chosen": -170.45928955078125, "logps/rejected": -136.26373291015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 2.219280958175659, "rewards/margins": 6.914467811584473, "rewards/rejected": -4.695186614990234, "step": 4943 }, { "epoch": 1.09, "learning_rate": 9.128499572911596e-06, "logits/chosen": -1.1022059917449951, "logits/rejected": -1.0420862436294556, "logps/chosen": -236.12640380859375, "logps/rejected": -173.0255126953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.208425998687744, "rewards/margins": 11.822940826416016, "rewards/rejected": -9.61451530456543, "step": 4944 }, { "epoch": 1.09, "learning_rate": 9.12748823819118e-06, "logits/chosen": -1.298948884010315, "logits/rejected": -1.2862873077392578, "logps/chosen": -88.77433776855469, "logps/rejected": -86.64060974121094, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.9364761114120483, "rewards/margins": 3.7383127212524414, "rewards/rejected": -5.674788951873779, "step": 4945 }, { "epoch": 1.09, "learning_rate": 9.126476373098446e-06, "logits/chosen": -1.2773975133895874, "logits/rejected": -1.3098522424697876, "logps/chosen": -140.9650115966797, "logps/rejected": -116.56017303466797, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.04157562181353569, "rewards/margins": 8.069731712341309, "rewards/rejected": -8.111307144165039, "step": 4946 }, { "epoch": 1.09, "learning_rate": 9.125463977763417e-06, "logits/chosen": -1.3192940950393677, "logits/rejected": -1.2883940935134888, "logps/chosen": -114.04779815673828, "logps/rejected": -110.13555145263672, "loss": 0.1212, "rewards/accuracies": 1.0, "rewards/chosen": -0.3348960876464844, "rewards/margins": 1.2971222400665283, "rewards/rejected": -1.6320183277130127, "step": 4947 }, { "epoch": 1.1, "learning_rate": 9.124451052316185e-06, "logits/chosen": -1.1528040170669556, "logits/rejected": -1.0581021308898926, "logps/chosen": -116.69239044189453, "logps/rejected": -179.9595184326172, "loss": 0.1727, "rewards/accuracies": 1.0, "rewards/chosen": 0.3502052426338196, "rewards/margins": 0.8858757019042969, "rewards/rejected": -0.5356704592704773, "step": 4948 }, { "epoch": 1.1, "learning_rate": 9.123437596886909e-06, "logits/chosen": -1.3514260053634644, "logits/rejected": -1.3453527688980103, "logps/chosen": -114.4319076538086, "logps/rejected": -148.67874145507812, "loss": 0.1047, "rewards/accuracies": 1.0, "rewards/chosen": -2.950188398361206, "rewards/margins": 2.6905105113983154, "rewards/rejected": -5.6406989097595215, "step": 4949 }, { "epoch": 1.1, "learning_rate": 9.122423611605814e-06, "logits/chosen": -1.3338651657104492, "logits/rejected": -1.3066303730010986, "logps/chosen": -104.89220428466797, "logps/rejected": -173.17279052734375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 0.607648491859436, "rewards/margins": 4.322474002838135, "rewards/rejected": -3.714825391769409, "step": 4950 }, { "epoch": 1.1, "learning_rate": 9.121409096603193e-06, "logits/chosen": -1.4925308227539062, "logits/rejected": -1.4904789924621582, "logps/chosen": -91.05001831054688, "logps/rejected": -106.02928161621094, "loss": 0.1814, "rewards/accuracies": 1.0, "rewards/chosen": -1.4385483264923096, "rewards/margins": 0.8359389305114746, "rewards/rejected": -2.274487257003784, "step": 4951 }, { "epoch": 1.1, "learning_rate": 9.120394052009412e-06, "logits/chosen": -1.3375465869903564, "logits/rejected": -1.3101332187652588, "logps/chosen": -54.70014953613281, "logps/rejected": -131.2396697998047, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.5134391784667969, "rewards/margins": 6.5760416984558105, "rewards/rejected": -6.062602519989014, "step": 4952 }, { "epoch": 1.1, "learning_rate": 9.1193784779549e-06, "logits/chosen": -1.341132640838623, "logits/rejected": -1.325577974319458, "logps/chosen": -236.7545623779297, "logps/rejected": -276.65777587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.24329987168312073, "rewards/margins": 10.501842498779297, "rewards/rejected": -10.745141983032227, "step": 4953 }, { "epoch": 1.1, "learning_rate": 9.118362374570158e-06, "logits/chosen": -1.325170874595642, "logits/rejected": -1.3126811981201172, "logps/chosen": -165.80514526367188, "logps/rejected": -162.19007873535156, "loss": 0.0479, "rewards/accuracies": 1.0, "rewards/chosen": -0.4618667662143707, "rewards/margins": 3.9331769943237305, "rewards/rejected": -4.395043849945068, "step": 4954 }, { "epoch": 1.1, "learning_rate": 9.117345741985749e-06, "logits/chosen": -1.071577548980713, "logits/rejected": -1.088358759880066, "logps/chosen": -204.08348083496094, "logps/rejected": -181.87881469726562, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": 2.147007703781128, "rewards/margins": 4.1690473556518555, "rewards/rejected": -2.0220398902893066, "step": 4955 }, { "epoch": 1.1, "learning_rate": 9.116328580332309e-06, "logits/chosen": -1.056926965713501, "logits/rejected": -1.0780634880065918, "logps/chosen": -107.92901611328125, "logps/rejected": -115.6429214477539, "loss": 0.1062, "rewards/accuracies": 1.0, "rewards/chosen": -0.9830284118652344, "rewards/margins": 1.507267713546753, "rewards/rejected": -2.4902961254119873, "step": 4956 }, { "epoch": 1.1, "learning_rate": 9.115310889740545e-06, "logits/chosen": -1.2083837985992432, "logits/rejected": -1.152915596961975, "logps/chosen": -170.84573364257812, "logps/rejected": -106.76923370361328, "loss": 0.9372, "rewards/accuracies": 0.0, "rewards/chosen": -8.863654136657715, "rewards/margins": -0.8145875930786133, "rewards/rejected": -8.049066543579102, "step": 4957 }, { "epoch": 1.1, "learning_rate": 9.114292670341222e-06, "logits/chosen": -1.1657509803771973, "logits/rejected": -1.0960561037063599, "logps/chosen": -100.38410949707031, "logps/rejected": -275.8734130859375, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": 1.6167885065078735, "rewards/margins": 11.098206520080566, "rewards/rejected": -9.481417655944824, "step": 4958 }, { "epoch": 1.1, "learning_rate": 9.113273922265183e-06, "logits/chosen": -1.1538050174713135, "logits/rejected": -1.1035797595977783, "logps/chosen": -167.66207885742188, "logps/rejected": -327.09442138671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.6905288696289062, "rewards/margins": 9.39586353302002, "rewards/rejected": -8.705334663391113, "step": 4959 }, { "epoch": 1.1, "learning_rate": 9.112254645643332e-06, "logits/chosen": -1.4211474657058716, "logits/rejected": -1.355778694152832, "logps/chosen": -127.60993957519531, "logps/rejected": -221.5953369140625, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -3.8877029418945312, "rewards/margins": 3.2407379150390625, "rewards/rejected": -7.128440856933594, "step": 4960 }, { "epoch": 1.1, "learning_rate": 9.111234840606647e-06, "logits/chosen": -1.4174401760101318, "logits/rejected": -1.417381763458252, "logps/chosen": -104.83029174804688, "logps/rejected": -145.61593627929688, "loss": 0.0593, "rewards/accuracies": 1.0, "rewards/chosen": 0.16203156113624573, "rewards/margins": 2.077227830886841, "rewards/rejected": -1.9151962995529175, "step": 4961 }, { "epoch": 1.1, "learning_rate": 9.110214507286167e-06, "logits/chosen": -1.0436711311340332, "logits/rejected": -1.1057952642440796, "logps/chosen": -142.762939453125, "logps/rejected": -132.51144409179688, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -0.05638733133673668, "rewards/margins": 3.3642425537109375, "rewards/rejected": -3.4206299781799316, "step": 4962 }, { "epoch": 1.1, "learning_rate": 9.109193645813001e-06, "logits/chosen": -1.1677069664001465, "logits/rejected": -0.6316215395927429, "logps/chosen": -135.65499877929688, "logps/rejected": -1007.9734497070312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0006120204925537, "rewards/margins": 90.71371459960938, "rewards/rejected": -92.71432495117188, "step": 4963 }, { "epoch": 1.1, "learning_rate": 9.10817225631833e-06, "logits/chosen": -1.6225565671920776, "logits/rejected": -1.6283564567565918, "logps/chosen": -83.57684326171875, "logps/rejected": -148.44882202148438, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.26766204833984375, "rewards/margins": 6.134471416473389, "rewards/rejected": -6.402133464813232, "step": 4964 }, { "epoch": 1.1, "learning_rate": 9.107150338933403e-06, "logits/chosen": -1.5323501825332642, "logits/rejected": -1.541756510734558, "logps/chosen": -59.98514938354492, "logps/rejected": -125.01303100585938, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.7210147976875305, "rewards/margins": 5.123331546783447, "rewards/rejected": -5.844346523284912, "step": 4965 }, { "epoch": 1.1, "learning_rate": 9.10612789378953e-06, "logits/chosen": -1.1791911125183105, "logits/rejected": -1.1791911125183105, "logps/chosen": -206.2564697265625, "logps/rejected": -206.2564697265625, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -9.018915176391602, "rewards/margins": 0.0, "rewards/rejected": -9.018915176391602, "step": 4966 }, { "epoch": 1.1, "learning_rate": 9.105104921018092e-06, "logits/chosen": -1.6281481981277466, "logits/rejected": -1.8337476253509521, "logps/chosen": -190.20928955078125, "logps/rejected": -95.86091613769531, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 1.854090929031372, "rewards/margins": 8.489175796508789, "rewards/rejected": -6.635084629058838, "step": 4967 }, { "epoch": 1.1, "learning_rate": 9.10408142075054e-06, "logits/chosen": -1.0925124883651733, "logits/rejected": -0.9734663367271423, "logps/chosen": -219.58859252929688, "logps/rejected": -341.220458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.9038055539131165, "rewards/margins": 12.619125366210938, "rewards/rejected": -11.715319633483887, "step": 4968 }, { "epoch": 1.1, "learning_rate": 9.103057393118392e-06, "logits/chosen": -1.544331431388855, "logits/rejected": -1.572282075881958, "logps/chosen": -123.72381591796875, "logps/rejected": -113.1611328125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 0.146342471241951, "rewards/margins": 5.1729912757873535, "rewards/rejected": -5.026648998260498, "step": 4969 }, { "epoch": 1.1, "learning_rate": 9.102032838253232e-06, "logits/chosen": -1.3191227912902832, "logits/rejected": -1.2766467332839966, "logps/chosen": -94.79154968261719, "logps/rejected": -274.878662109375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.5529083609580994, "rewards/margins": 10.383743286132812, "rewards/rejected": -9.830835342407227, "step": 4970 }, { "epoch": 1.1, "learning_rate": 9.101007756286713e-06, "logits/chosen": -1.0285643339157104, "logits/rejected": -1.0285643339157104, "logps/chosen": -190.45431518554688, "logps/rejected": -190.45431518554688, "loss": 0.3508, "rewards/accuracies": 0.0, "rewards/chosen": -1.424713134765625, "rewards/margins": 0.0, "rewards/rejected": -1.424713134765625, "step": 4971 }, { "epoch": 1.1, "learning_rate": 9.099982147350558e-06, "logits/chosen": -1.03049635887146, "logits/rejected": -0.9383856058120728, "logps/chosen": -110.09544372558594, "logps/rejected": -225.5023193359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4642685055732727, "rewards/margins": 7.948179244995117, "rewards/rejected": -8.412447929382324, "step": 4972 }, { "epoch": 1.1, "learning_rate": 9.098956011576552e-06, "logits/chosen": -1.3606808185577393, "logits/rejected": -1.357688307762146, "logps/chosen": -67.3503646850586, "logps/rejected": -83.5569839477539, "loss": 0.0794, "rewards/accuracies": 1.0, "rewards/chosen": -3.3663792610168457, "rewards/margins": 1.8847780227661133, "rewards/rejected": -5.251157283782959, "step": 4973 }, { "epoch": 1.1, "learning_rate": 9.097929349096551e-06, "logits/chosen": -1.6268091201782227, "logits/rejected": -1.7304314374923706, "logps/chosen": -166.94113159179688, "logps/rejected": -132.09954833984375, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": 0.11687012016773224, "rewards/margins": 3.832028388977051, "rewards/rejected": -3.715158224105835, "step": 4974 }, { "epoch": 1.1, "learning_rate": 9.09690216004248e-06, "logits/chosen": -1.2668726444244385, "logits/rejected": -1.2668726444244385, "logps/chosen": -186.02554321289062, "logps/rejected": -186.02554321289062, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.418724060058594, "rewards/margins": 0.0, "rewards/rejected": -6.418724060058594, "step": 4975 }, { "epoch": 1.1, "learning_rate": 9.09587444454633e-06, "logits/chosen": -1.7200039625167847, "logits/rejected": -1.769335389137268, "logps/chosen": -119.95770263671875, "logps/rejected": -95.21343994140625, "loss": 0.369, "rewards/accuracies": 0.0, "rewards/chosen": -2.022778272628784, "rewards/margins": -0.08746945858001709, "rewards/rejected": -1.935308814048767, "step": 4976 }, { "epoch": 1.1, "learning_rate": 9.094846202740162e-06, "logits/chosen": -1.3809324502944946, "logits/rejected": -1.4553577899932861, "logps/chosen": -148.83383178710938, "logps/rejected": -141.66552734375, "loss": 0.1337, "rewards/accuracies": 1.0, "rewards/chosen": -3.2052292823791504, "rewards/margins": 3.6656746864318848, "rewards/rejected": -6.870903968811035, "step": 4977 }, { "epoch": 1.1, "learning_rate": 9.0938174347561e-06, "logits/chosen": -1.5997838973999023, "logits/rejected": -1.6028435230255127, "logps/chosen": -130.33152770996094, "logps/rejected": -186.74386596679688, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.327127069234848, "rewards/margins": 5.523449897766113, "rewards/rejected": -5.196322917938232, "step": 4978 }, { "epoch": 1.1, "learning_rate": 9.092788140726338e-06, "logits/chosen": -1.418145775794983, "logits/rejected": -1.3462461233139038, "logps/chosen": -106.04092407226562, "logps/rejected": -193.013671875, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": 0.612170398235321, "rewards/margins": 8.580540657043457, "rewards/rejected": -7.968369960784912, "step": 4979 }, { "epoch": 1.1, "learning_rate": 9.091758320783139e-06, "logits/chosen": -1.1420650482177734, "logits/rejected": -1.1379286050796509, "logps/chosen": -104.08784484863281, "logps/rejected": -113.18133544921875, "loss": 0.5515, "rewards/accuracies": 0.0, "rewards/chosen": -2.161364793777466, "rewards/margins": -0.6996239423751831, "rewards/rejected": -1.4617408514022827, "step": 4980 }, { "epoch": 1.1, "learning_rate": 9.090727975058833e-06, "logits/chosen": -0.9515698552131653, "logits/rejected": -0.9255498647689819, "logps/chosen": -95.55264282226562, "logps/rejected": -188.8141632080078, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.42297670245170593, "rewards/margins": 5.838040351867676, "rewards/rejected": -6.261016845703125, "step": 4981 }, { "epoch": 1.1, "learning_rate": 9.089697103685815e-06, "logits/chosen": -1.1654199361801147, "logits/rejected": -1.2906169891357422, "logps/chosen": -146.08477783203125, "logps/rejected": -144.47866821289062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.918649435043335, "rewards/margins": 8.4236421585083, "rewards/rejected": -11.342291831970215, "step": 4982 }, { "epoch": 1.1, "learning_rate": 9.08866570679655e-06, "logits/chosen": -1.4609583616256714, "logits/rejected": -1.4800465106964111, "logps/chosen": -140.11715698242188, "logps/rejected": -194.54983520507812, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -4.329902648925781, "rewards/margins": 3.955136299133301, "rewards/rejected": -8.285038948059082, "step": 4983 }, { "epoch": 1.1, "learning_rate": 9.087633784523574e-06, "logits/chosen": -0.9347721338272095, "logits/rejected": -0.8859577775001526, "logps/chosen": -178.718505859375, "logps/rejected": -247.4149169921875, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -1.8996765613555908, "rewards/margins": 2.957293748855591, "rewards/rejected": -4.856970310211182, "step": 4984 }, { "epoch": 1.1, "learning_rate": 9.08660133699948e-06, "logits/chosen": -1.5127613544464111, "logits/rejected": -1.5409599542617798, "logps/chosen": -207.54116821289062, "logps/rejected": -228.6842498779297, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.1117005348205566, "rewards/margins": 6.367895603179932, "rewards/rejected": -8.479596138000488, "step": 4985 }, { "epoch": 1.1, "learning_rate": 9.085568364356939e-06, "logits/chosen": -1.3564010858535767, "logits/rejected": -1.269267201423645, "logps/chosen": -122.68829345703125, "logps/rejected": -223.08042907714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.14406585693359375, "rewards/margins": 12.259062767028809, "rewards/rejected": -12.403128623962402, "step": 4986 }, { "epoch": 1.1, "learning_rate": 9.084534866728683e-06, "logits/chosen": -1.2427912950515747, "logits/rejected": -1.2756133079528809, "logps/chosen": -91.77074432373047, "logps/rejected": -108.605224609375, "loss": 0.3878, "rewards/accuracies": 1.0, "rewards/chosen": -2.3175179958343506, "rewards/margins": 1.6843535900115967, "rewards/rejected": -4.001871585845947, "step": 4987 }, { "epoch": 1.1, "learning_rate": 9.083500844247517e-06, "logits/chosen": -0.8029349446296692, "logits/rejected": -0.8227599263191223, "logps/chosen": -96.09251403808594, "logps/rejected": -87.2544937133789, "loss": 0.8136, "rewards/accuracies": 0.0, "rewards/chosen": -3.1135292053222656, "rewards/margins": -0.43491387367248535, "rewards/rejected": -2.6786153316497803, "step": 4988 }, { "epoch": 1.1, "learning_rate": 9.082466297046308e-06, "logits/chosen": -1.3454604148864746, "logits/rejected": -1.308485984802246, "logps/chosen": -83.24140167236328, "logps/rejected": -145.4414825439453, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 0.9082237482070923, "rewards/margins": 11.73754596710205, "rewards/rejected": -10.82932186126709, "step": 4989 }, { "epoch": 1.1, "learning_rate": 9.081431225257994e-06, "logits/chosen": -1.400681495666504, "logits/rejected": -1.5952919721603394, "logps/chosen": -205.0711669921875, "logps/rejected": -174.44873046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.3865509033203125, "rewards/margins": 8.260953903198242, "rewards/rejected": -6.8744025230407715, "step": 4990 }, { "epoch": 1.1, "learning_rate": 9.08039562901558e-06, "logits/chosen": -1.5011690855026245, "logits/rejected": -1.4578720331192017, "logps/chosen": -122.11405944824219, "logps/rejected": -264.5347595214844, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": -5.6354241371154785, "rewards/margins": 3.6806254386901855, "rewards/rejected": -9.316049575805664, "step": 4991 }, { "epoch": 1.1, "learning_rate": 9.079359508452138e-06, "logits/chosen": -1.333735466003418, "logits/rejected": -1.453900933265686, "logps/chosen": -263.62158203125, "logps/rejected": -177.2660369873047, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -2.447134494781494, "rewards/margins": 3.6428017616271973, "rewards/rejected": -6.089936256408691, "step": 4992 }, { "epoch": 1.11, "learning_rate": 9.078322863700803e-06, "logits/chosen": -0.7680993676185608, "logits/rejected": -0.4099072813987732, "logps/chosen": -97.44738006591797, "logps/rejected": -702.1663208007812, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": 0.8188140988349915, "rewards/margins": 59.49989318847656, "rewards/rejected": -58.68107986450195, "step": 4993 }, { "epoch": 1.11, "learning_rate": 9.077285694894786e-06, "logits/chosen": -1.3165650367736816, "logits/rejected": -1.2796274423599243, "logps/chosen": -42.57305145263672, "logps/rejected": -152.69610595703125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.8529018759727478, "rewards/margins": 12.10196304321289, "rewards/rejected": -11.249061584472656, "step": 4994 }, { "epoch": 1.11, "learning_rate": 9.076248002167357e-06, "logits/chosen": -1.3870760202407837, "logits/rejected": -1.4010742902755737, "logps/chosen": -166.85968017578125, "logps/rejected": -293.660400390625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.9232498407363892, "rewards/margins": 13.260881423950195, "rewards/rejected": -12.337631225585938, "step": 4995 }, { "epoch": 1.11, "learning_rate": 9.07520978565186e-06, "logits/chosen": -1.209869146347046, "logits/rejected": -1.2005525827407837, "logps/chosen": -79.62799835205078, "logps/rejected": -114.31358337402344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.16507339477539062, "rewards/margins": 8.360064506530762, "rewards/rejected": -8.194991111755371, "step": 4996 }, { "epoch": 1.11, "learning_rate": 9.074171045481701e-06, "logits/chosen": -1.2436065673828125, "logits/rejected": -1.3316612243652344, "logps/chosen": -180.6125946044922, "logps/rejected": -212.0574951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.8672409057617188, "rewards/margins": 10.978629112243652, "rewards/rejected": -10.111388206481934, "step": 4997 }, { "epoch": 1.11, "learning_rate": 9.073131781790358e-06, "logits/chosen": -1.6862596273422241, "logits/rejected": -1.4100019931793213, "logps/chosen": -150.8028564453125, "logps/rejected": -412.8255920410156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.7606140375137329, "rewards/margins": 12.138428688049316, "rewards/rejected": -11.377814292907715, "step": 4998 }, { "epoch": 1.11, "learning_rate": 9.072091994711372e-06, "logits/chosen": -1.1774150133132935, "logits/rejected": -1.1757128238677979, "logps/chosen": -102.1629867553711, "logps/rejected": -205.8553466796875, "loss": 0.1668, "rewards/accuracies": 1.0, "rewards/chosen": -0.9316245913505554, "rewards/margins": 0.9380043148994446, "rewards/rejected": -1.86962890625, "step": 4999 }, { "epoch": 1.11, "learning_rate": 9.071051684378352e-06, "logits/chosen": -1.6033873558044434, "logits/rejected": -1.6049046516418457, "logps/chosen": -145.13241577148438, "logps/rejected": -152.17324829101562, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": -1.2994660139083862, "rewards/margins": 2.548100471496582, "rewards/rejected": -3.8475663661956787, "step": 5000 }, { "epoch": 1.11, "learning_rate": 9.07001085092498e-06, "logits/chosen": -1.1233717203140259, "logits/rejected": -1.0534448623657227, "logps/chosen": -105.10104370117188, "logps/rejected": -197.03240966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.000225067138671875, "rewards/margins": 9.372196197509766, "rewards/rejected": -9.372421264648438, "step": 5001 }, { "epoch": 1.11, "learning_rate": 9.068969494484996e-06, "logits/chosen": -1.3316329717636108, "logits/rejected": -1.3009830713272095, "logps/chosen": -100.22021484375, "logps/rejected": -133.50665283203125, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6187179684638977, "rewards/margins": 3.363943576812744, "rewards/rejected": -3.982661485671997, "step": 5002 }, { "epoch": 1.11, "learning_rate": 9.067927615192214e-06, "logits/chosen": -1.4446274042129517, "logits/rejected": -1.4435921907424927, "logps/chosen": -84.97937774658203, "logps/rejected": -104.08094024658203, "loss": 0.1917, "rewards/accuracies": 1.0, "rewards/chosen": -2.1644935607910156, "rewards/margins": 0.7631003856658936, "rewards/rejected": -2.927593946456909, "step": 5003 }, { "epoch": 1.11, "learning_rate": 9.066885213180512e-06, "logits/chosen": -1.1882870197296143, "logits/rejected": -1.0334798097610474, "logps/chosen": -73.27195739746094, "logps/rejected": -379.0445556640625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.1720123291015625, "rewards/margins": 9.286166191101074, "rewards/rejected": -9.458178520202637, "step": 5004 }, { "epoch": 1.11, "learning_rate": 9.065842288583838e-06, "logits/chosen": -0.9412264227867126, "logits/rejected": -0.9070184826850891, "logps/chosen": -210.3184814453125, "logps/rejected": -328.14471435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.564300537109375, "rewards/margins": 11.738092422485352, "rewards/rejected": -17.302392959594727, "step": 5005 }, { "epoch": 1.11, "learning_rate": 9.064798841536203e-06, "logits/chosen": -1.1948363780975342, "logits/rejected": -1.191062092781067, "logps/chosen": -118.50216674804688, "logps/rejected": -148.26939392089844, "loss": 0.151, "rewards/accuracies": 1.0, "rewards/chosen": -4.066349983215332, "rewards/margins": 1.0422215461730957, "rewards/rejected": -5.108571529388428, "step": 5006 }, { "epoch": 1.11, "learning_rate": 9.063754872171686e-06, "logits/chosen": -1.413510799407959, "logits/rejected": -1.413510799407959, "logps/chosen": -298.0531311035156, "logps/rejected": -298.0531311035156, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -11.964741706848145, "rewards/margins": 0.0, "rewards/rejected": -11.964741706848145, "step": 5007 }, { "epoch": 1.11, "learning_rate": 9.062710380624439e-06, "logits/chosen": -1.1736451387405396, "logits/rejected": -0.8105230927467346, "logps/chosen": -147.6637725830078, "logps/rejected": -958.0023803710938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.830047607421875, "rewards/margins": 77.53064727783203, "rewards/rejected": -76.70059967041016, "step": 5008 }, { "epoch": 1.11, "learning_rate": 9.061665367028676e-06, "logits/chosen": -1.1171643733978271, "logits/rejected": -1.149488925933838, "logps/chosen": -207.60662841796875, "logps/rejected": -188.15103149414062, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.9360305666923523, "rewards/margins": 4.69844388961792, "rewards/rejected": -5.634474277496338, "step": 5009 }, { "epoch": 1.11, "learning_rate": 9.060619831518676e-06, "logits/chosen": -0.9553580284118652, "logits/rejected": -0.9816384315490723, "logps/chosen": -251.88241577148438, "logps/rejected": -76.95658874511719, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -2.451373338699341, "rewards/margins": 2.947073221206665, "rewards/rejected": -5.398446559906006, "step": 5010 }, { "epoch": 1.11, "learning_rate": 9.05957377422879e-06, "logits/chosen": -1.297837257385254, "logits/rejected": -1.328770637512207, "logps/chosen": -146.04220581054688, "logps/rejected": -218.53759765625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 4.14361572265625, "rewards/margins": 8.27727222442627, "rewards/rejected": -4.1336565017700195, "step": 5011 }, { "epoch": 1.11, "learning_rate": 9.058527195293431e-06, "logits/chosen": -1.1784250736236572, "logits/rejected": -1.139528751373291, "logps/chosen": -80.36377716064453, "logps/rejected": -131.51109313964844, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -0.5545616149902344, "rewards/margins": 4.083643436431885, "rewards/rejected": -4.638205051422119, "step": 5012 }, { "epoch": 1.11, "learning_rate": 9.057480094847085e-06, "logits/chosen": -1.1457339525222778, "logits/rejected": -0.4324871301651001, "logps/chosen": -142.76437377929688, "logps/rejected": -386.21673583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5994354486465454, "rewards/margins": 17.334768295288086, "rewards/rejected": -17.9342041015625, "step": 5013 }, { "epoch": 1.11, "learning_rate": 9.056432473024302e-06, "logits/chosen": -1.4835463762283325, "logits/rejected": -1.4733059406280518, "logps/chosen": -199.4075927734375, "logps/rejected": -264.72088623046875, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -0.12245788425207138, "rewards/margins": 3.636099100112915, "rewards/rejected": -3.758557081222534, "step": 5014 }, { "epoch": 1.11, "learning_rate": 9.055384329959695e-06, "logits/chosen": -1.4554747343063354, "logits/rejected": -1.3390337228775024, "logps/chosen": -212.50994873046875, "logps/rejected": -399.2326965332031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.082815647125244, "rewards/margins": 13.974279403686523, "rewards/rejected": -16.05709457397461, "step": 5015 }, { "epoch": 1.11, "learning_rate": 9.054335665787952e-06, "logits/chosen": -1.3721165657043457, "logits/rejected": -1.3311283588409424, "logps/chosen": -182.9616241455078, "logps/rejected": -136.53306579589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 4.655208110809326, "rewards/margins": 9.482799530029297, "rewards/rejected": -4.8275909423828125, "step": 5016 }, { "epoch": 1.11, "learning_rate": 9.053286480643822e-06, "logits/chosen": -1.4618215560913086, "logits/rejected": -1.461092472076416, "logps/chosen": -197.97523498535156, "logps/rejected": -163.7464141845703, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -4.476689338684082, "rewards/margins": 5.011200904846191, "rewards/rejected": -9.487890243530273, "step": 5017 }, { "epoch": 1.11, "learning_rate": 9.052236774662123e-06, "logits/chosen": -1.285188913345337, "logits/rejected": -1.2323888540267944, "logps/chosen": -127.05866241455078, "logps/rejected": -203.3360595703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6784767508506775, "rewards/margins": 8.065364837646484, "rewards/rejected": -8.743841171264648, "step": 5018 }, { "epoch": 1.11, "learning_rate": 9.051186547977739e-06, "logits/chosen": -1.201857566833496, "logits/rejected": -1.1314340829849243, "logps/chosen": -78.41229248046875, "logps/rejected": -186.4993896484375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.2430938482284546, "rewards/margins": 6.722455024719238, "rewards/rejected": -5.479361057281494, "step": 5019 }, { "epoch": 1.11, "learning_rate": 9.050135800725623e-06, "logits/chosen": -1.3421456813812256, "logits/rejected": -1.3079320192337036, "logps/chosen": -99.8392562866211, "logps/rejected": -138.5006866455078, "loss": 0.3325, "rewards/accuracies": 1.0, "rewards/chosen": -1.7549095153808594, "rewards/margins": 0.05703127384185791, "rewards/rejected": -1.8119407892227173, "step": 5020 }, { "epoch": 1.11, "learning_rate": 9.049084533040794e-06, "logits/chosen": -0.9554332494735718, "logits/rejected": -0.9430705308914185, "logps/chosen": -121.81387329101562, "logps/rejected": -172.98590087890625, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -3.1572372913360596, "rewards/margins": 2.529503107070923, "rewards/rejected": -5.686740398406982, "step": 5021 }, { "epoch": 1.11, "learning_rate": 9.048032745058335e-06, "logits/chosen": -1.665063738822937, "logits/rejected": -1.685802936553955, "logps/chosen": -99.48951721191406, "logps/rejected": -164.28125, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.8459068536758423, "rewards/margins": 4.209726810455322, "rewards/rejected": -6.055633544921875, "step": 5022 }, { "epoch": 1.11, "learning_rate": 9.0469804369134e-06, "logits/chosen": -1.3562836647033691, "logits/rejected": -0.6614292860031128, "logps/chosen": -127.97773742675781, "logps/rejected": -719.9029541015625, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -2.675032138824463, "rewards/margins": 47.02391815185547, "rewards/rejected": -49.698951721191406, "step": 5023 }, { "epoch": 1.11, "learning_rate": 9.045927608741207e-06, "logits/chosen": -0.9650021195411682, "logits/rejected": -0.9341034889221191, "logps/chosen": -69.51475524902344, "logps/rejected": -108.97087097167969, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.306864619255066, "rewards/margins": 4.131489276885986, "rewards/rejected": -5.438354015350342, "step": 5024 }, { "epoch": 1.11, "learning_rate": 9.044874260677043e-06, "logits/chosen": -1.1361058950424194, "logits/rejected": -1.1361058950424194, "logps/chosen": -125.78706359863281, "logps/rejected": -125.78706359863281, "loss": 0.8447, "rewards/accuracies": 0.0, "rewards/chosen": -4.1116943359375, "rewards/margins": 0.0, "rewards/rejected": -4.1116943359375, "step": 5025 }, { "epoch": 1.11, "learning_rate": 9.043820392856259e-06, "logits/chosen": -0.9885136485099792, "logits/rejected": -1.0032684803009033, "logps/chosen": -171.4616241455078, "logps/rejected": -175.0986328125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 4.343696594238281, "rewards/margins": 11.110597610473633, "rewards/rejected": -6.766900539398193, "step": 5026 }, { "epoch": 1.11, "learning_rate": 9.042766005414278e-06, "logits/chosen": -1.4592785835266113, "logits/rejected": -1.567621111869812, "logps/chosen": -179.56317138671875, "logps/rejected": -114.50082397460938, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -0.14971923828125, "rewards/margins": 2.913412570953369, "rewards/rejected": -3.063131809234619, "step": 5027 }, { "epoch": 1.11, "learning_rate": 9.041711098486583e-06, "logits/chosen": -0.9689509272575378, "logits/rejected": -0.8307622671127319, "logps/chosen": -157.15869140625, "logps/rejected": -298.7553405761719, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -3.1022019386291504, "rewards/margins": 3.9258651733398438, "rewards/rejected": -7.028067111968994, "step": 5028 }, { "epoch": 1.11, "learning_rate": 9.040655672208727e-06, "logits/chosen": -1.1368359327316284, "logits/rejected": -1.13535737991333, "logps/chosen": -86.95294189453125, "logps/rejected": -90.7032470703125, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -2.9049267768859863, "rewards/margins": 3.786656379699707, "rewards/rejected": -6.691583156585693, "step": 5029 }, { "epoch": 1.11, "learning_rate": 9.03959972671633e-06, "logits/chosen": -1.5548170804977417, "logits/rejected": -1.5387529134750366, "logps/chosen": -131.11233520507812, "logps/rejected": -189.06471252441406, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.30655214190483093, "rewards/margins": 4.073997974395752, "rewards/rejected": -4.380549907684326, "step": 5030 }, { "epoch": 1.11, "learning_rate": 9.03854326214508e-06, "logits/chosen": -1.310096025466919, "logits/rejected": -1.2829498052597046, "logps/chosen": -104.29360961914062, "logps/rejected": -174.87860107421875, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -0.5675239562988281, "rewards/margins": 3.1148247718811035, "rewards/rejected": -3.6823487281799316, "step": 5031 }, { "epoch": 1.11, "learning_rate": 9.037486278630729e-06, "logits/chosen": -1.1991831064224243, "logits/rejected": -1.1764036417007446, "logps/chosen": -126.07466125488281, "logps/rejected": -196.9075469970703, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.8687485456466675, "rewards/margins": 6.352561950683594, "rewards/rejected": -8.22131061553955, "step": 5032 }, { "epoch": 1.11, "learning_rate": 9.036428776309096e-06, "logits/chosen": -0.9332665205001831, "logits/rejected": -0.9114693403244019, "logps/chosen": -188.60702514648438, "logps/rejected": -271.2414245605469, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.2856826782226562, "rewards/margins": 4.756834506988525, "rewards/rejected": -6.042517185211182, "step": 5033 }, { "epoch": 1.11, "learning_rate": 9.03537075531607e-06, "logits/chosen": -0.9367154240608215, "logits/rejected": -0.9824554920196533, "logps/chosen": -127.5881118774414, "logps/rejected": -88.51252746582031, "loss": 1.1701, "rewards/accuracies": 0.0, "rewards/chosen": -3.36399245262146, "rewards/margins": -2.238778829574585, "rewards/rejected": -1.125213623046875, "step": 5034 }, { "epoch": 1.11, "learning_rate": 9.034312215787603e-06, "logits/chosen": -1.0948301553726196, "logits/rejected": -1.1176865100860596, "logps/chosen": -184.6617431640625, "logps/rejected": -202.48776245117188, "loss": 0.9436, "rewards/accuracies": 1.0, "rewards/chosen": -1.1325165033340454, "rewards/margins": 4.483194351196289, "rewards/rejected": -5.615710735321045, "step": 5035 }, { "epoch": 1.11, "learning_rate": 9.033253157859715e-06, "logits/chosen": -1.2774059772491455, "logits/rejected": -1.3016868829727173, "logps/chosen": -216.10604858398438, "logps/rejected": -195.12423706054688, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.10018157958984375, "rewards/margins": 5.737724304199219, "rewards/rejected": -5.8379058837890625, "step": 5036 }, { "epoch": 1.11, "learning_rate": 9.03219358166849e-06, "logits/chosen": -1.0330657958984375, "logits/rejected": -0.9476354718208313, "logps/chosen": -144.40896606445312, "logps/rejected": -122.00225830078125, "loss": 0.0868, "rewards/accuracies": 1.0, "rewards/chosen": -1.2277374267578125, "rewards/margins": 1.669874668121338, "rewards/rejected": -2.8976120948791504, "step": 5037 }, { "epoch": 1.12, "learning_rate": 9.031133487350084e-06, "logits/chosen": -1.0624144077301025, "logits/rejected": -1.0697163343429565, "logps/chosen": -105.87617492675781, "logps/rejected": -140.1686248779297, "loss": 0.3893, "rewards/accuracies": 1.0, "rewards/chosen": -2.219041585922241, "rewards/margins": 8.881112098693848, "rewards/rejected": -11.100153923034668, "step": 5038 }, { "epoch": 1.12, "learning_rate": 9.030072875040714e-06, "logits/chosen": -1.2619417905807495, "logits/rejected": -1.2650535106658936, "logps/chosen": -75.25315856933594, "logps/rejected": -93.1064224243164, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -1.3903640508651733, "rewards/margins": 2.991128921508789, "rewards/rejected": -4.381493091583252, "step": 5039 }, { "epoch": 1.12, "learning_rate": 9.029011744876669e-06, "logits/chosen": -1.218300461769104, "logits/rejected": -1.1732470989227295, "logps/chosen": -166.1983642578125, "logps/rejected": -261.00787353515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.620500326156616, "rewards/margins": 6.060929298400879, "rewards/rejected": -9.681429862976074, "step": 5040 }, { "epoch": 1.12, "learning_rate": 9.027950096994299e-06, "logits/chosen": -1.0738016366958618, "logits/rejected": -1.0285439491271973, "logps/chosen": -103.8702392578125, "logps/rejected": -241.8509521484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.8632095456123352, "rewards/margins": 8.607658386230469, "rewards/rejected": -7.744449138641357, "step": 5041 }, { "epoch": 1.12, "learning_rate": 9.026887931530026e-06, "logits/chosen": -1.3228812217712402, "logits/rejected": -1.3228812217712402, "logps/chosen": -106.66593933105469, "logps/rejected": -106.66593933105469, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -1.7847130298614502, "rewards/margins": 0.0, "rewards/rejected": -1.7847130298614502, "step": 5042 }, { "epoch": 1.12, "learning_rate": 9.025825248620332e-06, "logits/chosen": -1.6359517574310303, "logits/rejected": -1.587918758392334, "logps/chosen": -160.05552673339844, "logps/rejected": -239.71249389648438, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.8024688959121704, "rewards/margins": 7.485785007476807, "rewards/rejected": -8.288253784179688, "step": 5043 }, { "epoch": 1.12, "learning_rate": 9.024762048401775e-06, "logits/chosen": -1.3027620315551758, "logits/rejected": -1.37626314163208, "logps/chosen": -236.8238525390625, "logps/rejected": -106.71678924560547, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": 0.02698669396340847, "rewards/margins": 2.3891334533691406, "rewards/rejected": -2.3621468544006348, "step": 5044 }, { "epoch": 1.12, "learning_rate": 9.023698331010966e-06, "logits/chosen": -1.4664300680160522, "logits/rejected": -1.5173429250717163, "logps/chosen": -174.32366943359375, "logps/rejected": -121.2403564453125, "loss": 0.5376, "rewards/accuracies": 0.0, "rewards/chosen": -1.322503685951233, "rewards/margins": -0.6566238403320312, "rewards/rejected": -0.6658798456192017, "step": 5045 }, { "epoch": 1.12, "learning_rate": 9.022634096584597e-06, "logits/chosen": -1.1400407552719116, "logits/rejected": -1.096722960472107, "logps/chosen": -82.3880615234375, "logps/rejected": -191.94680786132812, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.3767135739326477, "rewards/margins": 7.288074970245361, "rewards/rejected": -7.664788722991943, "step": 5046 }, { "epoch": 1.12, "learning_rate": 9.021569345259415e-06, "logits/chosen": -1.374102234840393, "logits/rejected": -1.3302644491195679, "logps/chosen": -111.82183837890625, "logps/rejected": -86.11199188232422, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": -2.5468766689300537, "rewards/margins": 2.5485360622406006, "rewards/rejected": -5.095412731170654, "step": 5047 }, { "epoch": 1.12, "learning_rate": 9.02050407717224e-06, "logits/chosen": -1.2616442441940308, "logits/rejected": -1.3452900648117065, "logps/chosen": -151.9796905517578, "logps/rejected": -108.16531372070312, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -5.198982238769531, "rewards/margins": 2.535489559173584, "rewards/rejected": -7.734471797943115, "step": 5048 }, { "epoch": 1.12, "learning_rate": 9.019438292459958e-06, "logits/chosen": -1.0729737281799316, "logits/rejected": -0.9874464273452759, "logps/chosen": -227.5264434814453, "logps/rejected": -249.47802734375, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": 2.8266189098358154, "rewards/margins": 5.671290397644043, "rewards/rejected": -2.8446717262268066, "step": 5049 }, { "epoch": 1.12, "learning_rate": 9.018371991259516e-06, "logits/chosen": -0.9811500906944275, "logits/rejected": -0.9086806178092957, "logps/chosen": -100.27284240722656, "logps/rejected": -185.8962860107422, "loss": 0.135, "rewards/accuracies": 1.0, "rewards/chosen": -0.6537505984306335, "rewards/margins": 1.1715927124023438, "rewards/rejected": -1.825343370437622, "step": 5050 }, { "epoch": 1.12, "learning_rate": 9.017305173707932e-06, "logits/chosen": -1.479653000831604, "logits/rejected": -0.8157921433448792, "logps/chosen": -168.091552734375, "logps/rejected": -577.720458984375, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -7.538269996643066, "rewards/margins": 36.5247802734375, "rewards/rejected": -44.06304931640625, "step": 5051 }, { "epoch": 1.12, "learning_rate": 9.016237839942294e-06, "logits/chosen": -1.1655077934265137, "logits/rejected": -1.1401126384735107, "logps/chosen": -144.4537811279297, "logps/rejected": -172.06414794921875, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -1.8059974908828735, "rewards/margins": 3.0110650062561035, "rewards/rejected": -4.8170623779296875, "step": 5052 }, { "epoch": 1.12, "learning_rate": 9.015169990099746e-06, "logits/chosen": -1.339135766029358, "logits/rejected": -0.6191924810409546, "logps/chosen": -108.625732421875, "logps/rejected": -408.02105712890625, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": -1.6680694818496704, "rewards/margins": 27.22064781188965, "rewards/rejected": -28.888717651367188, "step": 5053 }, { "epoch": 1.12, "learning_rate": 9.014101624317506e-06, "logits/chosen": -0.9793131351470947, "logits/rejected": -0.9386478662490845, "logps/chosen": -112.32516479492188, "logps/rejected": -140.59127807617188, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": -0.5370239615440369, "rewards/margins": 1.9861388206481934, "rewards/rejected": -2.523162841796875, "step": 5054 }, { "epoch": 1.12, "learning_rate": 9.013032742732858e-06, "logits/chosen": -1.2854669094085693, "logits/rejected": -1.304770588874817, "logps/chosen": -193.13229370117188, "logps/rejected": -185.3011474609375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.3694778680801392, "rewards/margins": 6.419055461883545, "rewards/rejected": -7.7885332107543945, "step": 5055 }, { "epoch": 1.12, "learning_rate": 9.01196334548315e-06, "logits/chosen": -1.0182225704193115, "logits/rejected": -1.1199604272842407, "logps/chosen": -189.41542053222656, "logps/rejected": -168.8255615234375, "loss": 0.13, "rewards/accuracies": 1.0, "rewards/chosen": -3.9645798206329346, "rewards/margins": 1.2650139331817627, "rewards/rejected": -5.229593753814697, "step": 5056 }, { "epoch": 1.12, "learning_rate": 9.010893432705796e-06, "logits/chosen": -1.2495182752609253, "logits/rejected": -1.2798240184783936, "logps/chosen": -97.3532485961914, "logps/rejected": -192.7996826171875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 1.080409288406372, "rewards/margins": 10.493115425109863, "rewards/rejected": -9.41270637512207, "step": 5057 }, { "epoch": 1.12, "learning_rate": 9.009823004538278e-06, "logits/chosen": -1.313944697380066, "logits/rejected": -1.2817543745040894, "logps/chosen": -90.32369995117188, "logps/rejected": -191.7779083251953, "loss": 0.0529, "rewards/accuracies": 1.0, "rewards/chosen": 1.3044662475585938, "rewards/margins": 4.561459541320801, "rewards/rejected": -3.256993055343628, "step": 5058 }, { "epoch": 1.12, "learning_rate": 9.008752061118143e-06, "logits/chosen": -1.3255290985107422, "logits/rejected": -1.4301220178604126, "logps/chosen": -271.7581481933594, "logps/rejected": -229.18783569335938, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.27239686250686646, "rewards/margins": 7.587237358093262, "rewards/rejected": -7.8596343994140625, "step": 5059 }, { "epoch": 1.12, "learning_rate": 9.007680602583005e-06, "logits/chosen": -1.7069499492645264, "logits/rejected": -1.8747433423995972, "logps/chosen": -136.0331573486328, "logps/rejected": -117.66228485107422, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.31928253173828125, "rewards/margins": 8.245145797729492, "rewards/rejected": -8.564428329467773, "step": 5060 }, { "epoch": 1.12, "learning_rate": 9.006608629070543e-06, "logits/chosen": -1.526425838470459, "logits/rejected": -1.4505521059036255, "logps/chosen": -57.446407318115234, "logps/rejected": -185.73443603515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.7808658480644226, "rewards/margins": 8.229023933410645, "rewards/rejected": -7.448157787322998, "step": 5061 }, { "epoch": 1.12, "learning_rate": 9.005536140718506e-06, "logits/chosen": -0.992209792137146, "logits/rejected": -0.990275502204895, "logps/chosen": -193.60916137695312, "logps/rejected": -120.09942626953125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 1.0878311395645142, "rewards/margins": 7.0862884521484375, "rewards/rejected": -5.998457431793213, "step": 5062 }, { "epoch": 1.12, "learning_rate": 9.004463137664701e-06, "logits/chosen": -1.1429860591888428, "logits/rejected": -1.111088514328003, "logps/chosen": -151.33262634277344, "logps/rejected": -165.9309844970703, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.10963287204504013, "rewards/margins": 5.065725803375244, "rewards/rejected": -5.175358772277832, "step": 5063 }, { "epoch": 1.12, "learning_rate": 9.003389620047012e-06, "logits/chosen": -1.4229388236999512, "logits/rejected": -1.363982915878296, "logps/chosen": -127.44975280761719, "logps/rejected": -200.33120727539062, "loss": 0.3949, "rewards/accuracies": 0.0, "rewards/chosen": -0.9111968874931335, "rewards/margins": -0.1849212646484375, "rewards/rejected": -0.726275622844696, "step": 5064 }, { "epoch": 1.12, "learning_rate": 9.002315588003378e-06, "logits/chosen": -1.4984599351882935, "logits/rejected": -1.4984599351882935, "logps/chosen": -152.3567352294922, "logps/rejected": -152.3567352294922, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -5.630043983459473, "rewards/margins": 0.0, "rewards/rejected": -5.630043983459473, "step": 5065 }, { "epoch": 1.12, "learning_rate": 9.001241041671814e-06, "logits/chosen": -1.2399591207504272, "logits/rejected": -1.3193410634994507, "logps/chosen": -192.0474853515625, "logps/rejected": -113.04383087158203, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 1.5192962884902954, "rewards/margins": 5.269288539886475, "rewards/rejected": -3.7499923706054688, "step": 5066 }, { "epoch": 1.12, "learning_rate": 9.000165981190396e-06, "logits/chosen": -1.3658369779586792, "logits/rejected": -1.3500022888183594, "logps/chosen": -116.28773498535156, "logps/rejected": -158.33291625976562, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 1.0555237531661987, "rewards/margins": 7.309362411499023, "rewards/rejected": -6.253838539123535, "step": 5067 }, { "epoch": 1.12, "learning_rate": 8.999090406697263e-06, "logits/chosen": -1.274808406829834, "logits/rejected": -1.2504011392593384, "logps/chosen": -111.93927001953125, "logps/rejected": -298.216552734375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.13504333794116974, "rewards/margins": 6.56387186050415, "rewards/rejected": -6.698915004730225, "step": 5068 }, { "epoch": 1.12, "learning_rate": 8.998014318330627e-06, "logits/chosen": -1.3987194299697876, "logits/rejected": -1.3730125427246094, "logps/chosen": -166.67556762695312, "logps/rejected": -214.91329956054688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.0658600330352783, "rewards/margins": 10.446428298950195, "rewards/rejected": -8.380568504333496, "step": 5069 }, { "epoch": 1.12, "learning_rate": 8.996937716228763e-06, "logits/chosen": -0.9006990194320679, "logits/rejected": -0.8679617047309875, "logps/chosen": -198.70230102539062, "logps/rejected": -161.57421875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 3.1197509765625, "rewards/margins": 5.5868024826049805, "rewards/rejected": -2.4670517444610596, "step": 5070 }, { "epoch": 1.12, "learning_rate": 8.99586060053001e-06, "logits/chosen": -1.2099254131317139, "logits/rejected": -1.1851954460144043, "logps/chosen": -87.28024291992188, "logps/rejected": -130.14892578125, "loss": 0.0436, "rewards/accuracies": 1.0, "rewards/chosen": -0.012088012881577015, "rewards/margins": 2.4112839698791504, "rewards/rejected": -2.4233720302581787, "step": 5071 }, { "epoch": 1.12, "learning_rate": 8.994782971372776e-06, "logits/chosen": -1.3109558820724487, "logits/rejected": -1.229761004447937, "logps/chosen": -174.2635040283203, "logps/rejected": -264.0106201171875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.30368804931640625, "rewards/margins": 5.35806131362915, "rewards/rejected": -5.054373264312744, "step": 5072 }, { "epoch": 1.12, "learning_rate": 8.993704828895533e-06, "logits/chosen": -1.1286346912384033, "logits/rejected": -0.6061442494392395, "logps/chosen": -38.74592590332031, "logps/rejected": -388.3143615722656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5623844265937805, "rewards/margins": 28.427396774291992, "rewards/rejected": -28.98978042602539, "step": 5073 }, { "epoch": 1.12, "learning_rate": 8.99262617323682e-06, "logits/chosen": -1.4984256029129028, "logits/rejected": -1.4052103757858276, "logps/chosen": -130.0329132080078, "logps/rejected": -216.25189208984375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.32340699434280396, "rewards/margins": 4.847233295440674, "rewards/rejected": -5.170640468597412, "step": 5074 }, { "epoch": 1.12, "learning_rate": 8.991547004535244e-06, "logits/chosen": -1.1732923984527588, "logits/rejected": -1.1337734460830688, "logps/chosen": -146.97720336914062, "logps/rejected": -152.0596923828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.15354323387146, "rewards/margins": 12.68388557434082, "rewards/rejected": -10.530342102050781, "step": 5075 }, { "epoch": 1.12, "learning_rate": 8.99046732292947e-06, "logits/chosen": -2.0338199138641357, "logits/rejected": -1.7986557483673096, "logps/chosen": -182.07888793945312, "logps/rejected": -276.25103759765625, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -1.9612168073654175, "rewards/margins": 9.316352844238281, "rewards/rejected": -11.277569770812988, "step": 5076 }, { "epoch": 1.12, "learning_rate": 8.98938712855824e-06, "logits/chosen": -1.125383973121643, "logits/rejected": -1.1360008716583252, "logps/chosen": -186.46640014648438, "logps/rejected": -159.7367401123047, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 3.993072509765625, "rewards/margins": 4.9025678634643555, "rewards/rejected": -0.9094955325126648, "step": 5077 }, { "epoch": 1.12, "learning_rate": 8.988306421560354e-06, "logits/chosen": -1.7371819019317627, "logits/rejected": -1.712402582168579, "logps/chosen": -65.54656219482422, "logps/rejected": -76.54510498046875, "loss": 0.372, "rewards/accuracies": 1.0, "rewards/chosen": -0.1693195402622223, "rewards/margins": 0.5807689428329468, "rewards/rejected": -0.7500885128974915, "step": 5078 }, { "epoch": 1.12, "learning_rate": 8.98722520207468e-06, "logits/chosen": -1.0804860591888428, "logits/rejected": -1.0517979860305786, "logps/chosen": -88.55795288085938, "logps/rejected": -141.76072692871094, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -0.4536842405796051, "rewards/margins": 4.075047492980957, "rewards/rejected": -4.528731822967529, "step": 5079 }, { "epoch": 1.12, "learning_rate": 8.986143470240152e-06, "logits/chosen": -1.4686789512634277, "logits/rejected": -1.4360111951828003, "logps/chosen": -106.7260513305664, "logps/rejected": -126.85739135742188, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": 0.9582450985908508, "rewards/margins": 4.618964672088623, "rewards/rejected": -3.660719394683838, "step": 5080 }, { "epoch": 1.12, "learning_rate": 8.98506122619577e-06, "logits/chosen": -1.0505857467651367, "logits/rejected": -0.9786771535873413, "logps/chosen": -77.68057250976562, "logps/rejected": -277.78546142578125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.9041526913642883, "rewards/margins": 7.142385482788086, "rewards/rejected": -6.238232612609863, "step": 5081 }, { "epoch": 1.12, "learning_rate": 8.983978470080603e-06, "logits/chosen": -1.2043782472610474, "logits/rejected": -1.2055765390396118, "logps/chosen": -144.5298614501953, "logps/rejected": -154.96401977539062, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -0.21133270859718323, "rewards/margins": 4.026515007019043, "rewards/rejected": -4.237847805023193, "step": 5082 }, { "epoch": 1.13, "learning_rate": 8.982895202033776e-06, "logits/chosen": -1.2120416164398193, "logits/rejected": -1.2082191705703735, "logps/chosen": -91.48439025878906, "logps/rejected": -152.51193237304688, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -0.82928466796875, "rewards/margins": 2.9481523036956787, "rewards/rejected": -3.7774369716644287, "step": 5083 }, { "epoch": 1.13, "learning_rate": 8.981811422194493e-06, "logits/chosen": -1.441881537437439, "logits/rejected": -1.405694603919983, "logps/chosen": -114.46943664550781, "logps/rejected": -118.88472747802734, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 2.2012863159179688, "rewards/margins": 5.116291046142578, "rewards/rejected": -2.9150047302246094, "step": 5084 }, { "epoch": 1.13, "learning_rate": 8.980727130702014e-06, "logits/chosen": -1.2234007120132446, "logits/rejected": -1.2107211351394653, "logps/chosen": -108.5849609375, "logps/rejected": -158.00828552246094, "loss": 0.1179, "rewards/accuracies": 1.0, "rewards/chosen": -1.673754096031189, "rewards/margins": 3.7427687644958496, "rewards/rejected": -5.416522979736328, "step": 5085 }, { "epoch": 1.13, "learning_rate": 8.979642327695668e-06, "logits/chosen": -1.5292890071868896, "logits/rejected": -1.408422827720642, "logps/chosen": -109.90250396728516, "logps/rejected": -253.56861877441406, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -2.0951988697052, "rewards/margins": 2.8926475048065186, "rewards/rejected": -4.987846374511719, "step": 5086 }, { "epoch": 1.13, "learning_rate": 8.978557013314848e-06, "logits/chosen": -1.4740042686462402, "logits/rejected": -1.4722278118133545, "logps/chosen": -130.91766357421875, "logps/rejected": -157.03579711914062, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -1.0735000371932983, "rewards/margins": 3.123185634613037, "rewards/rejected": -4.196685791015625, "step": 5087 }, { "epoch": 1.13, "learning_rate": 8.977471187699019e-06, "logits/chosen": -1.6057018041610718, "logits/rejected": -1.6806586980819702, "logps/chosen": -147.3975067138672, "logps/rejected": -138.00917053222656, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -0.5767745971679688, "rewards/margins": 9.72265338897705, "rewards/rejected": -10.29942798614502, "step": 5088 }, { "epoch": 1.13, "learning_rate": 8.976384850987702e-06, "logits/chosen": -1.490038275718689, "logits/rejected": -1.461578369140625, "logps/chosen": -75.69953918457031, "logps/rejected": -151.01678466796875, "loss": 0.07, "rewards/accuracies": 1.0, "rewards/chosen": -1.1086281538009644, "rewards/margins": 1.935498833656311, "rewards/rejected": -3.0441269874572754, "step": 5089 }, { "epoch": 1.13, "learning_rate": 8.97529800332049e-06, "logits/chosen": -1.393755316734314, "logits/rejected": -1.3663527965545654, "logps/chosen": -157.56932067871094, "logps/rejected": -118.00951385498047, "loss": 0.1016, "rewards/accuracies": 1.0, "rewards/chosen": 3.9580368995666504, "rewards/margins": 6.63113260269165, "rewards/rejected": -2.673095703125, "step": 5090 }, { "epoch": 1.13, "learning_rate": 8.974210644837042e-06, "logits/chosen": -1.23897385597229, "logits/rejected": -1.1768568754196167, "logps/chosen": -112.99568176269531, "logps/rejected": -317.4172058105469, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": 0.4947464168071747, "rewards/margins": 4.5748701095581055, "rewards/rejected": -4.0801239013671875, "step": 5091 }, { "epoch": 1.13, "learning_rate": 8.973122775677078e-06, "logits/chosen": -1.2248460054397583, "logits/rejected": -1.1921555995941162, "logps/chosen": -67.40673828125, "logps/rejected": -135.57728576660156, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -0.0950164794921875, "rewards/margins": 3.692864179611206, "rewards/rejected": -3.7878806591033936, "step": 5092 }, { "epoch": 1.13, "learning_rate": 8.97203439598039e-06, "logits/chosen": -1.316402554512024, "logits/rejected": -1.333106517791748, "logps/chosen": -170.22203063964844, "logps/rejected": -122.18267059326172, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.1749801635742188, "rewards/margins": 7.2405500411987305, "rewards/rejected": -8.41553020477295, "step": 5093 }, { "epoch": 1.13, "learning_rate": 8.970945505886832e-06, "logits/chosen": -1.2921043634414673, "logits/rejected": -1.2916522026062012, "logps/chosen": -137.70265197753906, "logps/rejected": -133.39120483398438, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 1.8531646728515625, "rewards/margins": 3.5315468311309814, "rewards/rejected": -1.678382158279419, "step": 5094 }, { "epoch": 1.13, "learning_rate": 8.96985610553632e-06, "logits/chosen": -1.090988278388977, "logits/rejected": -1.0977811813354492, "logps/chosen": -178.19691467285156, "logps/rejected": -127.99000549316406, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/chosen": -1.1471970081329346, "rewards/margins": 2.384932041168213, "rewards/rejected": -3.5321290493011475, "step": 5095 }, { "epoch": 1.13, "learning_rate": 8.968766195068845e-06, "logits/chosen": -1.167589545249939, "logits/rejected": -1.1142460107803345, "logps/chosen": -89.44369506835938, "logps/rejected": -109.10503387451172, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 0.554669976234436, "rewards/margins": 4.903652191162109, "rewards/rejected": -4.348982334136963, "step": 5096 }, { "epoch": 1.13, "learning_rate": 8.967675774624451e-06, "logits/chosen": -1.2220572233200073, "logits/rejected": -1.2221981287002563, "logps/chosen": -163.0343017578125, "logps/rejected": -198.70230102539062, "loss": 0.1665, "rewards/accuracies": 1.0, "rewards/chosen": -0.8580261468887329, "rewards/margins": 5.1004838943481445, "rewards/rejected": -5.958509922027588, "step": 5097 }, { "epoch": 1.13, "learning_rate": 8.96658484434326e-06, "logits/chosen": -1.385246992111206, "logits/rejected": -1.385246992111206, "logps/chosen": -111.53075408935547, "logps/rejected": -111.53075408935547, "loss": 0.4001, "rewards/accuracies": 0.0, "rewards/chosen": -2.992971897125244, "rewards/margins": 0.0, "rewards/rejected": -2.992971897125244, "step": 5098 }, { "epoch": 1.13, "learning_rate": 8.96549340436545e-06, "logits/chosen": -1.3583734035491943, "logits/rejected": -1.3614988327026367, "logps/chosen": -74.57057189941406, "logps/rejected": -154.84869384765625, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": 0.8800430297851562, "rewards/margins": 6.585601806640625, "rewards/rejected": -5.705558776855469, "step": 5099 }, { "epoch": 1.13, "learning_rate": 8.964401454831273e-06, "logits/chosen": -1.13348388671875, "logits/rejected": -1.129966139793396, "logps/chosen": -192.59820556640625, "logps/rejected": -119.06724548339844, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 3.76629638671875, "rewards/margins": 8.097957611083984, "rewards/rejected": -4.331660747528076, "step": 5100 }, { "epoch": 1.13, "learning_rate": 8.963308995881037e-06, "logits/chosen": -1.6343224048614502, "logits/rejected": -1.7107118368148804, "logps/chosen": -209.8395233154297, "logps/rejected": -180.76417541503906, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -0.4396316707134247, "rewards/margins": 2.8822433948516846, "rewards/rejected": -3.3218750953674316, "step": 5101 }, { "epoch": 1.13, "learning_rate": 8.962216027655123e-06, "logits/chosen": -1.8090652227401733, "logits/rejected": -1.7965271472930908, "logps/chosen": -109.94770050048828, "logps/rejected": -126.17941284179688, "loss": 0.3721, "rewards/accuracies": 1.0, "rewards/chosen": 0.018022919073700905, "rewards/margins": 0.37806016206741333, "rewards/rejected": -0.360037237405777, "step": 5102 }, { "epoch": 1.13, "learning_rate": 8.961122550293975e-06, "logits/chosen": -1.0217562913894653, "logits/rejected": -1.020028829574585, "logps/chosen": -110.2844009399414, "logps/rejected": -132.9000244140625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -2.8264451026916504, "rewards/margins": 4.223481178283691, "rewards/rejected": -7.049926280975342, "step": 5103 }, { "epoch": 1.13, "learning_rate": 8.960028563938101e-06, "logits/chosen": -1.1311819553375244, "logits/rejected": -1.026504635810852, "logps/chosen": -68.93292236328125, "logps/rejected": -220.27867126464844, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.5485992431640625, "rewards/margins": 8.687387466430664, "rewards/rejected": -8.138788223266602, "step": 5104 }, { "epoch": 1.13, "learning_rate": 8.958934068728078e-06, "logits/chosen": -1.37053644657135, "logits/rejected": -1.338059663772583, "logps/chosen": -127.94660186767578, "logps/rejected": -175.54568481445312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6766838431358337, "rewards/margins": 5.998476982116699, "rewards/rejected": -6.675160884857178, "step": 5105 }, { "epoch": 1.13, "learning_rate": 8.957839064804542e-06, "logits/chosen": -0.9732241630554199, "logits/rejected": -0.9969248175621033, "logps/chosen": -133.4158477783203, "logps/rejected": -252.3564453125, "loss": 0.1003, "rewards/accuracies": 1.0, "rewards/chosen": -9.051802635192871, "rewards/margins": 1.5042839050292969, "rewards/rejected": -10.556086540222168, "step": 5106 }, { "epoch": 1.13, "learning_rate": 8.9567435523082e-06, "logits/chosen": -1.0587165355682373, "logits/rejected": -1.0731405019760132, "logps/chosen": -72.79966735839844, "logps/rejected": -108.25606536865234, "loss": 0.3573, "rewards/accuracies": 1.0, "rewards/chosen": -0.1991630643606186, "rewards/margins": 3.831979751586914, "rewards/rejected": -4.031142711639404, "step": 5107 }, { "epoch": 1.13, "learning_rate": 8.955647531379826e-06, "logits/chosen": -1.2577295303344727, "logits/rejected": -1.2564908266067505, "logps/chosen": -85.35610961914062, "logps/rejected": -203.5819549560547, "loss": 0.9627, "rewards/accuracies": 1.0, "rewards/chosen": -1.050048828125, "rewards/margins": 10.161410331726074, "rewards/rejected": -11.211459159851074, "step": 5108 }, { "epoch": 1.13, "learning_rate": 8.954551002160252e-06, "logits/chosen": -1.5131809711456299, "logits/rejected": -1.2180113792419434, "logps/chosen": -124.31397247314453, "logps/rejected": -387.083251953125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.6355446577072144, "rewards/margins": 10.606161117553711, "rewards/rejected": -12.241705894470215, "step": 5109 }, { "epoch": 1.13, "learning_rate": 8.95345396479038e-06, "logits/chosen": -1.4695160388946533, "logits/rejected": -1.4642243385314941, "logps/chosen": -132.12094116210938, "logps/rejected": -191.16648864746094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.652374267578125, "rewards/margins": 7.991572856903076, "rewards/rejected": -7.339198589324951, "step": 5110 }, { "epoch": 1.13, "learning_rate": 8.952356419411177e-06, "logits/chosen": -1.1269453763961792, "logits/rejected": -1.1403744220733643, "logps/chosen": -187.24932861328125, "logps/rejected": -202.43325805664062, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": 2.3420441150665283, "rewards/margins": 3.4544448852539062, "rewards/rejected": -1.1124008893966675, "step": 5111 }, { "epoch": 1.13, "learning_rate": 8.951258366163677e-06, "logits/chosen": -1.4602258205413818, "logits/rejected": -1.3981188535690308, "logps/chosen": -84.13418579101562, "logps/rejected": -126.67752075195312, "loss": 0.0707, "rewards/accuracies": 1.0, "rewards/chosen": -0.15021514892578125, "rewards/margins": 1.885206699371338, "rewards/rejected": -2.035421848297119, "step": 5112 }, { "epoch": 1.13, "learning_rate": 8.950159805188973e-06, "logits/chosen": -1.1279256343841553, "logits/rejected": -1.1313326358795166, "logps/chosen": -65.91020202636719, "logps/rejected": -180.7554931640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.12595215439796448, "rewards/margins": 7.425972938537598, "rewards/rejected": -7.551925182342529, "step": 5113 }, { "epoch": 1.13, "learning_rate": 8.949060736628233e-06, "logits/chosen": -1.590380072593689, "logits/rejected": -1.590380072593689, "logps/chosen": -185.06507873535156, "logps/rejected": -185.06507873535156, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -7.097219944000244, "rewards/margins": 0.0, "rewards/rejected": -7.097219944000244, "step": 5114 }, { "epoch": 1.13, "learning_rate": 8.94796116062268e-06, "logits/chosen": -1.2481908798217773, "logits/rejected": -1.2471058368682861, "logps/chosen": -87.41676330566406, "logps/rejected": -221.2803497314453, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.8156219720840454, "rewards/margins": 9.801466941833496, "rewards/rejected": -8.985844612121582, "step": 5115 }, { "epoch": 1.13, "learning_rate": 8.946861077313609e-06, "logits/chosen": -1.062410831451416, "logits/rejected": -1.1731650829315186, "logps/chosen": -185.4117889404297, "logps/rejected": -139.80880737304688, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.3295913636684418, "rewards/margins": 5.203548431396484, "rewards/rejected": -5.533139705657959, "step": 5116 }, { "epoch": 1.13, "learning_rate": 8.945760486842377e-06, "logits/chosen": -1.2825417518615723, "logits/rejected": -1.1943285465240479, "logps/chosen": -74.70277404785156, "logps/rejected": -156.1358184814453, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.24889527261257172, "rewards/margins": 7.644491195678711, "rewards/rejected": -7.893386363983154, "step": 5117 }, { "epoch": 1.13, "learning_rate": 8.944659389350409e-06, "logits/chosen": -1.550404667854309, "logits/rejected": -1.498780369758606, "logps/chosen": -70.41046142578125, "logps/rejected": -208.88186645507812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 1.8402512073516846, "rewards/margins": 5.88675594329834, "rewards/rejected": -4.046504497528076, "step": 5118 }, { "epoch": 1.13, "learning_rate": 8.94355778497919e-06, "logits/chosen": -1.179770588874817, "logits/rejected": -1.1437047719955444, "logps/chosen": -95.12644958496094, "logps/rejected": -151.62266540527344, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 0.01432113628834486, "rewards/margins": 4.47157621383667, "rewards/rejected": -4.457254886627197, "step": 5119 }, { "epoch": 1.13, "learning_rate": 8.942455673870278e-06, "logits/chosen": -1.1123740673065186, "logits/rejected": -1.1123740673065186, "logps/chosen": -54.270774841308594, "logps/rejected": -54.270774841308594, "loss": 0.3498, "rewards/accuracies": 0.0, "rewards/chosen": -3.0877058506011963, "rewards/margins": 0.0, "rewards/rejected": -3.0877058506011963, "step": 5120 }, { "epoch": 1.13, "learning_rate": 8.941353056165288e-06, "logits/chosen": -1.5097711086273193, "logits/rejected": -1.5414785146713257, "logps/chosen": -122.81751251220703, "logps/rejected": -117.21784973144531, "loss": 0.2204, "rewards/accuracies": 1.0, "rewards/chosen": -2.67814040184021, "rewards/margins": 0.5908987522125244, "rewards/rejected": -3.2690391540527344, "step": 5121 }, { "epoch": 1.13, "learning_rate": 8.940249932005904e-06, "logits/chosen": -1.437696099281311, "logits/rejected": -1.4692310094833374, "logps/chosen": -102.94893646240234, "logps/rejected": -86.20921325683594, "loss": 0.1869, "rewards/accuracies": 1.0, "rewards/chosen": 0.2793678343296051, "rewards/margins": 6.65948486328125, "rewards/rejected": -6.380116939544678, "step": 5122 }, { "epoch": 1.13, "learning_rate": 8.939146301533878e-06, "logits/chosen": -1.3933615684509277, "logits/rejected": -1.4152920246124268, "logps/chosen": -210.2113037109375, "logps/rejected": -218.16677856445312, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 4.019308567047119, "rewards/margins": 8.836198806762695, "rewards/rejected": -4.816890239715576, "step": 5123 }, { "epoch": 1.13, "learning_rate": 8.938042164891021e-06, "logits/chosen": -1.1825063228607178, "logits/rejected": -1.1323496103286743, "logps/chosen": -128.5985870361328, "logps/rejected": -190.0377197265625, "loss": 0.0292, "rewards/accuracies": 1.0, "rewards/chosen": -1.7850860357284546, "rewards/margins": 3.11346435546875, "rewards/rejected": -4.898550510406494, "step": 5124 }, { "epoch": 1.13, "learning_rate": 8.936937522219212e-06, "logits/chosen": -1.108795404434204, "logits/rejected": -1.0935077667236328, "logps/chosen": -52.215675354003906, "logps/rejected": -106.52908325195312, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": 0.04843444749712944, "rewards/margins": 2.724498748779297, "rewards/rejected": -2.6760642528533936, "step": 5125 }, { "epoch": 1.13, "learning_rate": 8.935832373660397e-06, "logits/chosen": -1.1183003187179565, "logits/rejected": -1.0941904783248901, "logps/chosen": -180.6783447265625, "logps/rejected": -357.32720947265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 1.8211272954940796, "rewards/margins": 6.470883369445801, "rewards/rejected": -4.649755954742432, "step": 5126 }, { "epoch": 1.13, "learning_rate": 8.934726719356582e-06, "logits/chosen": -1.50654935836792, "logits/rejected": -1.6197407245635986, "logps/chosen": -236.0203857421875, "logps/rejected": -213.49478149414062, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 0.17583008110523224, "rewards/margins": 10.30339527130127, "rewards/rejected": -10.127565383911133, "step": 5127 }, { "epoch": 1.14, "learning_rate": 8.933620559449842e-06, "logits/chosen": -0.996508002281189, "logits/rejected": -0.996508002281189, "logps/chosen": -115.22943878173828, "logps/rejected": -115.22943878173828, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.5429847240448, "rewards/margins": 0.0, "rewards/rejected": -3.5429847240448, "step": 5128 }, { "epoch": 1.14, "learning_rate": 8.932513894082317e-06, "logits/chosen": -0.8600790500640869, "logits/rejected": -0.8314328193664551, "logps/chosen": -127.4046630859375, "logps/rejected": -143.74560546875, "loss": 0.0749, "rewards/accuracies": 1.0, "rewards/chosen": -1.5049896240234375, "rewards/margins": 1.86602783203125, "rewards/rejected": -3.3710174560546875, "step": 5129 }, { "epoch": 1.14, "learning_rate": 8.93140672339621e-06, "logits/chosen": -1.5310813188552856, "logits/rejected": -1.5294485092163086, "logps/chosen": -131.39450073242188, "logps/rejected": -121.279296875, "loss": 0.5436, "rewards/accuracies": 0.0, "rewards/chosen": -4.928238868713379, "rewards/margins": -0.6758394241333008, "rewards/rejected": -4.252399444580078, "step": 5130 }, { "epoch": 1.14, "learning_rate": 8.930299047533792e-06, "logits/chosen": -1.247105598449707, "logits/rejected": -1.2512701749801636, "logps/chosen": -269.2335205078125, "logps/rejected": -204.18649291992188, "loss": 0.4678, "rewards/accuracies": 0.0, "rewards/chosen": -4.121167182922363, "rewards/margins": -0.3987762928009033, "rewards/rejected": -3.72239089012146, "step": 5131 }, { "epoch": 1.14, "learning_rate": 8.929190866637391e-06, "logits/chosen": -1.6340985298156738, "logits/rejected": -1.5910426378250122, "logps/chosen": -109.07208251953125, "logps/rejected": -182.98443603515625, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/chosen": -1.7407913208007812, "rewards/margins": 2.3396072387695312, "rewards/rejected": -4.0803985595703125, "step": 5132 }, { "epoch": 1.14, "learning_rate": 8.92808218084941e-06, "logits/chosen": -1.3482719659805298, "logits/rejected": -1.3600717782974243, "logps/chosen": -206.87359619140625, "logps/rejected": -186.033203125, "loss": 0.0464, "rewards/accuracies": 1.0, "rewards/chosen": -1.651824951171875, "rewards/margins": 2.481719970703125, "rewards/rejected": -4.133544921875, "step": 5133 }, { "epoch": 1.14, "learning_rate": 8.926972990312314e-06, "logits/chosen": -1.2472103834152222, "logits/rejected": -0.8984546065330505, "logps/chosen": -119.63899230957031, "logps/rejected": -233.379638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.98236083984375, "rewards/margins": 13.466784477233887, "rewards/rejected": -14.449145317077637, "step": 5134 }, { "epoch": 1.14, "learning_rate": 8.925863295168628e-06, "logits/chosen": -1.4316836595535278, "logits/rejected": -1.4397318363189697, "logps/chosen": -212.89486694335938, "logps/rejected": -311.9720458984375, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": 1.0378235578536987, "rewards/margins": 13.232904434204102, "rewards/rejected": -12.195080757141113, "step": 5135 }, { "epoch": 1.14, "learning_rate": 8.924753095560945e-06, "logits/chosen": -1.426212191581726, "logits/rejected": -1.5055042505264282, "logps/chosen": -195.84918212890625, "logps/rejected": -142.30938720703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 3.7724685668945312, "rewards/margins": 14.471839904785156, "rewards/rejected": -10.699371337890625, "step": 5136 }, { "epoch": 1.14, "learning_rate": 8.923642391631924e-06, "logits/chosen": -1.3444583415985107, "logits/rejected": -1.2968418598175049, "logps/chosen": -127.88534545898438, "logps/rejected": -226.63514709472656, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -5.022891998291016, "rewards/margins": 4.669864654541016, "rewards/rejected": -9.692756652832031, "step": 5137 }, { "epoch": 1.14, "learning_rate": 8.922531183524287e-06, "logits/chosen": -0.9453160762786865, "logits/rejected": -0.9402642250061035, "logps/chosen": -77.89155578613281, "logps/rejected": -97.50269317626953, "loss": 0.2385, "rewards/accuracies": 1.0, "rewards/chosen": -2.5679969787597656, "rewards/margins": 0.49207544326782227, "rewards/rejected": -3.060072422027588, "step": 5138 }, { "epoch": 1.14, "learning_rate": 8.921419471380826e-06, "logits/chosen": -1.2689474821090698, "logits/rejected": -1.2689474821090698, "logps/chosen": -260.5657043457031, "logps/rejected": -260.5657043457031, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -11.552919387817383, "rewards/margins": 0.0, "rewards/rejected": -11.552919387817383, "step": 5139 }, { "epoch": 1.14, "learning_rate": 8.920307255344386e-06, "logits/chosen": -1.4103610515594482, "logits/rejected": -1.3968772888183594, "logps/chosen": -84.66899871826172, "logps/rejected": -89.87602233886719, "loss": 0.2559, "rewards/accuracies": 1.0, "rewards/chosen": -0.9553489685058594, "rewards/margins": 0.40292584896087646, "rewards/rejected": -1.3582748174667358, "step": 5140 }, { "epoch": 1.14, "learning_rate": 8.91919453555789e-06, "logits/chosen": -0.9147647619247437, "logits/rejected": -0.8977452516555786, "logps/chosen": -233.572998046875, "logps/rejected": -185.94094848632812, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": -5.053309917449951, "rewards/margins": 2.7225613594055176, "rewards/rejected": -7.775871276855469, "step": 5141 }, { "epoch": 1.14, "learning_rate": 8.918081312164318e-06, "logits/chosen": -1.2253550291061401, "logits/rejected": -1.2670809030532837, "logps/chosen": -174.98678588867188, "logps/rejected": -96.79060363769531, "loss": 0.0417, "rewards/accuracies": 1.0, "rewards/chosen": -2.5944886207580566, "rewards/margins": 3.9683637619018555, "rewards/rejected": -6.562852382659912, "step": 5142 }, { "epoch": 1.14, "learning_rate": 8.916967585306715e-06, "logits/chosen": -1.044108510017395, "logits/rejected": -0.9583067297935486, "logps/chosen": -131.23297119140625, "logps/rejected": -215.57986450195312, "loss": 0.158, "rewards/accuracies": 1.0, "rewards/chosen": -1.3692489862442017, "rewards/margins": 4.987684726715088, "rewards/rejected": -6.35693359375, "step": 5143 }, { "epoch": 1.14, "learning_rate": 8.915853355128192e-06, "logits/chosen": -1.5224459171295166, "logits/rejected": -1.5122090578079224, "logps/chosen": -102.41754150390625, "logps/rejected": -83.95738220214844, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.44747239351272583, "rewards/margins": 5.16289758682251, "rewards/rejected": -5.61037015914917, "step": 5144 }, { "epoch": 1.14, "learning_rate": 8.91473862177193e-06, "logits/chosen": -1.2394315004348755, "logits/rejected": -1.2059719562530518, "logps/chosen": -112.9026107788086, "logps/rejected": -150.92347717285156, "loss": 0.3638, "rewards/accuracies": 1.0, "rewards/chosen": -4.4595208168029785, "rewards/margins": 3.35097599029541, "rewards/rejected": -7.810496807098389, "step": 5145 }, { "epoch": 1.14, "learning_rate": 8.913623385381163e-06, "logits/chosen": -1.5064313411712646, "logits/rejected": -1.4862173795700073, "logps/chosen": -84.01325225830078, "logps/rejected": -103.35884857177734, "loss": 0.2356, "rewards/accuracies": 1.0, "rewards/chosen": -0.9045189023017883, "rewards/margins": 0.5216171145439148, "rewards/rejected": -1.4261360168457031, "step": 5146 }, { "epoch": 1.14, "learning_rate": 8.9125076460992e-06, "logits/chosen": -1.1812793016433716, "logits/rejected": -1.1070034503936768, "logps/chosen": -179.50379943847656, "logps/rejected": -231.44549560546875, "loss": 0.0481, "rewards/accuracies": 1.0, "rewards/chosen": -2.047534227371216, "rewards/margins": 5.684844970703125, "rewards/rejected": -7.73237943649292, "step": 5147 }, { "epoch": 1.14, "learning_rate": 8.91139140406941e-06, "logits/chosen": -1.283382773399353, "logits/rejected": -1.2493640184402466, "logps/chosen": -268.4276123046875, "logps/rejected": -168.0025177001953, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.3321151733398438, "rewards/margins": 5.420774936676025, "rewards/rejected": -6.752890110015869, "step": 5148 }, { "epoch": 1.14, "learning_rate": 8.910274659435226e-06, "logits/chosen": -1.251640796661377, "logits/rejected": -1.251640796661377, "logps/chosen": -179.76683044433594, "logps/rejected": -179.76683044433594, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -8.29589557647705, "rewards/margins": 0.0, "rewards/rejected": -8.29589557647705, "step": 5149 }, { "epoch": 1.14, "learning_rate": 8.90915741234015e-06, "logits/chosen": -1.3970391750335693, "logits/rejected": -1.4122381210327148, "logps/chosen": -116.93295288085938, "logps/rejected": -230.1153106689453, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.3456077575683594, "rewards/margins": 8.99002742767334, "rewards/rejected": -8.64441967010498, "step": 5150 }, { "epoch": 1.14, "learning_rate": 8.908039662927743e-06, "logits/chosen": -1.0881913900375366, "logits/rejected": -1.0013678073883057, "logps/chosen": -112.61190795898438, "logps/rejected": -294.47705078125, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.2405213117599487, "rewards/margins": 4.099865436553955, "rewards/rejected": -5.340386867523193, "step": 5151 }, { "epoch": 1.14, "learning_rate": 8.906921411341634e-06, "logits/chosen": -1.3658825159072876, "logits/rejected": -1.3658825159072876, "logps/chosen": -153.43887329101562, "logps/rejected": -153.43887329101562, "loss": 0.4981, "rewards/accuracies": 0.0, "rewards/chosen": -5.534816741943359, "rewards/margins": 0.0, "rewards/rejected": -5.534816741943359, "step": 5152 }, { "epoch": 1.14, "learning_rate": 8.905802657725516e-06, "logits/chosen": -1.3648704290390015, "logits/rejected": -1.2902171611785889, "logps/chosen": -158.31378173828125, "logps/rejected": -171.66903686523438, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.13937683403491974, "rewards/margins": 5.1778154373168945, "rewards/rejected": -5.317192077636719, "step": 5153 }, { "epoch": 1.14, "learning_rate": 8.904683402223146e-06, "logits/chosen": -1.2014926671981812, "logits/rejected": -1.1271740198135376, "logps/chosen": -118.62004089355469, "logps/rejected": -211.09136962890625, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -0.07570495456457138, "rewards/margins": 3.2025985717773438, "rewards/rejected": -3.278303623199463, "step": 5154 }, { "epoch": 1.14, "learning_rate": 8.903563644978346e-06, "logits/chosen": -1.2317452430725098, "logits/rejected": -1.2370715141296387, "logps/chosen": -68.1624755859375, "logps/rejected": -128.03878784179688, "loss": 0.0845, "rewards/accuracies": 1.0, "rewards/chosen": -1.6069087982177734, "rewards/margins": 1.6921467781066895, "rewards/rejected": -3.299055576324463, "step": 5155 }, { "epoch": 1.14, "learning_rate": 8.902443386135e-06, "logits/chosen": -1.0795310735702515, "logits/rejected": -1.0824265480041504, "logps/chosen": -21.205371856689453, "logps/rejected": -43.07133483886719, "loss": 0.0606, "rewards/accuracies": 1.0, "rewards/chosen": -0.884581983089447, "rewards/margins": 2.0496506690979004, "rewards/rejected": -2.934232711791992, "step": 5156 }, { "epoch": 1.14, "learning_rate": 8.90132262583706e-06, "logits/chosen": -1.5005978345870972, "logits/rejected": -1.5060994625091553, "logps/chosen": -90.7493896484375, "logps/rejected": -122.23965454101562, "loss": 0.0488, "rewards/accuracies": 1.0, "rewards/chosen": -1.2613006830215454, "rewards/margins": 2.2896437644958496, "rewards/rejected": -3.5509445667266846, "step": 5157 }, { "epoch": 1.14, "learning_rate": 8.900201364228542e-06, "logits/chosen": -1.2616280317306519, "logits/rejected": -1.2616280317306519, "logps/chosen": -132.77149963378906, "logps/rejected": -132.77149963378906, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.193234920501709, "rewards/margins": 0.0, "rewards/rejected": -4.193234920501709, "step": 5158 }, { "epoch": 1.14, "learning_rate": 8.899079601453524e-06, "logits/chosen": -1.414534091949463, "logits/rejected": -1.370199203491211, "logps/chosen": -80.42406463623047, "logps/rejected": -118.64859771728516, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -0.7895744442939758, "rewards/margins": 3.000200033187866, "rewards/rejected": -3.7897744178771973, "step": 5159 }, { "epoch": 1.14, "learning_rate": 8.897957337656151e-06, "logits/chosen": -1.2603826522827148, "logits/rejected": -1.16720712184906, "logps/chosen": -80.83084106445312, "logps/rejected": -98.21064758300781, "loss": 0.5114, "rewards/accuracies": 1.0, "rewards/chosen": -0.11319885402917862, "rewards/margins": 6.537422180175781, "rewards/rejected": -6.650620937347412, "step": 5160 }, { "epoch": 1.14, "learning_rate": 8.89683457298063e-06, "logits/chosen": -1.11940336227417, "logits/rejected": -1.0507800579071045, "logps/chosen": -59.18193817138672, "logps/rejected": -143.3551788330078, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": 0.15674400329589844, "rewards/margins": 7.429064750671387, "rewards/rejected": -7.272320747375488, "step": 5161 }, { "epoch": 1.14, "learning_rate": 8.895711307571235e-06, "logits/chosen": -1.3131654262542725, "logits/rejected": -1.322693943977356, "logps/chosen": -123.44456481933594, "logps/rejected": -137.5186004638672, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.1964019536972046, "rewards/margins": 5.441359519958496, "rewards/rejected": -6.63776159286499, "step": 5162 }, { "epoch": 1.14, "learning_rate": 8.894587541572301e-06, "logits/chosen": -1.2566604614257812, "logits/rejected": -1.2566604614257812, "logps/chosen": -79.57791137695312, "logps/rejected": -79.57791137695312, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -0.6978783011436462, "rewards/margins": 0.0, "rewards/rejected": -0.6978783011436462, "step": 5163 }, { "epoch": 1.14, "learning_rate": 8.89346327512823e-06, "logits/chosen": -1.273353934288025, "logits/rejected": -1.3171685934066772, "logps/chosen": -204.369873046875, "logps/rejected": -100.27913665771484, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -3.1502182483673096, "rewards/margins": 3.6741883754730225, "rewards/rejected": -6.824406623840332, "step": 5164 }, { "epoch": 1.14, "learning_rate": 8.89233850838349e-06, "logits/chosen": -1.3903911113739014, "logits/rejected": -1.4106574058532715, "logps/chosen": -109.34857177734375, "logps/rejected": -135.94989013671875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 1.0532928705215454, "rewards/margins": 7.40451192855835, "rewards/rejected": -6.351219177246094, "step": 5165 }, { "epoch": 1.14, "learning_rate": 8.891213241482606e-06, "logits/chosen": -1.407873511314392, "logits/rejected": -1.4603160619735718, "logps/chosen": -99.79044342041016, "logps/rejected": -183.68734741210938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5876182913780212, "rewards/margins": 13.410233497619629, "rewards/rejected": -13.997851371765137, "step": 5166 }, { "epoch": 1.14, "learning_rate": 8.890087474570174e-06, "logits/chosen": -0.977921187877655, "logits/rejected": -0.9815186858177185, "logps/chosen": -115.55352783203125, "logps/rejected": -102.41806030273438, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -2.967353105545044, "rewards/margins": 4.226892471313477, "rewards/rejected": -7.1942458152771, "step": 5167 }, { "epoch": 1.14, "learning_rate": 8.888961207790856e-06, "logits/chosen": -1.3146096467971802, "logits/rejected": -1.3308440446853638, "logps/chosen": -158.98912048339844, "logps/rejected": -155.17648315429688, "loss": 0.1026, "rewards/accuracies": 1.0, "rewards/chosen": -4.171212196350098, "rewards/margins": 1.4872655868530273, "rewards/rejected": -5.658477783203125, "step": 5168 }, { "epoch": 1.14, "learning_rate": 8.887834441289369e-06, "logits/chosen": -1.4267436265945435, "logits/rejected": -1.345914363861084, "logps/chosen": -148.36611938476562, "logps/rejected": -291.960205078125, "loss": 0.4273, "rewards/accuracies": 0.0, "rewards/chosen": -1.518280029296875, "rewards/margins": -0.3001037836074829, "rewards/rejected": -1.218176245689392, "step": 5169 }, { "epoch": 1.14, "learning_rate": 8.886707175210503e-06, "logits/chosen": -1.0295149087905884, "logits/rejected": -0.9862969517707825, "logps/chosen": -115.54940032958984, "logps/rejected": -153.1649932861328, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.646196722984314, "rewards/margins": 5.512022495269775, "rewards/rejected": -7.158219337463379, "step": 5170 }, { "epoch": 1.14, "learning_rate": 8.88557940969911e-06, "logits/chosen": -1.1020792722702026, "logits/rejected": -1.110862374305725, "logps/chosen": -54.448463439941406, "logps/rejected": -60.79328536987305, "loss": 0.2246, "rewards/accuracies": 1.0, "rewards/chosen": -3.5607292652130127, "rewards/margins": 0.7056891918182373, "rewards/rejected": -4.26641845703125, "step": 5171 }, { "epoch": 1.14, "learning_rate": 8.884451144900104e-06, "logits/chosen": -1.2992867231369019, "logits/rejected": -0.6312559247016907, "logps/chosen": -137.61013793945312, "logps/rejected": -762.13232421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.427145481109619, "rewards/margins": 53.56941223144531, "rewards/rejected": -55.996559143066406, "step": 5172 }, { "epoch": 1.14, "learning_rate": 8.88332238095846e-06, "logits/chosen": -1.3006477355957031, "logits/rejected": -1.128175973892212, "logps/chosen": -188.1053466796875, "logps/rejected": -376.4892883300781, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.8639618158340454, "rewards/margins": 12.133804321289062, "rewards/rejected": -12.997766494750977, "step": 5173 }, { "epoch": 1.15, "learning_rate": 8.882193118019229e-06, "logits/chosen": -1.2063202857971191, "logits/rejected": -1.1939693689346313, "logps/chosen": -248.5135498046875, "logps/rejected": -187.43362426757812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.09074554592370987, "rewards/margins": 6.055792331695557, "rewards/rejected": -6.146537780761719, "step": 5174 }, { "epoch": 1.15, "learning_rate": 8.881063356227513e-06, "logits/chosen": -1.5169777870178223, "logits/rejected": -1.5377943515777588, "logps/chosen": -103.46138000488281, "logps/rejected": -142.22402954101562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.38441696763038635, "rewards/margins": 10.582799911499023, "rewards/rejected": -10.967216491699219, "step": 5175 }, { "epoch": 1.15, "learning_rate": 8.879933095728485e-06, "logits/chosen": -1.1458081007003784, "logits/rejected": -1.1755043268203735, "logps/chosen": -152.4324951171875, "logps/rejected": -167.36224365234375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 3.2679107189178467, "rewards/margins": 5.3816118240356445, "rewards/rejected": -2.1137008666992188, "step": 5176 }, { "epoch": 1.15, "learning_rate": 8.878802336667384e-06, "logits/chosen": -1.7843211889266968, "logits/rejected": -1.1377248764038086, "logps/chosen": -157.90188598632812, "logps/rejected": -1267.6326904296875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -8.238393783569336, "rewards/margins": 110.46344757080078, "rewards/rejected": -118.70184326171875, "step": 5177 }, { "epoch": 1.15, "learning_rate": 8.877671079189505e-06, "logits/chosen": -1.5122005939483643, "logits/rejected": -1.5956218242645264, "logps/chosen": -108.56983184814453, "logps/rejected": -149.56817626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2991645932197571, "rewards/margins": 10.472757339477539, "rewards/rejected": -10.77192211151123, "step": 5178 }, { "epoch": 1.15, "learning_rate": 8.876539323440214e-06, "logits/chosen": -1.164238691329956, "logits/rejected": -1.0424796342849731, "logps/chosen": -225.2216033935547, "logps/rejected": -304.40863037109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.6502105593681335, "rewards/margins": 9.098055839538574, "rewards/rejected": -8.447845458984375, "step": 5179 }, { "epoch": 1.15, "learning_rate": 8.87540706956494e-06, "logits/chosen": -1.2831835746765137, "logits/rejected": -1.300094723701477, "logps/chosen": -215.98410034179688, "logps/rejected": -226.21554565429688, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 1.3471786975860596, "rewards/margins": 14.064095497131348, "rewards/rejected": -12.716917037963867, "step": 5180 }, { "epoch": 1.15, "learning_rate": 8.874274317709173e-06, "logits/chosen": -1.4115135669708252, "logits/rejected": -1.3677911758422852, "logps/chosen": -63.08380889892578, "logps/rejected": -113.23096466064453, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": 0.8363716006278992, "rewards/margins": 3.306060791015625, "rewards/rejected": -2.469689130783081, "step": 5181 }, { "epoch": 1.15, "learning_rate": 8.873141068018469e-06, "logits/chosen": -1.4628639221191406, "logits/rejected": -1.4628639221191406, "logps/chosen": -120.32286071777344, "logps/rejected": -120.32286071777344, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.533111572265625, "rewards/margins": 0.0, "rewards/rejected": -5.533111572265625, "step": 5182 }, { "epoch": 1.15, "learning_rate": 8.872007320638449e-06, "logits/chosen": -1.3867191076278687, "logits/rejected": -1.32430899143219, "logps/chosen": -118.14191436767578, "logps/rejected": -203.52703857421875, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": 0.1575676053762436, "rewards/margins": 3.2641470432281494, "rewards/rejected": -3.106579542160034, "step": 5183 }, { "epoch": 1.15, "learning_rate": 8.870873075714797e-06, "logits/chosen": -1.1524544954299927, "logits/rejected": -1.1618809700012207, "logps/chosen": -95.24942016601562, "logps/rejected": -123.38650512695312, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -1.0306671857833862, "rewards/margins": 3.076960563659668, "rewards/rejected": -4.107627868652344, "step": 5184 }, { "epoch": 1.15, "learning_rate": 8.86973833339326e-06, "logits/chosen": -0.9663048386573792, "logits/rejected": -0.9118029475212097, "logps/chosen": -166.92092895507812, "logps/rejected": -216.17178344726562, "loss": 0.4422, "rewards/accuracies": 0.0, "rewards/chosen": -1.2047302722930908, "rewards/margins": -0.30997776985168457, "rewards/rejected": -0.8947525024414062, "step": 5185 }, { "epoch": 1.15, "learning_rate": 8.86860309381965e-06, "logits/chosen": -1.474018931388855, "logits/rejected": -1.5241981744766235, "logps/chosen": -67.57492065429688, "logps/rejected": -43.235477447509766, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.16420899331569672, "rewards/margins": 3.4474003314971924, "rewards/rejected": -3.6116092205047607, "step": 5186 }, { "epoch": 1.15, "learning_rate": 8.867467357139842e-06, "logits/chosen": -1.4259648323059082, "logits/rejected": -1.5262436866760254, "logps/chosen": -194.81814575195312, "logps/rejected": -133.85159301757812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.09299774467945099, "rewards/margins": 6.035420894622803, "rewards/rejected": -5.942423343658447, "step": 5187 }, { "epoch": 1.15, "learning_rate": 8.866331123499775e-06, "logits/chosen": -1.1285438537597656, "logits/rejected": -1.206296443939209, "logps/chosen": -300.78948974609375, "logps/rejected": -178.5148162841797, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/chosen": -6.731314182281494, "rewards/margins": 2.498351573944092, "rewards/rejected": -9.229665756225586, "step": 5188 }, { "epoch": 1.15, "learning_rate": 8.865194393045452e-06, "logits/chosen": -0.9591251611709595, "logits/rejected": -0.9647480249404907, "logps/chosen": -67.56759643554688, "logps/rejected": -66.30914306640625, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -1.8459080457687378, "rewards/margins": 3.1257901191711426, "rewards/rejected": -4.97169828414917, "step": 5189 }, { "epoch": 1.15, "learning_rate": 8.864057165922944e-06, "logits/chosen": -1.4745104312896729, "logits/rejected": -1.4987366199493408, "logps/chosen": -130.34498596191406, "logps/rejected": -90.38540649414062, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -2.0173966884613037, "rewards/margins": 4.28203010559082, "rewards/rejected": -6.299426555633545, "step": 5190 }, { "epoch": 1.15, "learning_rate": 8.862919442278379e-06, "logits/chosen": -1.1197315454483032, "logits/rejected": -1.1197315454483032, "logps/chosen": -147.82183837890625, "logps/rejected": -147.82183837890625, "loss": 0.349, "rewards/accuracies": 0.0, "rewards/chosen": -5.044114589691162, "rewards/margins": 0.0, "rewards/rejected": -5.044114589691162, "step": 5191 }, { "epoch": 1.15, "learning_rate": 8.86178122225795e-06, "logits/chosen": -1.2576905488967896, "logits/rejected": -1.207143783569336, "logps/chosen": -86.98171997070312, "logps/rejected": -162.34608459472656, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": -2.741445302963257, "rewards/margins": 2.1555426120758057, "rewards/rejected": -4.8969879150390625, "step": 5192 }, { "epoch": 1.15, "learning_rate": 8.860642506007919e-06, "logits/chosen": -1.1226035356521606, "logits/rejected": -1.140687346458435, "logps/chosen": -105.00414276123047, "logps/rejected": -127.87876892089844, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.1466972380876541, "rewards/margins": 5.443583011627197, "rewards/rejected": -5.590280055999756, "step": 5193 }, { "epoch": 1.15, "learning_rate": 8.859503293674605e-06, "logits/chosen": -1.572415828704834, "logits/rejected": -1.540571689605713, "logps/chosen": -166.08416748046875, "logps/rejected": -162.88514709472656, "loss": 0.1476, "rewards/accuracies": 1.0, "rewards/chosen": -2.355154514312744, "rewards/margins": 1.069596767425537, "rewards/rejected": -3.4247512817382812, "step": 5194 }, { "epoch": 1.15, "learning_rate": 8.858363585404397e-06, "logits/chosen": -1.1235295534133911, "logits/rejected": -0.9832884073257446, "logps/chosen": -110.97772216796875, "logps/rejected": -309.19573974609375, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -2.2471566200256348, "rewards/margins": 3.4448981285095215, "rewards/rejected": -5.692054748535156, "step": 5195 }, { "epoch": 1.15, "learning_rate": 8.857223381343742e-06, "logits/chosen": -1.4621189832687378, "logits/rejected": -1.5081334114074707, "logps/chosen": -182.106201171875, "logps/rejected": -178.98602294921875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 1.7399170398712158, "rewards/margins": 6.13736629486084, "rewards/rejected": -4.397449016571045, "step": 5196 }, { "epoch": 1.15, "learning_rate": 8.856082681639158e-06, "logits/chosen": -1.056158185005188, "logits/rejected": -1.0629260540008545, "logps/chosen": -292.01605224609375, "logps/rejected": -256.78857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7045990228652954, "rewards/margins": 12.27518367767334, "rewards/rejected": -11.570584297180176, "step": 5197 }, { "epoch": 1.15, "learning_rate": 8.854941486437216e-06, "logits/chosen": -1.3143051862716675, "logits/rejected": -1.5175727605819702, "logps/chosen": -267.23052978515625, "logps/rejected": -190.80731201171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 1.4602234363555908, "rewards/margins": 7.496813774108887, "rewards/rejected": -6.036590576171875, "step": 5198 }, { "epoch": 1.15, "learning_rate": 8.853799795884562e-06, "logits/chosen": -1.3968185186386108, "logits/rejected": -1.3968185186386108, "logps/chosen": -83.14102935791016, "logps/rejected": -83.14102935791016, "loss": 0.8057, "rewards/accuracies": 0.0, "rewards/chosen": -6.431812286376953, "rewards/margins": 0.0, "rewards/rejected": -6.431812286376953, "step": 5199 }, { "epoch": 1.15, "learning_rate": 8.852657610127898e-06, "logits/chosen": -1.2836995124816895, "logits/rejected": -1.2771685123443604, "logps/chosen": -119.91326904296875, "logps/rejected": -161.3605499267578, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.7828178405761719, "rewards/margins": 4.948549747467041, "rewards/rejected": -6.731367588043213, "step": 5200 }, { "epoch": 1.15, "learning_rate": 8.851514929313992e-06, "logits/chosen": -1.2887319326400757, "logits/rejected": -1.3207191228866577, "logps/chosen": -145.53732299804688, "logps/rejected": -148.5478515625, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": -2.493312120437622, "rewards/margins": 2.204572916030884, "rewards/rejected": -4.697885036468506, "step": 5201 }, { "epoch": 1.15, "learning_rate": 8.850371753589677e-06, "logits/chosen": -1.3090819120407104, "logits/rejected": -1.2216198444366455, "logps/chosen": -162.93447875976562, "logps/rejected": -273.9795837402344, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -1.9027893543243408, "rewards/margins": 3.7508699893951416, "rewards/rejected": -5.653659343719482, "step": 5202 }, { "epoch": 1.15, "learning_rate": 8.849228083101847e-06, "logits/chosen": -1.2760848999023438, "logits/rejected": -1.278358817100525, "logps/chosen": -122.67092895507812, "logps/rejected": -175.96630859375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.4435250759124756, "rewards/margins": 6.726173400878906, "rewards/rejected": -10.169698715209961, "step": 5203 }, { "epoch": 1.15, "learning_rate": 8.848083917997463e-06, "logits/chosen": -1.4448354244232178, "logits/rejected": -1.532366156578064, "logps/chosen": -180.61740112304688, "logps/rejected": -92.28755187988281, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": 0.21988831460475922, "rewards/margins": 6.741685390472412, "rewards/rejected": -6.521797180175781, "step": 5204 }, { "epoch": 1.15, "learning_rate": 8.846939258423545e-06, "logits/chosen": -1.1286673545837402, "logits/rejected": -1.078539490699768, "logps/chosen": -143.99917602539062, "logps/rejected": -253.4532470703125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.6975677609443665, "rewards/margins": 5.216734409332275, "rewards/rejected": -5.914302349090576, "step": 5205 }, { "epoch": 1.15, "learning_rate": 8.84579410452718e-06, "logits/chosen": -1.614747405052185, "logits/rejected": -1.5006682872772217, "logps/chosen": -199.59332275390625, "logps/rejected": -253.30511474609375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 2.3702850341796875, "rewards/margins": 8.0759859085083, "rewards/rejected": -5.705700874328613, "step": 5206 }, { "epoch": 1.15, "learning_rate": 8.844648456455518e-06, "logits/chosen": -1.0007421970367432, "logits/rejected": -0.9151498675346375, "logps/chosen": -197.7791748046875, "logps/rejected": -378.37213134765625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 4.476742744445801, "rewards/margins": 8.793600082397461, "rewards/rejected": -4.316857814788818, "step": 5207 }, { "epoch": 1.15, "learning_rate": 8.843502314355771e-06, "logits/chosen": -1.179891586303711, "logits/rejected": -1.1613209247589111, "logps/chosen": -157.69430541992188, "logps/rejected": -293.75665283203125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": 2.1924989223480225, "rewards/margins": 5.634225845336914, "rewards/rejected": -3.4417266845703125, "step": 5208 }, { "epoch": 1.15, "learning_rate": 8.842355678375217e-06, "logits/chosen": -0.7539639472961426, "logits/rejected": -0.7302836775779724, "logps/chosen": -120.17295837402344, "logps/rejected": -237.85557556152344, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7108520865440369, "rewards/margins": 6.559340000152588, "rewards/rejected": -7.2701921463012695, "step": 5209 }, { "epoch": 1.15, "learning_rate": 8.841208548661195e-06, "logits/chosen": -1.0806865692138672, "logits/rejected": -1.052394986152649, "logps/chosen": -132.45315551757812, "logps/rejected": -129.38027954101562, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.96759033203125, "rewards/margins": 4.078334808349609, "rewards/rejected": -5.045925140380859, "step": 5210 }, { "epoch": 1.15, "learning_rate": 8.840060925361109e-06, "logits/chosen": -0.9501971006393433, "logits/rejected": -0.9395521283149719, "logps/chosen": -172.32786560058594, "logps/rejected": -108.77572631835938, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -3.544224500656128, "rewards/margins": 4.856769561767578, "rewards/rejected": -8.400994300842285, "step": 5211 }, { "epoch": 1.15, "learning_rate": 8.838912808622424e-06, "logits/chosen": -1.5107678174972534, "logits/rejected": -1.5184967517852783, "logps/chosen": -117.2962646484375, "logps/rejected": -174.7076873779297, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.066558837890625, "rewards/margins": 7.004083633422852, "rewards/rejected": -8.070642471313477, "step": 5212 }, { "epoch": 1.15, "learning_rate": 8.837764198592672e-06, "logits/chosen": -1.6111955642700195, "logits/rejected": -1.6429202556610107, "logps/chosen": -156.83450317382812, "logps/rejected": -147.94857788085938, "loss": 0.0586, "rewards/accuracies": 1.0, "rewards/chosen": 0.1282501220703125, "rewards/margins": 2.8394570350646973, "rewards/rejected": -2.7112069129943848, "step": 5213 }, { "epoch": 1.15, "learning_rate": 8.836615095419448e-06, "logits/chosen": -1.040062427520752, "logits/rejected": -1.0729899406433105, "logps/chosen": -110.19181823730469, "logps/rejected": -176.07916259765625, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": -2.7730088233947754, "rewards/margins": 2.897164821624756, "rewards/rejected": -5.670173645019531, "step": 5214 }, { "epoch": 1.15, "learning_rate": 8.835465499250404e-06, "logits/chosen": -0.9976925849914551, "logits/rejected": -0.9716200828552246, "logps/chosen": -94.5278091430664, "logps/rejected": -102.85873413085938, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": 0.10128097981214523, "rewards/margins": 2.9785423278808594, "rewards/rejected": -2.8772614002227783, "step": 5215 }, { "epoch": 1.15, "learning_rate": 8.834315410233264e-06, "logits/chosen": -1.383919358253479, "logits/rejected": -1.3348362445831299, "logps/chosen": -252.33447265625, "logps/rejected": -467.4825439453125, "loss": 0.034, "rewards/accuracies": 1.0, "rewards/chosen": -0.21419067680835724, "rewards/margins": 15.959357261657715, "rewards/rejected": -16.173547744750977, "step": 5216 }, { "epoch": 1.15, "learning_rate": 8.833164828515815e-06, "logits/chosen": -0.9516729116439819, "logits/rejected": -0.9516729116439819, "logps/chosen": -76.05242919921875, "logps/rejected": -76.05242919921875, "loss": 0.3621, "rewards/accuracies": 0.0, "rewards/chosen": -6.366241455078125, "rewards/margins": 0.0, "rewards/rejected": -6.366241455078125, "step": 5217 }, { "epoch": 1.15, "learning_rate": 8.832013754245895e-06, "logits/chosen": -1.0569078922271729, "logits/rejected": -0.9941364526748657, "logps/chosen": -171.783203125, "logps/rejected": -285.126220703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.6207641959190369, "rewards/margins": 12.36171817779541, "rewards/rejected": -11.740954399108887, "step": 5218 }, { "epoch": 1.16, "learning_rate": 8.830862187571423e-06, "logits/chosen": -1.1442943811416626, "logits/rejected": -1.1442943811416626, "logps/chosen": -158.16110229492188, "logps/rejected": -158.16110229492188, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -7.349708557128906, "rewards/margins": 0.0, "rewards/rejected": -7.349708557128906, "step": 5219 }, { "epoch": 1.16, "learning_rate": 8.829710128640368e-06, "logits/chosen": -1.5264503955841064, "logits/rejected": -1.5264503955841064, "logps/chosen": -130.65472412109375, "logps/rejected": -130.65472412109375, "loss": 0.3474, "rewards/accuracies": 0.0, "rewards/chosen": -1.4542702436447144, "rewards/margins": 0.0, "rewards/rejected": -1.4542702436447144, "step": 5220 }, { "epoch": 1.16, "learning_rate": 8.828557577600769e-06, "logits/chosen": -1.0059436559677124, "logits/rejected": -0.9705317616462708, "logps/chosen": -56.27159881591797, "logps/rejected": -123.28062438964844, "loss": 0.0611, "rewards/accuracies": 1.0, "rewards/chosen": -0.5802505612373352, "rewards/margins": 2.092839241027832, "rewards/rejected": -2.6730897426605225, "step": 5221 }, { "epoch": 1.16, "learning_rate": 8.827404534600723e-06, "logits/chosen": -1.4880409240722656, "logits/rejected": -1.5087783336639404, "logps/chosen": -157.9133758544922, "logps/rejected": -175.6337890625, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 0.4616378843784332, "rewards/margins": 4.584730625152588, "rewards/rejected": -4.1230926513671875, "step": 5222 }, { "epoch": 1.16, "learning_rate": 8.826250999788397e-06, "logits/chosen": -1.570324420928955, "logits/rejected": -1.472793698310852, "logps/chosen": -103.70691680908203, "logps/rejected": -219.60678100585938, "loss": 0.1503, "rewards/accuracies": 1.0, "rewards/chosen": 0.2918548583984375, "rewards/margins": 1.2639052867889404, "rewards/rejected": -0.9720504879951477, "step": 5223 }, { "epoch": 1.16, "learning_rate": 8.825096973312014e-06, "logits/chosen": -1.2644678354263306, "logits/rejected": -1.2951890230178833, "logps/chosen": -78.95599365234375, "logps/rejected": -101.03565979003906, "loss": 0.1211, "rewards/accuracies": 1.0, "rewards/chosen": 0.037184905260801315, "rewards/margins": 1.2943603992462158, "rewards/rejected": -1.2571754455566406, "step": 5224 }, { "epoch": 1.16, "learning_rate": 8.823942455319866e-06, "logits/chosen": -1.0250109434127808, "logits/rejected": -1.0247167348861694, "logps/chosen": -202.0220184326172, "logps/rejected": -211.1255645751953, "loss": 2.0883, "rewards/accuracies": 0.0, "rewards/chosen": -1.7928909063339233, "rewards/margins": -4.1612043380737305, "rewards/rejected": 2.3683135509490967, "step": 5225 }, { "epoch": 1.16, "learning_rate": 8.822787445960303e-06, "logits/chosen": -1.136513352394104, "logits/rejected": -1.090369701385498, "logps/chosen": -151.49862670898438, "logps/rejected": -227.92608642578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.7937819957733154, "rewards/margins": 8.12073040008545, "rewards/rejected": -11.914512634277344, "step": 5226 }, { "epoch": 1.16, "learning_rate": 8.821631945381746e-06, "logits/chosen": -1.2889883518218994, "logits/rejected": -1.2634305953979492, "logps/chosen": -170.37457275390625, "logps/rejected": -175.06919860839844, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 1.1383956670761108, "rewards/margins": 7.3875579833984375, "rewards/rejected": -6.249162197113037, "step": 5227 }, { "epoch": 1.16, "learning_rate": 8.82047595373267e-06, "logits/chosen": -1.504553198814392, "logits/rejected": -1.2427221536636353, "logps/chosen": -166.8309326171875, "logps/rejected": -323.6962585449219, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 0.008731079287827015, "rewards/margins": 4.580074787139893, "rewards/rejected": -4.571343898773193, "step": 5228 }, { "epoch": 1.16, "learning_rate": 8.819319471161617e-06, "logits/chosen": -1.3281291723251343, "logits/rejected": -1.2411853075027466, "logps/chosen": -98.93302154541016, "logps/rejected": -125.84080505371094, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 0.7128288149833679, "rewards/margins": 9.599160194396973, "rewards/rejected": -8.886331558227539, "step": 5229 }, { "epoch": 1.16, "learning_rate": 8.818162497817195e-06, "logits/chosen": -1.295192003250122, "logits/rejected": -1.2309596538543701, "logps/chosen": -83.46890258789062, "logps/rejected": -218.62030029296875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.5533661246299744, "rewards/margins": 8.866189002990723, "rewards/rejected": -8.312823295593262, "step": 5230 }, { "epoch": 1.16, "learning_rate": 8.81700503384807e-06, "logits/chosen": -1.3758479356765747, "logits/rejected": -1.5281338691711426, "logps/chosen": -202.81849670410156, "logps/rejected": -214.50282287597656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.10208892822265625, "rewards/margins": 16.983510971069336, "rewards/rejected": -17.085599899291992, "step": 5231 }, { "epoch": 1.16, "learning_rate": 8.815847079402972e-06, "logits/chosen": -1.1429775953292847, "logits/rejected": -1.1506714820861816, "logps/chosen": -217.7606201171875, "logps/rejected": -177.6830596923828, "loss": 0.2132, "rewards/accuracies": 1.0, "rewards/chosen": -5.517920017242432, "rewards/margins": 0.6317925453186035, "rewards/rejected": -6.149712562561035, "step": 5232 }, { "epoch": 1.16, "learning_rate": 8.814688634630699e-06, "logits/chosen": -1.0485045909881592, "logits/rejected": -1.117800235748291, "logps/chosen": -140.6877899169922, "logps/rejected": -68.97813415527344, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -1.4610458612442017, "rewards/margins": 3.8355135917663574, "rewards/rejected": -5.2965593338012695, "step": 5233 }, { "epoch": 1.16, "learning_rate": 8.813529699680108e-06, "logits/chosen": -1.136866807937622, "logits/rejected": -1.1282639503479004, "logps/chosen": -136.5784912109375, "logps/rejected": -147.26937866210938, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.4330650568008423, "rewards/margins": 4.324338912963867, "rewards/rejected": -5.75740385055542, "step": 5234 }, { "epoch": 1.16, "learning_rate": 8.812370274700117e-06, "logits/chosen": -1.4804261922836304, "logits/rejected": -1.5625730752944946, "logps/chosen": -183.73165893554688, "logps/rejected": -194.33868408203125, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -5.652927398681641, "rewards/margins": 4.703415870666504, "rewards/rejected": -10.356343269348145, "step": 5235 }, { "epoch": 1.16, "learning_rate": 8.81121035983971e-06, "logits/chosen": -1.2061691284179688, "logits/rejected": -1.037503957748413, "logps/chosen": -221.58834838867188, "logps/rejected": -459.15740966796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 3.47336745262146, "rewards/margins": 10.079886436462402, "rewards/rejected": -6.606518745422363, "step": 5236 }, { "epoch": 1.16, "learning_rate": 8.810049955247933e-06, "logits/chosen": -1.149959921836853, "logits/rejected": -1.048073172569275, "logps/chosen": -201.9757080078125, "logps/rejected": -252.3367156982422, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.2523239850997925, "rewards/margins": 6.824792861938477, "rewards/rejected": -8.077116966247559, "step": 5237 }, { "epoch": 1.16, "learning_rate": 8.808889061073897e-06, "logits/chosen": -1.314101219177246, "logits/rejected": -1.3236697912216187, "logps/chosen": -164.30825805664062, "logps/rejected": -109.43254089355469, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.456350713968277, "rewards/margins": 7.6852335929870605, "rewards/rejected": -8.141584396362305, "step": 5238 }, { "epoch": 1.16, "learning_rate": 8.807727677466773e-06, "logits/chosen": -1.230178952217102, "logits/rejected": -1.2133448123931885, "logps/chosen": -51.48059844970703, "logps/rejected": -87.40227508544922, "loss": 0.1926, "rewards/accuracies": 1.0, "rewards/chosen": -0.5443710684776306, "rewards/margins": 0.7551230788230896, "rewards/rejected": -1.2994941473007202, "step": 5239 }, { "epoch": 1.16, "learning_rate": 8.806565804575796e-06, "logits/chosen": -1.3867827653884888, "logits/rejected": -1.3722611665725708, "logps/chosen": -201.34439086914062, "logps/rejected": -190.9063720703125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.6836487054824829, "rewards/margins": 6.298521518707275, "rewards/rejected": -6.982170104980469, "step": 5240 }, { "epoch": 1.16, "learning_rate": 8.805403442550261e-06, "logits/chosen": -1.1218554973602295, "logits/rejected": -1.0392097234725952, "logps/chosen": -77.17376708984375, "logps/rejected": -186.93035888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8412132263183594, "rewards/margins": 9.328774452209473, "rewards/rejected": -10.169987678527832, "step": 5241 }, { "epoch": 1.16, "learning_rate": 8.804240591539537e-06, "logits/chosen": -1.1138135194778442, "logits/rejected": -0.68096923828125, "logps/chosen": -194.50872802734375, "logps/rejected": -536.4969482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2691894471645355, "rewards/margins": 38.73221206665039, "rewards/rejected": -38.4630241394043, "step": 5242 }, { "epoch": 1.16, "learning_rate": 8.80307725169304e-06, "logits/chosen": -1.2332278490066528, "logits/rejected": -1.2332278490066528, "logps/chosen": -104.3294677734375, "logps/rejected": -104.3294677734375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -7.7567338943481445, "rewards/margins": 0.0, "rewards/rejected": -7.7567338943481445, "step": 5243 }, { "epoch": 1.16, "learning_rate": 8.801913423160256e-06, "logits/chosen": -1.1290370225906372, "logits/rejected": -0.7107651829719543, "logps/chosen": -90.74883270263672, "logps/rejected": -517.5571899414062, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -5.583675861358643, "rewards/margins": 30.30005645751953, "rewards/rejected": -35.883731842041016, "step": 5244 }, { "epoch": 1.16, "learning_rate": 8.800749106090739e-06, "logits/chosen": -0.993771493434906, "logits/rejected": -1.0211408138275146, "logps/chosen": -156.5018768310547, "logps/rejected": -99.44136047363281, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -2.2313339710235596, "rewards/margins": 2.8789103031158447, "rewards/rejected": -5.110244274139404, "step": 5245 }, { "epoch": 1.16, "learning_rate": 8.799584300634096e-06, "logits/chosen": -1.5304838418960571, "logits/rejected": -1.521497368812561, "logps/chosen": -78.29829406738281, "logps/rejected": -125.42225646972656, "loss": 0.3562, "rewards/accuracies": 0.0, "rewards/chosen": -1.9001731872558594, "rewards/margins": -0.038103461265563965, "rewards/rejected": -1.8620697259902954, "step": 5246 }, { "epoch": 1.16, "learning_rate": 8.798419006940008e-06, "logits/chosen": -1.139543056488037, "logits/rejected": -1.103206753730774, "logps/chosen": -133.15264892578125, "logps/rejected": -146.36387634277344, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.3038268983364105, "rewards/margins": 5.0876641273498535, "rewards/rejected": -5.391490936279297, "step": 5247 }, { "epoch": 1.16, "learning_rate": 8.797253225158206e-06, "logits/chosen": -1.2752876281738281, "logits/rejected": -1.2437597513198853, "logps/chosen": -68.67752838134766, "logps/rejected": -77.14401245117188, "loss": 0.1717, "rewards/accuracies": 1.0, "rewards/chosen": -0.17517776787281036, "rewards/margins": 0.8921051621437073, "rewards/rejected": -1.0672829151153564, "step": 5248 }, { "epoch": 1.16, "learning_rate": 8.796086955438494e-06, "logits/chosen": -1.209481120109558, "logits/rejected": -1.3160455226898193, "logps/chosen": -171.75408935546875, "logps/rejected": -162.1802520751953, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": 0.6464004516601562, "rewards/margins": 4.910803318023682, "rewards/rejected": -4.264402866363525, "step": 5249 }, { "epoch": 1.16, "learning_rate": 8.794920197930735e-06, "logits/chosen": -1.1211647987365723, "logits/rejected": -1.062348484992981, "logps/chosen": -161.83724975585938, "logps/rejected": -150.13429260253906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.9278382062911987, "rewards/margins": 9.022883415222168, "rewards/rejected": -7.095045566558838, "step": 5250 }, { "epoch": 1.16, "learning_rate": 8.79375295278485e-06, "logits/chosen": -1.318260669708252, "logits/rejected": -1.3343600034713745, "logps/chosen": -164.12850952148438, "logps/rejected": -121.03364562988281, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 1.6384308338165283, "rewards/margins": 5.067852020263672, "rewards/rejected": -3.4294211864471436, "step": 5251 }, { "epoch": 1.16, "learning_rate": 8.792585220150834e-06, "logits/chosen": -1.3813875913619995, "logits/rejected": -1.4647239446640015, "logps/chosen": -166.54470825195312, "logps/rejected": -166.44058227539062, "loss": 0.0606, "rewards/accuracies": 1.0, "rewards/chosen": -0.04737396165728569, "rewards/margins": 6.600546360015869, "rewards/rejected": -6.64792013168335, "step": 5252 }, { "epoch": 1.16, "learning_rate": 8.791417000178732e-06, "logits/chosen": -1.2622841596603394, "logits/rejected": -0.7997722625732422, "logps/chosen": -172.83363342285156, "logps/rejected": -893.5740966796875, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": 2.9877655506134033, "rewards/margins": 64.05133056640625, "rewards/rejected": -61.06356430053711, "step": 5253 }, { "epoch": 1.16, "learning_rate": 8.790248293018662e-06, "logits/chosen": -1.340171217918396, "logits/rejected": -1.2845699787139893, "logps/chosen": -144.64105224609375, "logps/rejected": -232.16885375976562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 8.13089656829834, "rewards/margins": 13.152502059936523, "rewards/rejected": -5.021605014801025, "step": 5254 }, { "epoch": 1.16, "learning_rate": 8.789079098820796e-06, "logits/chosen": -1.452830195426941, "logits/rejected": -1.4875813722610474, "logps/chosen": -134.93141174316406, "logps/rejected": -173.33917236328125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.671105980873108, "rewards/margins": 4.852931976318359, "rewards/rejected": -6.524037837982178, "step": 5255 }, { "epoch": 1.16, "learning_rate": 8.787909417735374e-06, "logits/chosen": -1.219399333000183, "logits/rejected": -1.1774543523788452, "logps/chosen": -196.62420654296875, "logps/rejected": -200.5248565673828, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.1191223859786987, "rewards/margins": 6.606919765472412, "rewards/rejected": -7.7260422706604, "step": 5256 }, { "epoch": 1.16, "learning_rate": 8.7867392499127e-06, "logits/chosen": -1.3282876014709473, "logits/rejected": -1.3282876014709473, "logps/chosen": -61.80500030517578, "logps/rejected": -61.80500030517578, "loss": 0.3472, "rewards/accuracies": 0.0, "rewards/chosen": -2.560532331466675, "rewards/margins": 0.0, "rewards/rejected": -2.560532331466675, "step": 5257 }, { "epoch": 1.16, "learning_rate": 8.785568595503134e-06, "logits/chosen": -1.4314087629318237, "logits/rejected": -1.4198781251907349, "logps/chosen": -198.13040161132812, "logps/rejected": -211.09263610839844, "loss": 1.5175, "rewards/accuracies": 0.0, "rewards/chosen": -10.211508750915527, "rewards/margins": -2.9838247299194336, "rewards/rejected": -7.227684020996094, "step": 5258 }, { "epoch": 1.16, "learning_rate": 8.784397454657103e-06, "logits/chosen": -1.2347701787948608, "logits/rejected": -1.1933335065841675, "logps/chosen": -96.98014831542969, "logps/rejected": -145.59927368164062, "loss": 0.228, "rewards/accuracies": 1.0, "rewards/chosen": -3.7457351684570312, "rewards/margins": 0.54888916015625, "rewards/rejected": -4.294624328613281, "step": 5259 }, { "epoch": 1.16, "learning_rate": 8.783225827525098e-06, "logits/chosen": -0.9943508505821228, "logits/rejected": -0.9173744916915894, "logps/chosen": -135.16842651367188, "logps/rejected": -310.848876953125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 1.8737305402755737, "rewards/margins": 7.424997329711914, "rewards/rejected": -5.551266670227051, "step": 5260 }, { "epoch": 1.16, "learning_rate": 8.782053714257668e-06, "logits/chosen": -1.244260549545288, "logits/rejected": -1.2072561979293823, "logps/chosen": -100.48957824707031, "logps/rejected": -187.01611328125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -2.4495537281036377, "rewards/margins": 5.0353193283081055, "rewards/rejected": -7.484873294830322, "step": 5261 }, { "epoch": 1.16, "learning_rate": 8.780881115005428e-06, "logits/chosen": -1.30350923538208, "logits/rejected": -1.3073625564575195, "logps/chosen": -115.4576644897461, "logps/rejected": -108.03145599365234, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.9345932006835938, "rewards/margins": 5.463129043579102, "rewards/rejected": -8.397722244262695, "step": 5262 }, { "epoch": 1.16, "learning_rate": 8.779708029919054e-06, "logits/chosen": -1.1844017505645752, "logits/rejected": -1.224811315536499, "logps/chosen": -187.61427307128906, "logps/rejected": -168.3066864013672, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.02196204662323, "rewards/margins": 7.295061111450195, "rewards/rejected": -6.273098945617676, "step": 5263 }, { "epoch": 1.17, "learning_rate": 8.778534459149283e-06, "logits/chosen": -1.1873713731765747, "logits/rejected": -1.2005001306533813, "logps/chosen": -269.64935302734375, "logps/rejected": -208.7412872314453, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -2.0480117797851562, "rewards/margins": 3.747342109680176, "rewards/rejected": -5.795353889465332, "step": 5264 }, { "epoch": 1.17, "learning_rate": 8.777360402846919e-06, "logits/chosen": -1.2226883172988892, "logits/rejected": -1.1967014074325562, "logps/chosen": -90.33572387695312, "logps/rejected": -231.571533203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.08197326958179474, "rewards/margins": 15.899225234985352, "rewards/rejected": -15.98119831085205, "step": 5265 }, { "epoch": 1.17, "learning_rate": 8.776185861162822e-06, "logits/chosen": -1.1307331323623657, "logits/rejected": -1.0532864332199097, "logps/chosen": -99.58244323730469, "logps/rejected": -137.57997131347656, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -0.5540489554405212, "rewards/margins": 3.2125563621520996, "rewards/rejected": -3.7666053771972656, "step": 5266 }, { "epoch": 1.17, "learning_rate": 8.77501083424792e-06, "logits/chosen": -1.0151588916778564, "logits/rejected": -1.062249779701233, "logps/chosen": -93.34394836425781, "logps/rejected": -62.25946044921875, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.29681396484375, "rewards/margins": 3.912555694580078, "rewards/rejected": -4.209369659423828, "step": 5267 }, { "epoch": 1.17, "learning_rate": 8.773835322253202e-06, "logits/chosen": -1.4766528606414795, "logits/rejected": -1.4002686738967896, "logps/chosen": -138.3695068359375, "logps/rejected": -157.686279296875, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": -0.49725037813186646, "rewards/margins": 2.5833840370178223, "rewards/rejected": -3.080634355545044, "step": 5268 }, { "epoch": 1.17, "learning_rate": 8.772659325329717e-06, "logits/chosen": -1.091567039489746, "logits/rejected": -1.0503277778625488, "logps/chosen": -102.21604919433594, "logps/rejected": -187.55001831054688, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.5769752264022827, "rewards/margins": 7.120041370391846, "rewards/rejected": -8.697016716003418, "step": 5269 }, { "epoch": 1.17, "learning_rate": 8.771482843628576e-06, "logits/chosen": -1.485168218612671, "logits/rejected": -1.4609265327453613, "logps/chosen": -138.8724365234375, "logps/rejected": -119.62931060791016, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 1.0607513189315796, "rewards/margins": 3.9638633728027344, "rewards/rejected": -2.9031121730804443, "step": 5270 }, { "epoch": 1.17, "learning_rate": 8.770305877300958e-06, "logits/chosen": -1.3910398483276367, "logits/rejected": -1.3656545877456665, "logps/chosen": -111.6019515991211, "logps/rejected": -155.720947265625, "loss": 0.0804, "rewards/accuracies": 1.0, "rewards/chosen": -1.4098106622695923, "rewards/margins": 1.7469223737716675, "rewards/rejected": -3.1567330360412598, "step": 5271 }, { "epoch": 1.17, "learning_rate": 8.769128426498098e-06, "logits/chosen": -1.1938670873641968, "logits/rejected": -1.1451616287231445, "logps/chosen": -118.904296875, "logps/rejected": -263.32647705078125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 3.61541748046875, "rewards/margins": 6.5816650390625, "rewards/rejected": -2.96624755859375, "step": 5272 }, { "epoch": 1.17, "learning_rate": 8.767950491371295e-06, "logits/chosen": -1.0380584001541138, "logits/rejected": -0.9217150807380676, "logps/chosen": -84.74795532226562, "logps/rejected": -239.1783447265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9985107779502869, "rewards/margins": 8.60776424407959, "rewards/rejected": -9.606274604797363, "step": 5273 }, { "epoch": 1.17, "learning_rate": 8.766772072071911e-06, "logits/chosen": -0.9758637547492981, "logits/rejected": -0.4640255868434906, "logps/chosen": -207.78936767578125, "logps/rejected": -775.4322509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.32876893877983093, "rewards/margins": 61.77269744873047, "rewards/rejected": -61.44392776489258, "step": 5274 }, { "epoch": 1.17, "learning_rate": 8.765593168751373e-06, "logits/chosen": -1.3664418458938599, "logits/rejected": -1.288025140762329, "logps/chosen": -83.16053009033203, "logps/rejected": -161.84146118164062, "loss": 0.1436, "rewards/accuracies": 1.0, "rewards/chosen": -1.3624359369277954, "rewards/margins": 3.016368865966797, "rewards/rejected": -4.378804683685303, "step": 5275 }, { "epoch": 1.17, "learning_rate": 8.764413781561164e-06, "logits/chosen": -1.4848487377166748, "logits/rejected": -1.421108365058899, "logps/chosen": -101.80763244628906, "logps/rejected": -201.79611206054688, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.9208313226699829, "rewards/margins": 6.762799263000488, "rewards/rejected": -7.683630466461182, "step": 5276 }, { "epoch": 1.17, "learning_rate": 8.763233910652833e-06, "logits/chosen": -1.27497136592865, "logits/rejected": -1.3031584024429321, "logps/chosen": -68.75956726074219, "logps/rejected": -95.85899353027344, "loss": 0.2779, "rewards/accuracies": 1.0, "rewards/chosen": 0.8425689935684204, "rewards/margins": 3.3715882301330566, "rewards/rejected": -2.5290191173553467, "step": 5277 }, { "epoch": 1.17, "learning_rate": 8.762053556177991e-06, "logits/chosen": -0.8993366956710815, "logits/rejected": -0.9774253368377686, "logps/chosen": -171.4468994140625, "logps/rejected": -131.0797576904297, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.5137939453125, "rewards/margins": 7.595696449279785, "rewards/rejected": -9.109490394592285, "step": 5278 }, { "epoch": 1.17, "learning_rate": 8.760872718288311e-06, "logits/chosen": -1.0663763284683228, "logits/rejected": -1.0663763284683228, "logps/chosen": -187.40286254882812, "logps/rejected": -187.40286254882812, "loss": 0.3695, "rewards/accuracies": 0.0, "rewards/chosen": -5.368527412414551, "rewards/margins": 0.0, "rewards/rejected": -5.368527412414551, "step": 5279 }, { "epoch": 1.17, "learning_rate": 8.759691397135528e-06, "logits/chosen": -1.440451741218567, "logits/rejected": -1.3858445882797241, "logps/chosen": -86.38713836669922, "logps/rejected": -163.16387939453125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.1327530145645142, "rewards/margins": 5.793889045715332, "rewards/rejected": -6.926641941070557, "step": 5280 }, { "epoch": 1.17, "learning_rate": 8.758509592871439e-06, "logits/chosen": -1.2425727844238281, "logits/rejected": -1.190619945526123, "logps/chosen": -90.40306091308594, "logps/rejected": -144.05763244628906, "loss": 0.2329, "rewards/accuracies": 1.0, "rewards/chosen": -0.7841240167617798, "rewards/margins": 2.3188209533691406, "rewards/rejected": -3.10294508934021, "step": 5281 }, { "epoch": 1.17, "learning_rate": 8.7573273056479e-06, "logits/chosen": -1.183897852897644, "logits/rejected": -1.303166151046753, "logps/chosen": -146.8242950439453, "logps/rejected": -98.60548400878906, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.158666968345642, "rewards/margins": 4.853368282318115, "rewards/rejected": -6.012035369873047, "step": 5282 }, { "epoch": 1.17, "learning_rate": 8.756144535616838e-06, "logits/chosen": -1.2102144956588745, "logits/rejected": -1.2102144956588745, "logps/chosen": -130.65585327148438, "logps/rejected": -130.65585327148438, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.329283237457275, "rewards/margins": 0.0, "rewards/rejected": -4.329283237457275, "step": 5283 }, { "epoch": 1.17, "learning_rate": 8.754961282930231e-06, "logits/chosen": -1.5146607160568237, "logits/rejected": -1.39170503616333, "logps/chosen": -102.56016540527344, "logps/rejected": -266.4577941894531, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6114829778671265, "rewards/margins": 7.814877986907959, "rewards/rejected": -9.426361083984375, "step": 5284 }, { "epoch": 1.17, "learning_rate": 8.753777547740126e-06, "logits/chosen": -1.5143933296203613, "logits/rejected": -0.8623607754707336, "logps/chosen": -142.7818603515625, "logps/rejected": -959.4828491210938, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.1698760986328125, "rewards/margins": 72.00714874267578, "rewards/rejected": -70.83727264404297, "step": 5285 }, { "epoch": 1.17, "learning_rate": 8.752593330198631e-06, "logits/chosen": -1.4302035570144653, "logits/rejected": -1.4429714679718018, "logps/chosen": -131.09786987304688, "logps/rejected": -169.0367431640625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.49791643023490906, "rewards/margins": 5.231484413146973, "rewards/rejected": -5.729400634765625, "step": 5286 }, { "epoch": 1.17, "learning_rate": 8.751408630457911e-06, "logits/chosen": -1.2126656770706177, "logits/rejected": -1.246461033821106, "logps/chosen": -134.81922912597656, "logps/rejected": -139.61007690429688, "loss": 0.76, "rewards/accuracies": 1.0, "rewards/chosen": -3.397878408432007, "rewards/margins": 7.44215202331543, "rewards/rejected": -10.840030670166016, "step": 5287 }, { "epoch": 1.17, "learning_rate": 8.750223448670204e-06, "logits/chosen": -0.846931517124176, "logits/rejected": -0.784956157207489, "logps/chosen": -102.2412109375, "logps/rejected": -363.8747863769531, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 1.291693925857544, "rewards/margins": 10.007059097290039, "rewards/rejected": -8.715365409851074, "step": 5288 }, { "epoch": 1.17, "learning_rate": 8.749037784987797e-06, "logits/chosen": -1.1423580646514893, "logits/rejected": -1.1423580646514893, "logps/chosen": -291.6365051269531, "logps/rejected": -291.6365051269531, "loss": 0.3472, "rewards/accuracies": 0.0, "rewards/chosen": -9.490790367126465, "rewards/margins": 0.0, "rewards/rejected": -9.490790367126465, "step": 5289 }, { "epoch": 1.17, "learning_rate": 8.747851639563048e-06, "logits/chosen": -0.9640989899635315, "logits/rejected": -0.9015131592750549, "logps/chosen": -121.34939575195312, "logps/rejected": -232.474365234375, "loss": 0.202, "rewards/accuracies": 1.0, "rewards/chosen": -2.27531361579895, "rewards/margins": 0.7012505531311035, "rewards/rejected": -2.9765641689300537, "step": 5290 }, { "epoch": 1.17, "learning_rate": 8.746665012548373e-06, "logits/chosen": -1.144935131072998, "logits/rejected": -1.0907121896743774, "logps/chosen": -98.43634033203125, "logps/rejected": -209.19345092773438, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 0.6262375116348267, "rewards/margins": 4.39790678024292, "rewards/rejected": -3.7716691493988037, "step": 5291 }, { "epoch": 1.17, "learning_rate": 8.745477904096247e-06, "logits/chosen": -1.2810794115066528, "logits/rejected": -1.2936217784881592, "logps/chosen": -63.47533416748047, "logps/rejected": -104.62484741210938, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": 0.9801910519599915, "rewards/margins": 4.899592876434326, "rewards/rejected": -3.9194016456604004, "step": 5292 }, { "epoch": 1.17, "learning_rate": 8.744290314359219e-06, "logits/chosen": -1.4212313890457153, "logits/rejected": -1.4194234609603882, "logps/chosen": -218.95729064941406, "logps/rejected": -324.6824951171875, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -2.6409072875976562, "rewards/margins": 7.437138557434082, "rewards/rejected": -10.078045845031738, "step": 5293 }, { "epoch": 1.17, "learning_rate": 8.743102243489885e-06, "logits/chosen": -1.2936629056930542, "logits/rejected": -1.2862876653671265, "logps/chosen": -146.42926025390625, "logps/rejected": -139.43870544433594, "loss": 0.9505, "rewards/accuracies": 0.0, "rewards/chosen": -4.574395656585693, "rewards/margins": -1.7189643383026123, "rewards/rejected": -2.855431318283081, "step": 5294 }, { "epoch": 1.17, "learning_rate": 8.74191369164091e-06, "logits/chosen": -1.1022250652313232, "logits/rejected": -1.1813006401062012, "logps/chosen": -325.0321044921875, "logps/rejected": -226.5190887451172, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -5.158413887023926, "rewards/margins": 6.399614334106445, "rewards/rejected": -11.558028221130371, "step": 5295 }, { "epoch": 1.17, "learning_rate": 8.74072465896502e-06, "logits/chosen": -1.105600357055664, "logits/rejected": -1.1306664943695068, "logps/chosen": -104.9873046875, "logps/rejected": -112.09393310546875, "loss": 0.1999, "rewards/accuracies": 1.0, "rewards/chosen": -0.4397018551826477, "rewards/margins": 1.8425064086914062, "rewards/rejected": -2.282208204269409, "step": 5296 }, { "epoch": 1.17, "learning_rate": 8.739535145615005e-06, "logits/chosen": -1.067104458808899, "logits/rejected": -0.9546020030975342, "logps/chosen": -112.763671875, "logps/rejected": -167.28427124023438, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.2505401372909546, "rewards/margins": 5.738976955413818, "rewards/rejected": -6.9895172119140625, "step": 5297 }, { "epoch": 1.17, "learning_rate": 8.738345151743715e-06, "logits/chosen": -0.9881324768066406, "logits/rejected": -1.0981600284576416, "logps/chosen": -134.8470458984375, "logps/rejected": -125.530029296875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 3.7052857875823975, "rewards/margins": 11.545124053955078, "rewards/rejected": -7.83983850479126, "step": 5298 }, { "epoch": 1.17, "learning_rate": 8.737154677504059e-06, "logits/chosen": -1.145836353302002, "logits/rejected": -1.145836353302002, "logps/chosen": -212.31292724609375, "logps/rejected": -212.31292724609375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -7.931649684906006, "rewards/margins": 0.0, "rewards/rejected": -7.931649684906006, "step": 5299 }, { "epoch": 1.17, "learning_rate": 8.73596372304901e-06, "logits/chosen": -0.8570212125778198, "logits/rejected": -0.9065960049629211, "logps/chosen": -275.5477294921875, "logps/rejected": -118.11229705810547, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.5430054664611816, "rewards/margins": 7.139654636383057, "rewards/rejected": -9.682660102844238, "step": 5300 }, { "epoch": 1.17, "learning_rate": 8.734772288531604e-06, "logits/chosen": -1.023929238319397, "logits/rejected": -0.9989750385284424, "logps/chosen": -212.6431884765625, "logps/rejected": -254.91116333007812, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -1.78948974609375, "rewards/margins": 3.9832215309143066, "rewards/rejected": -5.772711277008057, "step": 5301 }, { "epoch": 1.17, "learning_rate": 8.733580374104936e-06, "logits/chosen": -1.3182616233825684, "logits/rejected": -1.2776833772659302, "logps/chosen": -90.48822021484375, "logps/rejected": -151.5725860595703, "loss": 0.0734, "rewards/accuracies": 1.0, "rewards/chosen": 0.7874305844306946, "rewards/margins": 1.848299503326416, "rewards/rejected": -1.0608688592910767, "step": 5302 }, { "epoch": 1.17, "learning_rate": 8.732387979922167e-06, "logits/chosen": -1.3746097087860107, "logits/rejected": -1.3806077241897583, "logps/chosen": -94.76577758789062, "logps/rejected": -128.23458862304688, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": -0.8546226620674133, "rewards/margins": 2.4701576232910156, "rewards/rejected": -3.324780225753784, "step": 5303 }, { "epoch": 1.17, "learning_rate": 8.731195106136515e-06, "logits/chosen": -1.1343967914581299, "logits/rejected": -1.0719012022018433, "logps/chosen": -91.033935546875, "logps/rejected": -210.74761962890625, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -2.2169768810272217, "rewards/margins": 3.8439242839813232, "rewards/rejected": -6.060901165008545, "step": 5304 }, { "epoch": 1.17, "learning_rate": 8.730001752901258e-06, "logits/chosen": -1.1169610023498535, "logits/rejected": -1.1190440654754639, "logps/chosen": -104.83616638183594, "logps/rejected": -95.58228302001953, "loss": 1.1443, "rewards/accuracies": 0.0, "rewards/chosen": -3.6712234020233154, "rewards/margins": -2.178551435470581, "rewards/rejected": -1.4926719665527344, "step": 5305 }, { "epoch": 1.17, "learning_rate": 8.728807920369747e-06, "logits/chosen": -0.7452742457389832, "logits/rejected": -0.6918662786483765, "logps/chosen": -146.44171142578125, "logps/rejected": -228.38446044921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.4838759899139404, "rewards/margins": 7.090761184692383, "rewards/rejected": -10.574637413024902, "step": 5306 }, { "epoch": 1.17, "learning_rate": 8.727613608695379e-06, "logits/chosen": -1.0585881471633911, "logits/rejected": -1.0359299182891846, "logps/chosen": -192.9991455078125, "logps/rejected": -209.16781616210938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.0419281721115112, "rewards/margins": 8.347125053405762, "rewards/rejected": -7.305197238922119, "step": 5307 }, { "epoch": 1.17, "learning_rate": 8.726418818031623e-06, "logits/chosen": -1.1055405139923096, "logits/rejected": -0.8879587650299072, "logps/chosen": -96.6657943725586, "logps/rejected": -313.8670349121094, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.5850074887275696, "rewards/margins": 5.30915641784668, "rewards/rejected": -4.724148750305176, "step": 5308 }, { "epoch": 1.18, "learning_rate": 8.72522354853201e-06, "logits/chosen": -1.0696667432785034, "logits/rejected": -1.0297861099243164, "logps/chosen": -124.32291412353516, "logps/rejected": -214.37847900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.13439178466796875, "rewards/margins": 11.480361938476562, "rewards/rejected": -11.345970153808594, "step": 5309 }, { "epoch": 1.18, "learning_rate": 8.724027800350123e-06, "logits/chosen": -1.0618950128555298, "logits/rejected": -1.0872045755386353, "logps/chosen": -205.09767150878906, "logps/rejected": -133.53140258789062, "loss": 0.0379, "rewards/accuracies": 1.0, "rewards/chosen": -0.5906982421875, "rewards/margins": 2.5413711071014404, "rewards/rejected": -3.1320693492889404, "step": 5310 }, { "epoch": 1.18, "learning_rate": 8.722831573639618e-06, "logits/chosen": -1.3385096788406372, "logits/rejected": -1.295048713684082, "logps/chosen": -165.4732666015625, "logps/rejected": -181.49755859375, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": 3.337634325027466, "rewards/margins": 2.9889771938323975, "rewards/rejected": 0.3486572206020355, "step": 5311 }, { "epoch": 1.18, "learning_rate": 8.721634868554204e-06, "logits/chosen": -0.9665867686271667, "logits/rejected": -0.9345433115959167, "logps/chosen": -213.67672729492188, "logps/rejected": -528.6005249023438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 5.097830295562744, "rewards/margins": 28.163326263427734, "rewards/rejected": -23.06549644470215, "step": 5312 }, { "epoch": 1.18, "learning_rate": 8.720437685247657e-06, "logits/chosen": -1.3286268711090088, "logits/rejected": -1.2850815057754517, "logps/chosen": -153.64996337890625, "logps/rejected": -244.14520263671875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.2245911359786987, "rewards/margins": 9.169217109680176, "rewards/rejected": -10.393808364868164, "step": 5313 }, { "epoch": 1.18, "learning_rate": 8.719240023873809e-06, "logits/chosen": -0.8340020179748535, "logits/rejected": -0.7455450892448425, "logps/chosen": -125.21495056152344, "logps/rejected": -295.7393798828125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 0.3085586726665497, "rewards/margins": 6.879826068878174, "rewards/rejected": -6.571267604827881, "step": 5314 }, { "epoch": 1.18, "learning_rate": 8.71804188458656e-06, "logits/chosen": -1.3087739944458008, "logits/rejected": -1.26089346408844, "logps/chosen": -161.1913299560547, "logps/rejected": -219.50088500976562, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": 0.7482330203056335, "rewards/margins": 2.4948272705078125, "rewards/rejected": -1.7465943098068237, "step": 5315 }, { "epoch": 1.18, "learning_rate": 8.716843267539868e-06, "logits/chosen": -1.5404421091079712, "logits/rejected": -1.3305960893630981, "logps/chosen": -164.91534423828125, "logps/rejected": -260.2286376953125, "loss": 0.677, "rewards/accuracies": 0.0, "rewards/chosen": -4.409101963043213, "rewards/margins": -1.0554275512695312, "rewards/rejected": -3.3536744117736816, "step": 5316 }, { "epoch": 1.18, "learning_rate": 8.715644172887751e-06, "logits/chosen": -1.0226469039916992, "logits/rejected": -0.7445454597473145, "logps/chosen": -160.564208984375, "logps/rejected": -750.83056640625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.7270278930664062, "rewards/margins": 64.47957611083984, "rewards/rejected": -65.20660400390625, "step": 5317 }, { "epoch": 1.18, "learning_rate": 8.714444600784289e-06, "logits/chosen": -1.1037886142730713, "logits/rejected": -1.1369363069534302, "logps/chosen": -196.15956115722656, "logps/rejected": -216.39434814453125, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": 0.5893234610557556, "rewards/margins": 3.0018510818481445, "rewards/rejected": -2.412527561187744, "step": 5318 }, { "epoch": 1.18, "learning_rate": 8.713244551383626e-06, "logits/chosen": -0.742266058921814, "logits/rejected": -0.6653878092765808, "logps/chosen": -186.74337768554688, "logps/rejected": -344.0679931640625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.646902561187744, "rewards/margins": 6.532431125640869, "rewards/rejected": -9.179333686828613, "step": 5319 }, { "epoch": 1.18, "learning_rate": 8.712044024839962e-06, "logits/chosen": -1.2405685186386108, "logits/rejected": -1.2493454217910767, "logps/chosen": -193.63092041015625, "logps/rejected": -217.6074981689453, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 0.3530624508857727, "rewards/margins": 9.758833885192871, "rewards/rejected": -9.405771255493164, "step": 5320 }, { "epoch": 1.18, "learning_rate": 8.710843021307567e-06, "logits/chosen": -1.3006312847137451, "logits/rejected": -1.2727172374725342, "logps/chosen": -168.5755615234375, "logps/rejected": -232.98764038085938, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 2.867687940597534, "rewards/margins": 4.308148384094238, "rewards/rejected": -1.440460205078125, "step": 5321 }, { "epoch": 1.18, "learning_rate": 8.709641540940764e-06, "logits/chosen": -1.3429889678955078, "logits/rejected": -1.3927778005599976, "logps/chosen": -151.25462341308594, "logps/rejected": -152.4258575439453, "loss": 0.1005, "rewards/accuracies": 1.0, "rewards/chosen": -1.2602920532226562, "rewards/margins": 1.5353057384490967, "rewards/rejected": -2.795597791671753, "step": 5322 }, { "epoch": 1.18, "learning_rate": 8.70843958389394e-06, "logits/chosen": -1.1734905242919922, "logits/rejected": -1.1538660526275635, "logps/chosen": -83.05608367919922, "logps/rejected": -105.84814453125, "loss": 0.0593, "rewards/accuracies": 1.0, "rewards/chosen": -1.0381355285644531, "rewards/margins": 2.087362051010132, "rewards/rejected": -3.125497579574585, "step": 5323 }, { "epoch": 1.18, "learning_rate": 8.707237150321544e-06, "logits/chosen": -1.0428045988082886, "logits/rejected": -0.8714808821678162, "logps/chosen": -145.79580688476562, "logps/rejected": -300.49395751953125, "loss": 0.2379, "rewards/accuracies": 1.0, "rewards/chosen": -1.1125519275665283, "rewards/margins": 0.49672234058380127, "rewards/rejected": -1.6092742681503296, "step": 5324 }, { "epoch": 1.18, "learning_rate": 8.706034240378087e-06, "logits/chosen": -1.216888189315796, "logits/rejected": -1.2376552820205688, "logps/chosen": -50.45591735839844, "logps/rejected": -63.20985794067383, "loss": 0.2878, "rewards/accuracies": 1.0, "rewards/chosen": -3.402606248855591, "rewards/margins": 0.25156331062316895, "rewards/rejected": -3.6541695594787598, "step": 5325 }, { "epoch": 1.18, "learning_rate": 8.704830854218138e-06, "logits/chosen": -1.2649132013320923, "logits/rejected": -1.1553696393966675, "logps/chosen": -153.34332275390625, "logps/rejected": -246.6204833984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.9801712036132812, "rewards/margins": 10.814608573913574, "rewards/rejected": -9.834437370300293, "step": 5326 }, { "epoch": 1.18, "learning_rate": 8.703626991996333e-06, "logits/chosen": -0.9646544456481934, "logits/rejected": -1.0038032531738281, "logps/chosen": -169.24539184570312, "logps/rejected": -196.66912841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.03601837158203125, "rewards/margins": 15.520654678344727, "rewards/rejected": -15.484636306762695, "step": 5327 }, { "epoch": 1.18, "learning_rate": 8.70242265386736e-06, "logits/chosen": -1.0520317554473877, "logits/rejected": -1.0310142040252686, "logps/chosen": -79.5052719116211, "logps/rejected": -82.44798278808594, "loss": 0.1265, "rewards/accuracies": 1.0, "rewards/chosen": -0.8200249075889587, "rewards/margins": 1.2455840110778809, "rewards/rejected": -2.0656089782714844, "step": 5328 }, { "epoch": 1.18, "learning_rate": 8.701217839985978e-06, "logits/chosen": -1.2599678039550781, "logits/rejected": -1.2879176139831543, "logps/chosen": -191.00164794921875, "logps/rejected": -280.36419677734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.817474365234375, "rewards/margins": 13.226091384887695, "rewards/rejected": -18.04356575012207, "step": 5329 }, { "epoch": 1.18, "learning_rate": 8.700012550507e-06, "logits/chosen": -1.2243016958236694, "logits/rejected": -1.1205127239227295, "logps/chosen": -93.66372680664062, "logps/rejected": -211.90283203125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.20193786919116974, "rewards/margins": 6.815430164337158, "rewards/rejected": -7.017367839813232, "step": 5330 }, { "epoch": 1.18, "learning_rate": 8.698806785585305e-06, "logits/chosen": -0.5708995461463928, "logits/rejected": -0.5663204789161682, "logps/chosen": -90.64076232910156, "logps/rejected": -116.26117706298828, "loss": 0.2711, "rewards/accuracies": 1.0, "rewards/chosen": -0.4273696839809418, "rewards/margins": 0.3552452027797699, "rewards/rejected": -0.7826148867607117, "step": 5331 }, { "epoch": 1.18, "learning_rate": 8.697600545375829e-06, "logits/chosen": -1.2459522485733032, "logits/rejected": -1.185852289199829, "logps/chosen": -57.461952209472656, "logps/rejected": -147.85218811035156, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.33822059631347656, "rewards/margins": 5.0708417892456055, "rewards/rejected": -5.409062385559082, "step": 5332 }, { "epoch": 1.18, "learning_rate": 8.696393830033571e-06, "logits/chosen": -0.84437495470047, "logits/rejected": -0.7646660208702087, "logps/chosen": -228.0805206298828, "logps/rejected": -242.99798583984375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 2.194850206375122, "rewards/margins": 6.27647590637207, "rewards/rejected": -4.081625461578369, "step": 5333 }, { "epoch": 1.18, "learning_rate": 8.695186639713593e-06, "logits/chosen": -1.0024889707565308, "logits/rejected": -0.9303871393203735, "logps/chosen": -209.0515899658203, "logps/rejected": -288.85662841796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.132533311843872, "rewards/margins": 8.498947143554688, "rewards/rejected": -7.3664140701293945, "step": 5334 }, { "epoch": 1.18, "learning_rate": 8.693978974571013e-06, "logits/chosen": -1.0100411176681519, "logits/rejected": -0.8228251934051514, "logps/chosen": -174.6285858154297, "logps/rejected": -408.30926513671875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 0.8666183352470398, "rewards/margins": 7.192368984222412, "rewards/rejected": -6.325750827789307, "step": 5335 }, { "epoch": 1.18, "learning_rate": 8.692770834761017e-06, "logits/chosen": -1.0237451791763306, "logits/rejected": -1.0237451791763306, "logps/chosen": -113.25422668457031, "logps/rejected": -113.25422668457031, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -3.165306806564331, "rewards/margins": 0.0, "rewards/rejected": -3.165306806564331, "step": 5336 }, { "epoch": 1.18, "learning_rate": 8.691562220438845e-06, "logits/chosen": -0.9394657611846924, "logits/rejected": -0.9743058085441589, "logps/chosen": -161.47467041015625, "logps/rejected": -196.38597106933594, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 2.8292877674102783, "rewards/margins": 12.1184720993042, "rewards/rejected": -9.2891845703125, "step": 5337 }, { "epoch": 1.18, "learning_rate": 8.690353131759802e-06, "logits/chosen": -1.3591512441635132, "logits/rejected": -1.3300851583480835, "logps/chosen": -106.46966552734375, "logps/rejected": -137.24314880371094, "loss": 0.5974, "rewards/accuracies": 0.0, "rewards/chosen": -1.4554795026779175, "rewards/margins": -0.7331268787384033, "rewards/rejected": -0.7223526239395142, "step": 5338 }, { "epoch": 1.18, "learning_rate": 8.689143568879252e-06, "logits/chosen": -1.0543452501296997, "logits/rejected": -0.9974775910377502, "logps/chosen": -126.69888305664062, "logps/rejected": -178.1112060546875, "loss": 0.0634, "rewards/accuracies": 1.0, "rewards/chosen": 0.6619888544082642, "rewards/margins": 6.306473731994629, "rewards/rejected": -5.644484996795654, "step": 5339 }, { "epoch": 1.18, "learning_rate": 8.687933531952624e-06, "logits/chosen": -1.0134152173995972, "logits/rejected": -1.0250753164291382, "logps/chosen": -196.67294311523438, "logps/rejected": -166.7406463623047, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.687544345855713, "rewards/margins": 5.604496479034424, "rewards/rejected": -8.292040824890137, "step": 5340 }, { "epoch": 1.18, "learning_rate": 8.686723021135402e-06, "logits/chosen": -1.3325684070587158, "logits/rejected": -1.310880422592163, "logps/chosen": -103.78885650634766, "logps/rejected": -175.66851806640625, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -2.71378493309021, "rewards/margins": 3.2263290882110596, "rewards/rejected": -5.9401140213012695, "step": 5341 }, { "epoch": 1.18, "learning_rate": 8.685512036583132e-06, "logits/chosen": -1.0090140104293823, "logits/rejected": -1.0388015508651733, "logps/chosen": -190.02528381347656, "logps/rejected": -264.2752685546875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.8530166745185852, "rewards/margins": 6.346733570098877, "rewards/rejected": -5.493716716766357, "step": 5342 }, { "epoch": 1.18, "learning_rate": 8.684300578451428e-06, "logits/chosen": -1.2247188091278076, "logits/rejected": -1.1852647066116333, "logps/chosen": -208.669921875, "logps/rejected": -244.69297790527344, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": 2.898625135421753, "rewards/margins": 3.0073821544647217, "rewards/rejected": -0.10875701904296875, "step": 5343 }, { "epoch": 1.18, "learning_rate": 8.683088646895955e-06, "logits/chosen": -0.8504855632781982, "logits/rejected": -0.8653485774993896, "logps/chosen": -100.84098052978516, "logps/rejected": -146.9302978515625, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": -1.3661949634552002, "rewards/margins": 2.5775482654571533, "rewards/rejected": -3.9437432289123535, "step": 5344 }, { "epoch": 1.18, "learning_rate": 8.681876242072445e-06, "logits/chosen": -1.0577527284622192, "logits/rejected": -1.0577527284622192, "logps/chosen": -178.3407745361328, "logps/rejected": -178.3407745361328, "loss": 0.3471, "rewards/accuracies": 0.0, "rewards/chosen": -6.873574256896973, "rewards/margins": 0.0, "rewards/rejected": -6.873574256896973, "step": 5345 }, { "epoch": 1.18, "learning_rate": 8.68066336413669e-06, "logits/chosen": -0.8380107283592224, "logits/rejected": -0.8122652769088745, "logps/chosen": -121.94239044189453, "logps/rejected": -195.74801635742188, "loss": 0.5071, "rewards/accuracies": 1.0, "rewards/chosen": -1.956739068031311, "rewards/margins": 7.13347864151001, "rewards/rejected": -9.090217590332031, "step": 5346 }, { "epoch": 1.18, "learning_rate": 8.67945001324454e-06, "logits/chosen": -0.7482699155807495, "logits/rejected": -0.7482699155807495, "logps/chosen": -106.6446762084961, "logps/rejected": -106.6446762084961, "loss": 0.3552, "rewards/accuracies": 0.0, "rewards/chosen": -0.740918755531311, "rewards/margins": 0.0, "rewards/rejected": -0.740918755531311, "step": 5347 }, { "epoch": 1.18, "learning_rate": 8.678236189551907e-06, "logits/chosen": -1.012902021408081, "logits/rejected": -1.0420509576797485, "logps/chosen": -178.84861755371094, "logps/rejected": -93.12169647216797, "loss": 2.2709, "rewards/accuracies": 0.0, "rewards/chosen": -6.976251125335693, "rewards/margins": -4.530951499938965, "rewards/rejected": -2.4452996253967285, "step": 5348 }, { "epoch": 1.18, "learning_rate": 8.677021893214768e-06, "logits/chosen": -1.4711720943450928, "logits/rejected": -1.5808013677597046, "logps/chosen": -122.11231231689453, "logps/rejected": -87.3084716796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.6187157034873962, "rewards/margins": 6.17573356628418, "rewards/rejected": -5.557017803192139, "step": 5349 }, { "epoch": 1.18, "learning_rate": 8.675807124389153e-06, "logits/chosen": -1.0813368558883667, "logits/rejected": -1.1283190250396729, "logps/chosen": -85.29833984375, "logps/rejected": -128.30557250976562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.24132239818573, "rewards/margins": 8.357346534729004, "rewards/rejected": -9.598669052124023, "step": 5350 }, { "epoch": 1.18, "learning_rate": 8.67459188323116e-06, "logits/chosen": -1.1330498456954956, "logits/rejected": -1.135130524635315, "logps/chosen": -132.87789916992188, "logps/rejected": -250.44589233398438, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.788203477859497, "rewards/margins": 7.7482500076293945, "rewards/rejected": -9.536453247070312, "step": 5351 }, { "epoch": 1.18, "learning_rate": 8.673376169896944e-06, "logits/chosen": -1.1076669692993164, "logits/rejected": -1.1472407579421997, "logps/chosen": -109.08568572998047, "logps/rejected": -141.25753784179688, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.8365448117256165, "rewards/margins": 3.545253038406372, "rewards/rejected": -4.381797790527344, "step": 5352 }, { "epoch": 1.18, "learning_rate": 8.672159984542721e-06, "logits/chosen": -1.1435102224349976, "logits/rejected": -0.9760494232177734, "logps/chosen": -311.7473449707031, "logps/rejected": -495.614990234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.514086902141571, "rewards/margins": 43.339210510253906, "rewards/rejected": -42.82512283325195, "step": 5353 }, { "epoch": 1.19, "learning_rate": 8.670943327324767e-06, "logits/chosen": -1.2430695295333862, "logits/rejected": -1.0937798023223877, "logps/chosen": -191.61688232421875, "logps/rejected": -236.07144165039062, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 2.048248291015625, "rewards/margins": 8.887170791625977, "rewards/rejected": -6.838922023773193, "step": 5354 }, { "epoch": 1.19, "learning_rate": 8.66972619839942e-06, "logits/chosen": -1.1710790395736694, "logits/rejected": -1.1103471517562866, "logps/chosen": -84.63146209716797, "logps/rejected": -238.3096923828125, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.4778244197368622, "rewards/margins": 7.302363872528076, "rewards/rejected": -7.780188083648682, "step": 5355 }, { "epoch": 1.19, "learning_rate": 8.668508597923077e-06, "logits/chosen": -1.1262297630310059, "logits/rejected": -1.120008111000061, "logps/chosen": -61.25100326538086, "logps/rejected": -126.89573669433594, "loss": 0.0712, "rewards/accuracies": 1.0, "rewards/chosen": -1.190386176109314, "rewards/margins": 2.105682373046875, "rewards/rejected": -3.2960686683654785, "step": 5356 }, { "epoch": 1.19, "learning_rate": 8.6672905260522e-06, "logits/chosen": -1.2452141046524048, "logits/rejected": -1.4171415567398071, "logps/chosen": -197.12155151367188, "logps/rejected": -129.52536010742188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2192353010177612, "rewards/margins": 8.700822830200195, "rewards/rejected": -9.920058250427246, "step": 5357 }, { "epoch": 1.19, "learning_rate": 8.666071982943306e-06, "logits/chosen": -1.382106065750122, "logits/rejected": -1.3616125583648682, "logps/chosen": -135.7578887939453, "logps/rejected": -185.1247100830078, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 2.855006456375122, "rewards/margins": 7.286572456359863, "rewards/rejected": -4.431565761566162, "step": 5358 }, { "epoch": 1.19, "learning_rate": 8.664852968752975e-06, "logits/chosen": -1.2117061614990234, "logits/rejected": -1.1262022256851196, "logps/chosen": -210.49566650390625, "logps/rejected": -318.59771728515625, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": -2.241943359375, "rewards/margins": 2.1651430130004883, "rewards/rejected": -4.407086372375488, "step": 5359 }, { "epoch": 1.19, "learning_rate": 8.663633483637847e-06, "logits/chosen": -1.3866523504257202, "logits/rejected": -1.2602663040161133, "logps/chosen": -168.814208984375, "logps/rejected": -430.7601623535156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.7758729457855225, "rewards/margins": 18.963157653808594, "rewards/rejected": -16.187284469604492, "step": 5360 }, { "epoch": 1.19, "learning_rate": 8.662413527754624e-06, "logits/chosen": -1.2494862079620361, "logits/rejected": -1.1303855180740356, "logps/chosen": -147.9833221435547, "logps/rejected": -304.248046875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.3984726071357727, "rewards/margins": 4.580557346343994, "rewards/rejected": -4.979030132293701, "step": 5361 }, { "epoch": 1.19, "learning_rate": 8.661193101260067e-06, "logits/chosen": -0.9522342681884766, "logits/rejected": -0.9630123972892761, "logps/chosen": -107.85910034179688, "logps/rejected": -126.37340545654297, "loss": 0.0784, "rewards/accuracies": 1.0, "rewards/chosen": -1.977728247642517, "rewards/margins": 1.9386237859725952, "rewards/rejected": -3.9163520336151123, "step": 5362 }, { "epoch": 1.19, "learning_rate": 8.659972204310998e-06, "logits/chosen": -0.9093284010887146, "logits/rejected": -0.7314779162406921, "logps/chosen": -232.5210723876953, "logps/rejected": -413.79107666015625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.5323730707168579, "rewards/margins": 7.554937839508057, "rewards/rejected": -7.022564888000488, "step": 5363 }, { "epoch": 1.19, "learning_rate": 8.658750837064299e-06, "logits/chosen": -1.5774644613265991, "logits/rejected": -1.6414594650268555, "logps/chosen": -88.40255737304688, "logps/rejected": -46.068302154541016, "loss": 0.0787, "rewards/accuracies": 1.0, "rewards/chosen": -1.3739670515060425, "rewards/margins": 1.7698372602462769, "rewards/rejected": -3.1438043117523193, "step": 5364 }, { "epoch": 1.19, "learning_rate": 8.657528999676912e-06, "logits/chosen": -1.3036506175994873, "logits/rejected": -1.3036506175994873, "logps/chosen": -217.34881591796875, "logps/rejected": -217.34881591796875, "loss": 0.4699, "rewards/accuracies": 0.0, "rewards/chosen": -7.674935817718506, "rewards/margins": 0.0, "rewards/rejected": -7.674935817718506, "step": 5365 }, { "epoch": 1.19, "learning_rate": 8.65630669230584e-06, "logits/chosen": -1.1831324100494385, "logits/rejected": -1.1360384225845337, "logps/chosen": -73.72792053222656, "logps/rejected": -122.7303466796875, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -0.9542221426963806, "rewards/margins": 2.960806131362915, "rewards/rejected": -3.9150283336639404, "step": 5366 }, { "epoch": 1.19, "learning_rate": 8.65508391510815e-06, "logits/chosen": -1.2940773963928223, "logits/rejected": -1.4568939208984375, "logps/chosen": -340.0126953125, "logps/rejected": -206.9984130859375, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": -5.345056056976318, "rewards/margins": 3.160269260406494, "rewards/rejected": -8.505325317382812, "step": 5367 }, { "epoch": 1.19, "learning_rate": 8.653860668240963e-06, "logits/chosen": -1.334399938583374, "logits/rejected": -1.356475830078125, "logps/chosen": -146.4650421142578, "logps/rejected": -75.06354522705078, "loss": 1.874, "rewards/accuracies": 0.0, "rewards/chosen": -5.95114278793335, "rewards/margins": -3.723118543624878, "rewards/rejected": -2.2280242443084717, "step": 5368 }, { "epoch": 1.19, "learning_rate": 8.652636951861463e-06, "logits/chosen": -1.169984221458435, "logits/rejected": -1.1721158027648926, "logps/chosen": -181.49606323242188, "logps/rejected": -170.6942138671875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.6509034633636475, "rewards/margins": 6.192503929138184, "rewards/rejected": -8.84340763092041, "step": 5369 }, { "epoch": 1.19, "learning_rate": 8.651412766126896e-06, "logits/chosen": -1.1097537279129028, "logits/rejected": -1.2052154541015625, "logps/chosen": -180.74464416503906, "logps/rejected": -138.544921875, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.27325439453125, "rewards/margins": 3.8524131774902344, "rewards/rejected": -5.125667572021484, "step": 5370 }, { "epoch": 1.19, "learning_rate": 8.650188111194565e-06, "logits/chosen": -1.114313006401062, "logits/rejected": -1.194138765335083, "logps/chosen": -118.88739013671875, "logps/rejected": -143.16111755371094, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 1.9384857416152954, "rewards/margins": 7.195197105407715, "rewards/rejected": -5.256711483001709, "step": 5371 }, { "epoch": 1.19, "learning_rate": 8.648962987221837e-06, "logits/chosen": -1.20913565158844, "logits/rejected": -1.2127554416656494, "logps/chosen": -119.45252990722656, "logps/rejected": -173.9686279296875, "loss": 0.5892, "rewards/accuracies": 1.0, "rewards/chosen": -2.5724685192108154, "rewards/margins": 0.4706864356994629, "rewards/rejected": -3.0431549549102783, "step": 5372 }, { "epoch": 1.19, "learning_rate": 8.647737394366138e-06, "logits/chosen": -1.5941401720046997, "logits/rejected": -1.6257469654083252, "logps/chosen": -114.35212707519531, "logps/rejected": -87.41943359375, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -2.390698194503784, "rewards/margins": 3.2097628116607666, "rewards/rejected": -5.600461006164551, "step": 5373 }, { "epoch": 1.19, "learning_rate": 8.646511332784953e-06, "logits/chosen": -0.9796573519706726, "logits/rejected": -0.9683521389961243, "logps/chosen": -149.47447204589844, "logps/rejected": -129.24542236328125, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/chosen": -2.3570594787597656, "rewards/margins": 2.8358535766601562, "rewards/rejected": -5.192913055419922, "step": 5374 }, { "epoch": 1.19, "learning_rate": 8.645284802635827e-06, "logits/chosen": -1.0121833086013794, "logits/rejected": -0.9016677141189575, "logps/chosen": -74.67950439453125, "logps/rejected": -259.2926025390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5180625915527344, "rewards/margins": 9.446703910827637, "rewards/rejected": -9.964766502380371, "step": 5375 }, { "epoch": 1.19, "learning_rate": 8.644057804076367e-06, "logits/chosen": -1.1780041456222534, "logits/rejected": -1.1780041456222534, "logps/chosen": -73.90176391601562, "logps/rejected": -73.90176391601562, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.1902852058410645, "rewards/margins": 0.0, "rewards/rejected": -5.1902852058410645, "step": 5376 }, { "epoch": 1.19, "learning_rate": 8.642830337264239e-06, "logits/chosen": -1.2097634077072144, "logits/rejected": -1.2097634077072144, "logps/chosen": -117.46775817871094, "logps/rejected": -117.46775817871094, "loss": 0.8852, "rewards/accuracies": 0.0, "rewards/chosen": -3.350132703781128, "rewards/margins": 0.0, "rewards/rejected": -3.350132703781128, "step": 5377 }, { "epoch": 1.19, "learning_rate": 8.641602402357168e-06, "logits/chosen": -1.1960417032241821, "logits/rejected": -1.127770185470581, "logps/chosen": -188.77175903320312, "logps/rejected": -282.1951904296875, "loss": 0.3469, "rewards/accuracies": 1.0, "rewards/chosen": -0.03616638109087944, "rewards/margins": 7.335571765899658, "rewards/rejected": -7.371737957000732, "step": 5378 }, { "epoch": 1.19, "learning_rate": 8.640373999512946e-06, "logits/chosen": -1.1887351274490356, "logits/rejected": -1.239812970161438, "logps/chosen": -153.18629455566406, "logps/rejected": -115.31288146972656, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -4.5868120193481445, "rewards/margins": 3.729095458984375, "rewards/rejected": -8.31590747833252, "step": 5379 }, { "epoch": 1.19, "learning_rate": 8.639145128889415e-06, "logits/chosen": -1.0586390495300293, "logits/rejected": -1.0775166749954224, "logps/chosen": -97.26295471191406, "logps/rejected": -111.08012390136719, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -0.6889724731445312, "rewards/margins": 2.7453887462615967, "rewards/rejected": -3.434361219406128, "step": 5380 }, { "epoch": 1.19, "learning_rate": 8.637915790644482e-06, "logits/chosen": -0.9552043676376343, "logits/rejected": -0.9801502227783203, "logps/chosen": -94.56881713867188, "logps/rejected": -121.2029800415039, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -1.4030503034591675, "rewards/margins": 4.950689792633057, "rewards/rejected": -6.353740215301514, "step": 5381 }, { "epoch": 1.19, "learning_rate": 8.636685984936115e-06, "logits/chosen": -1.2492581605911255, "logits/rejected": -1.1018298864364624, "logps/chosen": -158.54417419433594, "logps/rejected": -315.8840637207031, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": -1.0825210809707642, "rewards/margins": 2.9410171508789062, "rewards/rejected": -4.023538112640381, "step": 5382 }, { "epoch": 1.19, "learning_rate": 8.635455711922343e-06, "logits/chosen": -1.3860491514205933, "logits/rejected": -1.431925892829895, "logps/chosen": -276.5642395019531, "logps/rejected": -293.9221496582031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.6047455072402954, "rewards/margins": 7.923089504241943, "rewards/rejected": -6.3183441162109375, "step": 5383 }, { "epoch": 1.19, "learning_rate": 8.634224971761251e-06, "logits/chosen": -1.2454016208648682, "logits/rejected": -1.257231593132019, "logps/chosen": -66.64826202392578, "logps/rejected": -65.7127914428711, "loss": 0.398, "rewards/accuracies": 0.0, "rewards/chosen": -2.239940643310547, "rewards/margins": -0.19596433639526367, "rewards/rejected": -2.043976306915283, "step": 5384 }, { "epoch": 1.19, "learning_rate": 8.632993764610986e-06, "logits/chosen": -1.132293462753296, "logits/rejected": -1.0883262157440186, "logps/chosen": -150.99803161621094, "logps/rejected": -171.69085693359375, "loss": 0.0666, "rewards/accuracies": 1.0, "rewards/chosen": 1.170613169670105, "rewards/margins": 4.300596714019775, "rewards/rejected": -3.12998366355896, "step": 5385 }, { "epoch": 1.19, "learning_rate": 8.631762090629756e-06, "logits/chosen": -1.4212181568145752, "logits/rejected": -1.4526641368865967, "logps/chosen": -249.49774169921875, "logps/rejected": -263.0310363769531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.232626438140869, "rewards/margins": 14.275482177734375, "rewards/rejected": -12.042856216430664, "step": 5386 }, { "epoch": 1.19, "learning_rate": 8.630529949975828e-06, "logits/chosen": -1.597812533378601, "logits/rejected": -1.5967620611190796, "logps/chosen": -136.9544677734375, "logps/rejected": -166.97262573242188, "loss": 0.3201, "rewards/accuracies": 1.0, "rewards/chosen": -4.368415832519531, "rewards/margins": 0.11165332794189453, "rewards/rejected": -4.480069160461426, "step": 5387 }, { "epoch": 1.19, "learning_rate": 8.629297342807528e-06, "logits/chosen": -1.1249040365219116, "logits/rejected": -0.9732668995857239, "logps/chosen": -120.83612823486328, "logps/rejected": -274.1924133300781, "loss": 0.1461, "rewards/accuracies": 1.0, "rewards/chosen": -2.625014543533325, "rewards/margins": 1.102628231048584, "rewards/rejected": -3.727642774581909, "step": 5388 }, { "epoch": 1.19, "learning_rate": 8.628064269283246e-06, "logits/chosen": -0.8930009007453918, "logits/rejected": -0.7617063522338867, "logps/chosen": -81.22161102294922, "logps/rejected": -295.03558349609375, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -1.627705454826355, "rewards/margins": 4.6168532371521, "rewards/rejected": -6.244558811187744, "step": 5389 }, { "epoch": 1.19, "learning_rate": 8.626830729561426e-06, "logits/chosen": -0.9704271554946899, "logits/rejected": -0.978506326675415, "logps/chosen": -196.465576171875, "logps/rejected": -227.29562377929688, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.5018936395645142, "rewards/margins": 6.866063117980957, "rewards/rejected": -6.364169597625732, "step": 5390 }, { "epoch": 1.19, "learning_rate": 8.625596723800575e-06, "logits/chosen": -0.9025470018386841, "logits/rejected": -0.9025470018386841, "logps/chosen": -85.56693267822266, "logps/rejected": -85.56693267822266, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.735546827316284, "rewards/margins": 0.0, "rewards/rejected": -3.735546827316284, "step": 5391 }, { "epoch": 1.19, "learning_rate": 8.624362252159262e-06, "logits/chosen": -1.259300947189331, "logits/rejected": -1.237895131111145, "logps/chosen": -77.5025405883789, "logps/rejected": -106.49221801757812, "loss": 0.0966, "rewards/accuracies": 1.0, "rewards/chosen": -0.8244636654853821, "rewards/margins": 1.545724630355835, "rewards/rejected": -2.3701882362365723, "step": 5392 }, { "epoch": 1.19, "learning_rate": 8.623127314796111e-06, "logits/chosen": -1.0354050397872925, "logits/rejected": -1.086431860923767, "logps/chosen": -169.63702392578125, "logps/rejected": -78.17555236816406, "loss": 0.149, "rewards/accuracies": 1.0, "rewards/chosen": -3.6912033557891846, "rewards/margins": 1.0657579898834229, "rewards/rejected": -4.756961345672607, "step": 5393 }, { "epoch": 1.19, "learning_rate": 8.621891911869811e-06, "logits/chosen": -1.48450767993927, "logits/rejected": -1.4252549409866333, "logps/chosen": -64.67479705810547, "logps/rejected": -168.1658935546875, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -2.4824397563934326, "rewards/margins": 3.169133424758911, "rewards/rejected": -5.651573181152344, "step": 5394 }, { "epoch": 1.19, "learning_rate": 8.620656043539106e-06, "logits/chosen": -1.1916146278381348, "logits/rejected": -1.1127715110778809, "logps/chosen": -203.472900390625, "logps/rejected": -238.5223388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 4.062526226043701, "rewards/margins": 12.956531524658203, "rewards/rejected": -8.894004821777344, "step": 5395 }, { "epoch": 1.19, "learning_rate": 8.619419709962804e-06, "logits/chosen": -1.1206378936767578, "logits/rejected": -1.1373268365859985, "logps/chosen": -88.8388671875, "logps/rejected": -58.110015869140625, "loss": 0.1202, "rewards/accuracies": 1.0, "rewards/chosen": -2.5222413539886475, "rewards/margins": 1.3072538375854492, "rewards/rejected": -3.8294951915740967, "step": 5396 }, { "epoch": 1.19, "learning_rate": 8.61818291129977e-06, "logits/chosen": -1.1021060943603516, "logits/rejected": -1.2424132823944092, "logps/chosen": -240.71510314941406, "logps/rejected": -172.19386291503906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.2626876831054688, "rewards/margins": 11.255613327026367, "rewards/rejected": -8.992925643920898, "step": 5397 }, { "epoch": 1.19, "learning_rate": 8.61694564770893e-06, "logits/chosen": -1.238434910774231, "logits/rejected": -1.3910911083221436, "logps/chosen": -274.7435607910156, "logps/rejected": -133.95947265625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.3639038801193237, "rewards/margins": 8.364048957824707, "rewards/rejected": -9.72795295715332, "step": 5398 }, { "epoch": 1.19, "learning_rate": 8.61570791934927e-06, "logits/chosen": -1.0419676303863525, "logits/rejected": -0.9457353353500366, "logps/chosen": -129.19793701171875, "logps/rejected": -177.43960571289062, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.4670127630233765, "rewards/margins": 5.082843780517578, "rewards/rejected": -6.549856662750244, "step": 5399 }, { "epoch": 1.2, "learning_rate": 8.614469726379833e-06, "logits/chosen": -1.3306790590286255, "logits/rejected": -1.3396660089492798, "logps/chosen": -99.58465576171875, "logps/rejected": -116.93822479248047, "loss": 0.1723, "rewards/accuracies": 1.0, "rewards/chosen": -2.981588840484619, "rewards/margins": 0.8941383361816406, "rewards/rejected": -3.8757271766662598, "step": 5400 }, { "epoch": 1.2, "learning_rate": 8.613231068959726e-06, "logits/chosen": -1.2458972930908203, "logits/rejected": -1.245165467262268, "logps/chosen": -127.72503662109375, "logps/rejected": -142.03231811523438, "loss": 0.1436, "rewards/accuracies": 1.0, "rewards/chosen": -3.027841329574585, "rewards/margins": 1.124877691268921, "rewards/rejected": -4.152719020843506, "step": 5401 }, { "epoch": 1.2, "learning_rate": 8.61199194724811e-06, "logits/chosen": -1.2994414567947388, "logits/rejected": -1.25831937789917, "logps/chosen": -215.64605712890625, "logps/rejected": -264.0489501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.9293349981307983, "rewards/margins": 13.787565231323242, "rewards/rejected": -11.858230590820312, "step": 5402 }, { "epoch": 1.2, "learning_rate": 8.610752361404216e-06, "logits/chosen": -1.158356785774231, "logits/rejected": -1.160949945449829, "logps/chosen": -93.1014633178711, "logps/rejected": -205.62850952148438, "loss": 0.1417, "rewards/accuracies": 1.0, "rewards/chosen": 1.2722663879394531, "rewards/margins": 8.684861183166504, "rewards/rejected": -7.412594795227051, "step": 5403 }, { "epoch": 1.2, "learning_rate": 8.60951231158732e-06, "logits/chosen": -1.1177513599395752, "logits/rejected": -1.12594473361969, "logps/chosen": -201.54177856445312, "logps/rejected": -174.69821166992188, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.05249328538775444, "rewards/margins": 6.798684597015381, "rewards/rejected": -6.746191501617432, "step": 5404 }, { "epoch": 1.2, "learning_rate": 8.60827179795677e-06, "logits/chosen": -1.266600251197815, "logits/rejected": -1.235916256904602, "logps/chosen": -212.16415405273438, "logps/rejected": -273.93841552734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3752533197402954, "rewards/margins": 13.733428955078125, "rewards/rejected": -15.108682632446289, "step": 5405 }, { "epoch": 1.2, "learning_rate": 8.607030820671969e-06, "logits/chosen": -1.2593891620635986, "logits/rejected": -1.2341506481170654, "logps/chosen": -73.41989135742188, "logps/rejected": -101.69284057617188, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": -1.6861381530761719, "rewards/margins": 2.547156810760498, "rewards/rejected": -4.23329496383667, "step": 5406 }, { "epoch": 1.2, "learning_rate": 8.605789379892378e-06, "logits/chosen": -1.154974102973938, "logits/rejected": -1.1155869960784912, "logps/chosen": -134.63778686523438, "logps/rejected": -265.7864990234375, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -2.4330337047576904, "rewards/margins": 6.644737243652344, "rewards/rejected": -9.077771186828613, "step": 5407 }, { "epoch": 1.2, "learning_rate": 8.60454747577752e-06, "logits/chosen": -1.2898929119110107, "logits/rejected": -1.4255439043045044, "logps/chosen": -213.02069091796875, "logps/rejected": -183.829345703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.2120742797851562, "rewards/margins": 8.228315353393555, "rewards/rejected": -7.016241550445557, "step": 5408 }, { "epoch": 1.2, "learning_rate": 8.603305108486975e-06, "logits/chosen": -1.1621993780136108, "logits/rejected": -1.1505614519119263, "logps/chosen": -235.56179809570312, "logps/rejected": -252.71511840820312, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.1558074951171875, "rewards/margins": 8.846200942993164, "rewards/rejected": -11.002008438110352, "step": 5409 }, { "epoch": 1.2, "learning_rate": 8.602062278180388e-06, "logits/chosen": -0.9491633772850037, "logits/rejected": -1.0525362491607666, "logps/chosen": -130.45294189453125, "logps/rejected": -59.831180572509766, "loss": 0.2932, "rewards/accuracies": 1.0, "rewards/chosen": -3.922987461090088, "rewards/margins": 0.2402482032775879, "rewards/rejected": -4.163235664367676, "step": 5410 }, { "epoch": 1.2, "learning_rate": 8.600818985017457e-06, "logits/chosen": -0.9939281344413757, "logits/rejected": -0.9485271573066711, "logps/chosen": -100.85450744628906, "logps/rejected": -228.6165008544922, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.1880760192871094, "rewards/margins": 10.389445304870605, "rewards/rejected": -9.201369285583496, "step": 5411 }, { "epoch": 1.2, "learning_rate": 8.59957522915794e-06, "logits/chosen": -0.946414589881897, "logits/rejected": -0.9613925814628601, "logps/chosen": -148.6737060546875, "logps/rejected": -135.09207153320312, "loss": 0.1611, "rewards/accuracies": 1.0, "rewards/chosen": -3.9330520629882812, "rewards/margins": 0.9674334526062012, "rewards/rejected": -4.900485515594482, "step": 5412 }, { "epoch": 1.2, "learning_rate": 8.598331010761662e-06, "logits/chosen": -1.3377069234848022, "logits/rejected": -1.2609496116638184, "logps/chosen": -120.18795776367188, "logps/rejected": -181.40695190429688, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -0.9398193359375, "rewards/margins": 3.8471102714538574, "rewards/rejected": -4.786929607391357, "step": 5413 }, { "epoch": 1.2, "learning_rate": 8.597086329988498e-06, "logits/chosen": -1.2401044368743896, "logits/rejected": -1.2401044368743896, "logps/chosen": -238.4536895751953, "logps/rejected": -238.4536895751953, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.606565952301025, "rewards/margins": 0.0, "rewards/rejected": -5.606565952301025, "step": 5414 }, { "epoch": 1.2, "learning_rate": 8.595841186998388e-06, "logits/chosen": -1.3959711790084839, "logits/rejected": -1.2701388597488403, "logps/chosen": -99.75559997558594, "logps/rejected": -310.0675048828125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9259384274482727, "rewards/margins": 6.038069248199463, "rewards/rejected": -6.96400785446167, "step": 5415 }, { "epoch": 1.2, "learning_rate": 8.594595581951329e-06, "logits/chosen": -0.9129601120948792, "logits/rejected": -0.879468560218811, "logps/chosen": -191.78982543945312, "logps/rejected": -336.7544250488281, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 0.8218719363212585, "rewards/margins": 9.835655212402344, "rewards/rejected": -9.01378345489502, "step": 5416 }, { "epoch": 1.2, "learning_rate": 8.593349515007379e-06, "logits/chosen": -1.3972479104995728, "logits/rejected": -1.200308084487915, "logps/chosen": -126.10415649414062, "logps/rejected": -236.2254180908203, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": 2.8775651454925537, "rewards/margins": 3.1973633766174316, "rewards/rejected": -0.3197982907295227, "step": 5417 }, { "epoch": 1.2, "learning_rate": 8.592102986326656e-06, "logits/chosen": -1.379626989364624, "logits/rejected": -1.3174490928649902, "logps/chosen": -111.78486633300781, "logps/rejected": -153.85623168945312, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": -1.0511047840118408, "rewards/margins": 2.4983718395233154, "rewards/rejected": -3.5494766235351562, "step": 5418 }, { "epoch": 1.2, "learning_rate": 8.590855996069334e-06, "logits/chosen": -1.5226532220840454, "logits/rejected": -1.4966737031936646, "logps/chosen": -132.12680053710938, "logps/rejected": -192.4591827392578, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.32073974609375, "rewards/margins": 6.8415021896362305, "rewards/rejected": -8.16224193572998, "step": 5419 }, { "epoch": 1.2, "learning_rate": 8.589608544395646e-06, "logits/chosen": -1.2430065870285034, "logits/rejected": -1.1752431392669678, "logps/chosen": -176.8106689453125, "logps/rejected": -232.75537109375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.21625672280788422, "rewards/margins": 6.419833660125732, "rewards/rejected": -6.636090278625488, "step": 5420 }, { "epoch": 1.2, "learning_rate": 8.588360631465893e-06, "logits/chosen": -1.2489559650421143, "logits/rejected": -0.9882621765136719, "logps/chosen": -233.53805541992188, "logps/rejected": -691.2728271484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.686737060546875, "rewards/margins": 27.977270126342773, "rewards/rejected": -27.2905330657959, "step": 5421 }, { "epoch": 1.2, "learning_rate": 8.587112257440422e-06, "logits/chosen": -1.4434809684753418, "logits/rejected": -1.5062763690948486, "logps/chosen": -171.0211639404297, "logps/rejected": -183.94259643554688, "loss": 0.2457, "rewards/accuracies": 1.0, "rewards/chosen": -2.1155197620391846, "rewards/margins": 0.45456695556640625, "rewards/rejected": -2.570086717605591, "step": 5422 }, { "epoch": 1.2, "learning_rate": 8.585863422479652e-06, "logits/chosen": -1.223560094833374, "logits/rejected": -1.198067545890808, "logps/chosen": -131.22161865234375, "logps/rejected": -244.96827697753906, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -3.042600393295288, "rewards/margins": 7.927240371704102, "rewards/rejected": -10.969841003417969, "step": 5423 }, { "epoch": 1.2, "learning_rate": 8.584614126744051e-06, "logits/chosen": -0.880955696105957, "logits/rejected": -0.8683812618255615, "logps/chosen": -169.62582397460938, "logps/rejected": -166.684326171875, "loss": 0.5363, "rewards/accuracies": 1.0, "rewards/chosen": 2.5406646728515625, "rewards/margins": 4.6636247634887695, "rewards/rejected": -2.122959852218628, "step": 5424 }, { "epoch": 1.2, "learning_rate": 8.583364370394152e-06, "logits/chosen": -1.387810468673706, "logits/rejected": -1.336455225944519, "logps/chosen": -97.82968139648438, "logps/rejected": -194.58291625976562, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5677017569541931, "rewards/margins": 8.523106575012207, "rewards/rejected": -9.090807914733887, "step": 5425 }, { "epoch": 1.2, "learning_rate": 8.582114153590543e-06, "logits/chosen": -1.1195074319839478, "logits/rejected": -1.1103469133377075, "logps/chosen": -154.5687255859375, "logps/rejected": -182.39968872070312, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.96099853515625, "rewards/margins": 7.2555694580078125, "rewards/rejected": -6.2945709228515625, "step": 5426 }, { "epoch": 1.2, "learning_rate": 8.58086347649388e-06, "logits/chosen": -0.9207013249397278, "logits/rejected": -0.6056168079376221, "logps/chosen": -120.72083282470703, "logps/rejected": -716.91748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.27044677734375, "rewards/margins": 60.68218994140625, "rewards/rejected": -64.95263671875, "step": 5427 }, { "epoch": 1.2, "learning_rate": 8.579612339264867e-06, "logits/chosen": -1.3175101280212402, "logits/rejected": -1.3571172952651978, "logps/chosen": -152.42669677734375, "logps/rejected": -201.51222229003906, "loss": 0.0959, "rewards/accuracies": 1.0, "rewards/chosen": -5.569067478179932, "rewards/margins": 4.023830890655518, "rewards/rejected": -9.59289836883545, "step": 5428 }, { "epoch": 1.2, "learning_rate": 8.578360742064274e-06, "logits/chosen": -1.4682247638702393, "logits/rejected": -1.4622154235839844, "logps/chosen": -172.62713623046875, "logps/rejected": -174.42503356933594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.1678894758224487, "rewards/margins": 9.005145072937012, "rewards/rejected": -7.837255954742432, "step": 5429 }, { "epoch": 1.2, "learning_rate": 8.577108685052927e-06, "logits/chosen": -1.1086457967758179, "logits/rejected": -1.0777679681777954, "logps/chosen": -119.11014556884766, "logps/rejected": -164.26116943359375, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": -1.482308268547058, "rewards/margins": 2.9393134117126465, "rewards/rejected": -4.421621799468994, "step": 5430 }, { "epoch": 1.2, "learning_rate": 8.575856168391714e-06, "logits/chosen": -1.252840280532837, "logits/rejected": -1.2433780431747437, "logps/chosen": -130.08767700195312, "logps/rejected": -170.75125122070312, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": -0.7613998651504517, "rewards/margins": 2.6912994384765625, "rewards/rejected": -3.4526994228363037, "step": 5431 }, { "epoch": 1.2, "learning_rate": 8.57460319224158e-06, "logits/chosen": -1.5627931356430054, "logits/rejected": -1.5356966257095337, "logps/chosen": -51.969093322753906, "logps/rejected": -98.52027130126953, "loss": 0.1458, "rewards/accuracies": 1.0, "rewards/chosen": -2.2625644207000732, "rewards/margins": 1.113433599472046, "rewards/rejected": -3.375998020172119, "step": 5432 }, { "epoch": 1.2, "learning_rate": 8.573349756763527e-06, "logits/chosen": -1.3346283435821533, "logits/rejected": -1.4304745197296143, "logps/chosen": -184.30364990234375, "logps/rejected": -179.53382873535156, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.7569183707237244, "rewards/margins": 9.924318313598633, "rewards/rejected": -9.167400360107422, "step": 5433 }, { "epoch": 1.2, "learning_rate": 8.572095862118621e-06, "logits/chosen": -1.3647277355194092, "logits/rejected": -1.3009817600250244, "logps/chosen": -113.69145202636719, "logps/rejected": -190.32852172851562, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.7273414731025696, "rewards/margins": 7.05571985244751, "rewards/rejected": -6.328378200531006, "step": 5434 }, { "epoch": 1.2, "learning_rate": 8.570841508467984e-06, "logits/chosen": -1.0981738567352295, "logits/rejected": -1.0849980115890503, "logps/chosen": -99.47892761230469, "logps/rejected": -95.2281494140625, "loss": 0.3821, "rewards/accuracies": 0.0, "rewards/chosen": -1.792493462562561, "rewards/margins": -0.13720250129699707, "rewards/rejected": -1.655290961265564, "step": 5435 }, { "epoch": 1.2, "learning_rate": 8.569586695972798e-06, "logits/chosen": -0.9182413220405579, "logits/rejected": -0.8725290894508362, "logps/chosen": -82.24137878417969, "logps/rejected": -190.70510864257812, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.9471817016601562, "rewards/margins": 3.8991622924804688, "rewards/rejected": -4.846343994140625, "step": 5436 }, { "epoch": 1.2, "learning_rate": 8.568331424794301e-06, "logits/chosen": -1.0310319662094116, "logits/rejected": -0.9707841277122498, "logps/chosen": -219.1473388671875, "logps/rejected": -242.05023193359375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9275848269462585, "rewards/margins": 5.859560012817383, "rewards/rejected": -6.787144660949707, "step": 5437 }, { "epoch": 1.2, "learning_rate": 8.567075695093796e-06, "logits/chosen": -0.7593778371810913, "logits/rejected": -0.8625578284263611, "logps/chosen": -229.92831420898438, "logps/rejected": -312.6929931640625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.9229401350021362, "rewards/margins": 6.795417785644531, "rewards/rejected": -8.718358039855957, "step": 5438 }, { "epoch": 1.2, "learning_rate": 8.565819507032637e-06, "logits/chosen": -1.0791045427322388, "logits/rejected": -1.0401157140731812, "logps/chosen": -126.77082824707031, "logps/rejected": -227.5026397705078, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": -0.5604866147041321, "rewards/margins": 2.5516304969787598, "rewards/rejected": -3.112117052078247, "step": 5439 }, { "epoch": 1.2, "learning_rate": 8.564562860772246e-06, "logits/chosen": -1.2891637086868286, "logits/rejected": -1.3034462928771973, "logps/chosen": -77.73766326904297, "logps/rejected": -67.57483673095703, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9439781308174133, "rewards/margins": 3.9576361179351807, "rewards/rejected": -4.901614189147949, "step": 5440 }, { "epoch": 1.2, "learning_rate": 8.563305756474094e-06, "logits/chosen": -1.0717275142669678, "logits/rejected": -0.9652369618415833, "logps/chosen": -159.00216674804688, "logps/rejected": -132.86187744140625, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -3.6957955360412598, "rewards/margins": 2.859083652496338, "rewards/rejected": -6.554879188537598, "step": 5441 }, { "epoch": 1.2, "learning_rate": 8.562048194299719e-06, "logits/chosen": -1.1806755065917969, "logits/rejected": -1.1819777488708496, "logps/chosen": -124.07257843017578, "logps/rejected": -153.42337036132812, "loss": 0.0488, "rewards/accuracies": 1.0, "rewards/chosen": -1.5132942199707031, "rewards/margins": 2.284194231033325, "rewards/rejected": -3.7974884510040283, "step": 5442 }, { "epoch": 1.2, "learning_rate": 8.560790174410713e-06, "logits/chosen": -0.8478342294692993, "logits/rejected": -0.8478342294692993, "logps/chosen": -74.63121032714844, "logps/rejected": -74.63121032714844, "loss": 0.3723, "rewards/accuracies": 0.0, "rewards/chosen": -2.2390944957733154, "rewards/margins": 0.0, "rewards/rejected": -2.2390944957733154, "step": 5443 }, { "epoch": 1.2, "learning_rate": 8.559531696968733e-06, "logits/chosen": -1.3269777297973633, "logits/rejected": -1.3269777297973633, "logps/chosen": -99.80438995361328, "logps/rejected": -99.80438995361328, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": 0.3204391598701477, "rewards/margins": 0.0, "rewards/rejected": 0.3204391598701477, "step": 5444 }, { "epoch": 1.21, "learning_rate": 8.558272762135483e-06, "logits/chosen": -1.2121914625167847, "logits/rejected": -1.1627962589263916, "logps/chosen": -108.54893493652344, "logps/rejected": -211.2137451171875, "loss": 0.2078, "rewards/accuracies": 1.0, "rewards/chosen": -7.6571807861328125, "rewards/margins": 0.9010562896728516, "rewards/rejected": -8.558237075805664, "step": 5445 }, { "epoch": 1.21, "learning_rate": 8.557013370072737e-06, "logits/chosen": -1.295716643333435, "logits/rejected": -1.36332106590271, "logps/chosen": -185.89056396484375, "logps/rejected": -185.28219604492188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.757885754108429, "rewards/margins": 8.862534523010254, "rewards/rejected": -9.620420455932617, "step": 5446 }, { "epoch": 1.21, "learning_rate": 8.555753520942327e-06, "logits/chosen": -1.1342307329177856, "logits/rejected": -1.1803545951843262, "logps/chosen": -150.399169921875, "logps/rejected": -141.10125732421875, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 1.8323639631271362, "rewards/margins": 3.643235921859741, "rewards/rejected": -1.810871958732605, "step": 5447 }, { "epoch": 1.21, "learning_rate": 8.554493214906135e-06, "logits/chosen": -1.1951640844345093, "logits/rejected": -1.1951640844345093, "logps/chosen": -84.82447814941406, "logps/rejected": -84.82447814941406, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -6.6553053855896, "rewards/margins": 0.0, "rewards/rejected": -6.6553053855896, "step": 5448 }, { "epoch": 1.21, "learning_rate": 8.55323245212611e-06, "logits/chosen": -0.7124531865119934, "logits/rejected": -0.7017025947570801, "logps/chosen": -140.92832946777344, "logps/rejected": -140.1141357421875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.5398086309432983, "rewards/margins": 8.568946838378906, "rewards/rejected": -7.029138088226318, "step": 5449 }, { "epoch": 1.21, "learning_rate": 8.551971232764255e-06, "logits/chosen": -1.1559643745422363, "logits/rejected": -1.210882306098938, "logps/chosen": -183.274658203125, "logps/rejected": -154.54830932617188, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -1.7149399518966675, "rewards/margins": 3.1824135780334473, "rewards/rejected": -4.897353649139404, "step": 5450 }, { "epoch": 1.21, "learning_rate": 8.550709556982637e-06, "logits/chosen": -1.017303228378296, "logits/rejected": -0.9710756540298462, "logps/chosen": -133.98031616210938, "logps/rejected": -124.00845336914062, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -2.626513719558716, "rewards/margins": 6.874466896057129, "rewards/rejected": -9.500980377197266, "step": 5451 }, { "epoch": 1.21, "learning_rate": 8.549447424943379e-06, "logits/chosen": -1.2690784931182861, "logits/rejected": -1.3253535032272339, "logps/chosen": -170.16290283203125, "logps/rejected": -139.37896728515625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.7984222769737244, "rewards/margins": 4.2699432373046875, "rewards/rejected": -5.068365573883057, "step": 5452 }, { "epoch": 1.21, "learning_rate": 8.548184836808657e-06, "logits/chosen": -1.349287748336792, "logits/rejected": -1.2854446172714233, "logps/chosen": -71.89820098876953, "logps/rejected": -243.81201171875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.5187881588935852, "rewards/margins": 9.72284984588623, "rewards/rejected": -10.24163818359375, "step": 5453 }, { "epoch": 1.21, "learning_rate": 8.546921792740712e-06, "logits/chosen": -0.9827468395233154, "logits/rejected": -0.94260573387146, "logps/chosen": -205.2695770263672, "logps/rejected": -215.28639221191406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.6561385989189148, "rewards/margins": 8.679082870483398, "rewards/rejected": -8.022944450378418, "step": 5454 }, { "epoch": 1.21, "learning_rate": 8.545658292901844e-06, "logits/chosen": -0.9022276401519775, "logits/rejected": -0.8832339644432068, "logps/chosen": -129.9763641357422, "logps/rejected": -127.52346801757812, "loss": 0.1094, "rewards/accuracies": 1.0, "rewards/chosen": -3.6260344982147217, "rewards/margins": 1.4107797145843506, "rewards/rejected": -5.036814212799072, "step": 5455 }, { "epoch": 1.21, "learning_rate": 8.544394337454409e-06, "logits/chosen": -1.1458200216293335, "logits/rejected": -1.1425260305404663, "logps/chosen": -68.8957748413086, "logps/rejected": -65.40641021728516, "loss": 0.3513, "rewards/accuracies": 0.0, "rewards/chosen": -4.565701961517334, "rewards/margins": -0.007568359375, "rewards/rejected": -4.558133602142334, "step": 5456 }, { "epoch": 1.21, "learning_rate": 8.543129926560822e-06, "logits/chosen": -1.0996806621551514, "logits/rejected": -1.0996806621551514, "logps/chosen": -93.53321838378906, "logps/rejected": -93.53321838378906, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.011890411376953, "rewards/margins": 0.0, "rewards/rejected": -4.011890411376953, "step": 5457 }, { "epoch": 1.21, "learning_rate": 8.541865060383559e-06, "logits/chosen": -1.0724543333053589, "logits/rejected": -1.1160850524902344, "logps/chosen": -172.13328552246094, "logps/rejected": -171.1342315673828, "loss": 0.3786, "rewards/accuracies": 0.0, "rewards/chosen": -7.293103218078613, "rewards/margins": -0.12256097793579102, "rewards/rejected": -7.170542240142822, "step": 5458 }, { "epoch": 1.21, "learning_rate": 8.540599739085147e-06, "logits/chosen": -1.2154347896575928, "logits/rejected": -1.1970049142837524, "logps/chosen": -213.6641845703125, "logps/rejected": -230.361328125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.21510620415210724, "rewards/margins": 9.024955749511719, "rewards/rejected": -9.24006175994873, "step": 5459 }, { "epoch": 1.21, "learning_rate": 8.539333962828182e-06, "logits/chosen": -1.2186799049377441, "logits/rejected": -1.1622496843338013, "logps/chosen": -104.78143310546875, "logps/rejected": -152.65261840820312, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/chosen": -3.2720725536346436, "rewards/margins": 2.021850824356079, "rewards/rejected": -5.293923377990723, "step": 5460 }, { "epoch": 1.21, "learning_rate": 8.53806773177531e-06, "logits/chosen": -0.9064275622367859, "logits/rejected": -0.9454143643379211, "logps/chosen": -256.88519287109375, "logps/rejected": -121.74525451660156, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 2.6324462890625, "rewards/margins": 5.971981048583984, "rewards/rejected": -3.3395347595214844, "step": 5461 }, { "epoch": 1.21, "learning_rate": 8.53680104608924e-06, "logits/chosen": -0.82306307554245, "logits/rejected": -0.8078755140304565, "logps/chosen": -186.42630004882812, "logps/rejected": -198.07611083984375, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": 3.720816135406494, "rewards/margins": 3.194488525390625, "rewards/rejected": 0.5263275504112244, "step": 5462 }, { "epoch": 1.21, "learning_rate": 8.535533905932739e-06, "logits/chosen": -1.1050598621368408, "logits/rejected": -1.1050598621368408, "logps/chosen": -160.312744140625, "logps/rejected": -160.312744140625, "loss": 0.3701, "rewards/accuracies": 0.0, "rewards/chosen": -3.582932233810425, "rewards/margins": 0.0, "rewards/rejected": -3.582932233810425, "step": 5463 }, { "epoch": 1.21, "learning_rate": 8.534266311468629e-06, "logits/chosen": -1.1795856952667236, "logits/rejected": -1.1520607471466064, "logps/chosen": -143.34075927734375, "logps/rejected": -132.28179931640625, "loss": 0.2309, "rewards/accuracies": 1.0, "rewards/chosen": -2.3031814098358154, "rewards/margins": 0.544771671295166, "rewards/rejected": -2.8479530811309814, "step": 5464 }, { "epoch": 1.21, "learning_rate": 8.532998262859794e-06, "logits/chosen": -1.644652247428894, "logits/rejected": -1.4455045461654663, "logps/chosen": -132.13308715820312, "logps/rejected": -261.36956787109375, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -3.5202102661132812, "rewards/margins": 2.9565887451171875, "rewards/rejected": -6.476799011230469, "step": 5465 }, { "epoch": 1.21, "learning_rate": 8.531729760269176e-06, "logits/chosen": -1.2623611688613892, "logits/rejected": -1.2541650533676147, "logps/chosen": -177.31915283203125, "logps/rejected": -206.7538604736328, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.6465591192245483, "rewards/margins": 5.168542385101318, "rewards/rejected": -6.815101623535156, "step": 5466 }, { "epoch": 1.21, "learning_rate": 8.530460803859772e-06, "logits/chosen": -0.805813193321228, "logits/rejected": -0.7856371402740479, "logps/chosen": -75.00306701660156, "logps/rejected": -120.21278381347656, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -0.7479904294013977, "rewards/margins": 3.9648711681365967, "rewards/rejected": -4.71286153793335, "step": 5467 }, { "epoch": 1.21, "learning_rate": 8.529191393794645e-06, "logits/chosen": -1.3609707355499268, "logits/rejected": -1.4284312725067139, "logps/chosen": -150.13204956054688, "logps/rejected": -118.03630828857422, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": 1.4735625982284546, "rewards/margins": 2.43182373046875, "rewards/rejected": -0.9582611322402954, "step": 5468 }, { "epoch": 1.21, "learning_rate": 8.527921530236905e-06, "logits/chosen": -0.7734969258308411, "logits/rejected": -0.7734969258308411, "logps/chosen": -51.401702880859375, "logps/rejected": -51.401702880859375, "loss": 0.3495, "rewards/accuracies": 0.0, "rewards/chosen": -3.516878604888916, "rewards/margins": 0.0, "rewards/rejected": -3.516878604888916, "step": 5469 }, { "epoch": 1.21, "learning_rate": 8.52665121334973e-06, "logits/chosen": -1.0830744504928589, "logits/rejected": -1.0730891227722168, "logps/chosen": -85.99046325683594, "logps/rejected": -79.44585418701172, "loss": 0.2835, "rewards/accuracies": 1.0, "rewards/chosen": -1.4710510969161987, "rewards/margins": 0.29879188537597656, "rewards/rejected": -1.7698429822921753, "step": 5470 }, { "epoch": 1.21, "learning_rate": 8.525380443296353e-06, "logits/chosen": -0.9267036318778992, "logits/rejected": -0.8737732172012329, "logps/chosen": -105.4779052734375, "logps/rejected": -230.39109802246094, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -0.7698295712471008, "rewards/margins": 6.222762107849121, "rewards/rejected": -6.992591857910156, "step": 5471 }, { "epoch": 1.21, "learning_rate": 8.524109220240064e-06, "logits/chosen": -0.9360395073890686, "logits/rejected": -0.9360395073890686, "logps/chosen": -344.49365234375, "logps/rejected": -344.49365234375, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -3.1155335903167725, "rewards/margins": 0.0, "rewards/rejected": -3.1155335903167725, "step": 5472 }, { "epoch": 1.21, "learning_rate": 8.52283754434421e-06, "logits/chosen": -1.1469358205795288, "logits/rejected": -0.878374457359314, "logps/chosen": -106.17805480957031, "logps/rejected": -423.5639953613281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.3572952449321747, "rewards/margins": 16.620525360107422, "rewards/rejected": -16.263229370117188, "step": 5473 }, { "epoch": 1.21, "learning_rate": 8.521565415772201e-06, "logits/chosen": -1.2270890474319458, "logits/rejected": -1.0729329586029053, "logps/chosen": -73.37765502929688, "logps/rejected": -258.3583679199219, "loss": 0.0833, "rewards/accuracies": 1.0, "rewards/chosen": -2.19734263420105, "rewards/margins": 1.7398278713226318, "rewards/rejected": -3.9371705055236816, "step": 5474 }, { "epoch": 1.21, "learning_rate": 8.520292834687503e-06, "logits/chosen": -1.203151822090149, "logits/rejected": -0.5884518027305603, "logps/chosen": -90.58909606933594, "logps/rejected": -850.74365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8951393365859985, "rewards/margins": 72.29352569580078, "rewards/rejected": -73.18866729736328, "step": 5475 }, { "epoch": 1.21, "learning_rate": 8.519019801253637e-06, "logits/chosen": -0.9911019206047058, "logits/rejected": -0.9577681422233582, "logps/chosen": -71.03489685058594, "logps/rejected": -131.8577423095703, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.2827568054199219, "rewards/margins": 4.589435577392578, "rewards/rejected": -4.8721923828125, "step": 5476 }, { "epoch": 1.21, "learning_rate": 8.517746315634186e-06, "logits/chosen": -1.0262962579727173, "logits/rejected": -1.0262962579727173, "logps/chosen": -121.34590148925781, "logps/rejected": -121.34590148925781, "loss": 0.3497, "rewards/accuracies": 0.0, "rewards/chosen": -0.3449562191963196, "rewards/margins": 0.0, "rewards/rejected": -0.3449562191963196, "step": 5477 }, { "epoch": 1.21, "learning_rate": 8.51647237799279e-06, "logits/chosen": -1.2013875246047974, "logits/rejected": -1.3039226531982422, "logps/chosen": -262.05474853515625, "logps/rejected": -170.35311889648438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.5870208740234375, "rewards/margins": 9.793179512023926, "rewards/rejected": -8.206158638000488, "step": 5478 }, { "epoch": 1.21, "learning_rate": 8.515197988493146e-06, "logits/chosen": -1.5807915925979614, "logits/rejected": -1.5724745988845825, "logps/chosen": -126.79850006103516, "logps/rejected": -158.49578857421875, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -0.5544349551200867, "rewards/margins": 3.4672858715057373, "rewards/rejected": -4.021720886230469, "step": 5479 }, { "epoch": 1.21, "learning_rate": 8.513923147299012e-06, "logits/chosen": -1.3918824195861816, "logits/rejected": -1.4477806091308594, "logps/chosen": -58.83366775512695, "logps/rejected": -63.41008377075195, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -0.9136272668838501, "rewards/margins": 3.7407279014587402, "rewards/rejected": -4.654355049133301, "step": 5480 }, { "epoch": 1.21, "learning_rate": 8.512647854574201e-06, "logits/chosen": -0.9272947907447815, "logits/rejected": -1.0102072954177856, "logps/chosen": -288.3823547363281, "logps/rejected": -249.26971435546875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.8631181716918945, "rewards/margins": 6.537086486816406, "rewards/rejected": -11.4002046585083, "step": 5481 }, { "epoch": 1.21, "learning_rate": 8.511372110482583e-06, "logits/chosen": -1.2672367095947266, "logits/rejected": -1.1644396781921387, "logps/chosen": -132.7305908203125, "logps/rejected": -300.1829833984375, "loss": 0.0992, "rewards/accuracies": 1.0, "rewards/chosen": -5.276488780975342, "rewards/margins": 1.516219139099121, "rewards/rejected": -6.792707920074463, "step": 5482 }, { "epoch": 1.21, "learning_rate": 8.510095915188093e-06, "logits/chosen": -0.8086962103843689, "logits/rejected": -0.8340184092521667, "logps/chosen": -225.38018798828125, "logps/rejected": -101.829345703125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 1.6063812971115112, "rewards/margins": 7.2221856117248535, "rewards/rejected": -5.615804195404053, "step": 5483 }, { "epoch": 1.21, "learning_rate": 8.508819268854713e-06, "logits/chosen": -1.3020533323287964, "logits/rejected": -1.2166072130203247, "logps/chosen": -187.168701171875, "logps/rejected": -281.1253967285156, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.388467401266098, "rewards/margins": 11.594432830810547, "rewards/rejected": -11.205965042114258, "step": 5484 }, { "epoch": 1.21, "learning_rate": 8.507542171646493e-06, "logits/chosen": -1.2158944606781006, "logits/rejected": -1.2474204301834106, "logps/chosen": -119.79536437988281, "logps/rejected": -91.22650146484375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.27489015460014343, "rewards/margins": 6.504000186920166, "rewards/rejected": -6.778890132904053, "step": 5485 }, { "epoch": 1.21, "learning_rate": 8.506264623727536e-06, "logits/chosen": -1.1418380737304688, "logits/rejected": -1.1732423305511475, "logps/chosen": -101.08035278320312, "logps/rejected": -129.39993286132812, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.6621856689453125, "rewards/margins": 5.203117370605469, "rewards/rejected": -5.865303039550781, "step": 5486 }, { "epoch": 1.21, "learning_rate": 8.504986625262004e-06, "logits/chosen": -1.020919680595398, "logits/rejected": -0.9520482420921326, "logps/chosen": -99.50189971923828, "logps/rejected": -175.8371124267578, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.3812095820903778, "rewards/margins": 3.789196252822876, "rewards/rejected": -4.170405864715576, "step": 5487 }, { "epoch": 1.21, "learning_rate": 8.503708176414115e-06, "logits/chosen": -1.4140232801437378, "logits/rejected": -1.3006004095077515, "logps/chosen": -135.14755249023438, "logps/rejected": -215.21920776367188, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/chosen": -0.5193420648574829, "rewards/margins": 2.5542478561401367, "rewards/rejected": -3.073590040206909, "step": 5488 }, { "epoch": 1.21, "learning_rate": 8.50242927734815e-06, "logits/chosen": -0.959993839263916, "logits/rejected": -0.8589594960212708, "logps/chosen": -163.87411499023438, "logps/rejected": -568.08447265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.656269073486328, "rewards/margins": 22.185592651367188, "rewards/rejected": -25.841861724853516, "step": 5489 }, { "epoch": 1.22, "learning_rate": 8.501149928228441e-06, "logits/chosen": -1.7360379695892334, "logits/rejected": -1.75506591796875, "logps/chosen": -119.42465209960938, "logps/rejected": -156.1302947998047, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": -2.1878373622894287, "rewards/margins": 2.7489311695098877, "rewards/rejected": -4.936768531799316, "step": 5490 }, { "epoch": 1.22, "learning_rate": 8.499870129219383e-06, "logits/chosen": -1.154407024383545, "logits/rejected": -1.1038997173309326, "logps/chosen": -96.0032958984375, "logps/rejected": -242.565673828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.3328078985214233, "rewards/margins": 7.7460618019104, "rewards/rejected": -9.078869819641113, "step": 5491 }, { "epoch": 1.22, "learning_rate": 8.498589880485428e-06, "logits/chosen": -1.1561280488967896, "logits/rejected": -1.1561280488967896, "logps/chosen": -116.9079360961914, "logps/rejected": -116.9079360961914, "loss": 0.3512, "rewards/accuracies": 0.0, "rewards/chosen": -5.541596412658691, "rewards/margins": 0.0, "rewards/rejected": -5.541596412658691, "step": 5492 }, { "epoch": 1.22, "learning_rate": 8.497309182191082e-06, "logits/chosen": -1.1555536985397339, "logits/rejected": -1.1368151903152466, "logps/chosen": -206.73944091796875, "logps/rejected": -349.96563720703125, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": -2.1891632080078125, "rewards/margins": 14.405441284179688, "rewards/rejected": -16.5946044921875, "step": 5493 }, { "epoch": 1.22, "learning_rate": 8.496028034500914e-06, "logits/chosen": -1.0255917310714722, "logits/rejected": -0.9644641876220703, "logps/chosen": -86.29150390625, "logps/rejected": -212.71844482421875, "loss": 0.119, "rewards/accuracies": 1.0, "rewards/chosen": -1.6015663146972656, "rewards/margins": 1.3153939247131348, "rewards/rejected": -2.9169602394104004, "step": 5494 }, { "epoch": 1.22, "learning_rate": 8.49474643757955e-06, "logits/chosen": -1.431572437286377, "logits/rejected": -1.3971940279006958, "logps/chosen": -85.65330505371094, "logps/rejected": -142.12847900390625, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": -1.3803284168243408, "rewards/margins": 2.8880279064178467, "rewards/rejected": -4.2683563232421875, "step": 5495 }, { "epoch": 1.22, "learning_rate": 8.493464391591665e-06, "logits/chosen": -0.9873080849647522, "logits/rejected": -0.8729780316352844, "logps/chosen": -146.17282104492188, "logps/rejected": -382.87066650390625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -3.049215793609619, "rewards/margins": 12.292791366577148, "rewards/rejected": -15.342007637023926, "step": 5496 }, { "epoch": 1.22, "learning_rate": 8.492181896702008e-06, "logits/chosen": -1.21144437789917, "logits/rejected": -1.2587559223175049, "logps/chosen": -166.90310668945312, "logps/rejected": -161.15060424804688, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": 0.27542877197265625, "rewards/margins": 4.436668395996094, "rewards/rejected": -4.1612396240234375, "step": 5497 }, { "epoch": 1.22, "learning_rate": 8.49089895307537e-06, "logits/chosen": -1.2562997341156006, "logits/rejected": -1.3754874467849731, "logps/chosen": -268.8016357421875, "logps/rejected": -167.99966430664062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 3.4100189208984375, "rewards/margins": 8.898602485656738, "rewards/rejected": -5.488583564758301, "step": 5498 }, { "epoch": 1.22, "learning_rate": 8.48961556087661e-06, "logits/chosen": -0.8852922320365906, "logits/rejected": -0.8827630281448364, "logps/chosen": -85.35543060302734, "logps/rejected": -121.14463806152344, "loss": 0.0604, "rewards/accuracies": 1.0, "rewards/chosen": -2.9130196571350098, "rewards/margins": 2.5472421646118164, "rewards/rejected": -5.460261821746826, "step": 5499 }, { "epoch": 1.22, "learning_rate": 8.48833172027064e-06, "logits/chosen": -1.412685513496399, "logits/rejected": -1.0398203134536743, "logps/chosen": -157.22821044921875, "logps/rejected": -477.0652160644531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.174523949623108, "rewards/margins": 40.244110107421875, "rewards/rejected": -39.06958770751953, "step": 5500 }, { "epoch": 1.22, "learning_rate": 8.487047431422426e-06, "logits/chosen": -1.552156686782837, "logits/rejected": -1.6231845617294312, "logps/chosen": -114.76306915283203, "logps/rejected": -109.0384750366211, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": -1.6973068714141846, "rewards/margins": 3.0127241611480713, "rewards/rejected": -4.710031032562256, "step": 5501 }, { "epoch": 1.22, "learning_rate": 8.485762694497001e-06, "logits/chosen": -1.1179183721542358, "logits/rejected": -1.115376591682434, "logps/chosen": -228.40646362304688, "logps/rejected": -237.22007751464844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.486531138420105, "rewards/margins": 9.98837947845459, "rewards/rejected": -8.501848220825195, "step": 5502 }, { "epoch": 1.22, "learning_rate": 8.484477509659452e-06, "logits/chosen": -1.201174259185791, "logits/rejected": -1.28885817527771, "logps/chosen": -155.6100311279297, "logps/rejected": -132.49960327148438, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -0.12908172607421875, "rewards/margins": 6.431286811828613, "rewards/rejected": -6.560368537902832, "step": 5503 }, { "epoch": 1.22, "learning_rate": 8.483191877074916e-06, "logits/chosen": -1.2738274335861206, "logits/rejected": -1.2966746091842651, "logps/chosen": -172.71737670898438, "logps/rejected": -224.0581512451172, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 2.4783051013946533, "rewards/margins": 7.0265703201293945, "rewards/rejected": -4.548264980316162, "step": 5504 }, { "epoch": 1.22, "learning_rate": 8.4819057969086e-06, "logits/chosen": -0.956936776638031, "logits/rejected": -0.9717373251914978, "logps/chosen": -81.26145935058594, "logps/rejected": -174.42788696289062, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -0.7018265128135681, "rewards/margins": 3.0438919067382812, "rewards/rejected": -3.745718479156494, "step": 5505 }, { "epoch": 1.22, "learning_rate": 8.480619269325759e-06, "logits/chosen": -1.4115746021270752, "logits/rejected": -1.5129302740097046, "logps/chosen": -168.61936950683594, "logps/rejected": -121.1236801147461, "loss": 0.9244, "rewards/accuracies": 0.0, "rewards/chosen": -5.995335578918457, "rewards/margins": -1.6772942543029785, "rewards/rejected": -4.3180413246154785, "step": 5506 }, { "epoch": 1.22, "learning_rate": 8.479332294491707e-06, "logits/chosen": -1.2426084280014038, "logits/rejected": -1.1613832712173462, "logps/chosen": -116.34098052978516, "logps/rejected": -150.462646484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 4.319657325744629, "rewards/margins": 8.660125732421875, "rewards/rejected": -4.340467929840088, "step": 5507 }, { "epoch": 1.22, "learning_rate": 8.47804487257182e-06, "logits/chosen": -0.7802700400352478, "logits/rejected": -0.7802700400352478, "logps/chosen": -197.78089904785156, "logps/rejected": -197.78089904785156, "loss": 0.3828, "rewards/accuracies": 0.0, "rewards/chosen": -6.296077251434326, "rewards/margins": 0.0, "rewards/rejected": -6.296077251434326, "step": 5508 }, { "epoch": 1.22, "learning_rate": 8.47675700373153e-06, "logits/chosen": -1.1933027505874634, "logits/rejected": -1.181482195854187, "logps/chosen": -171.06878662109375, "logps/rejected": -201.8518524169922, "loss": 0.0444, "rewards/accuracies": 1.0, "rewards/chosen": -6.3985185623168945, "rewards/margins": 2.376300811767578, "rewards/rejected": -8.774819374084473, "step": 5509 }, { "epoch": 1.22, "learning_rate": 8.475468688136322e-06, "logits/chosen": -1.3296561241149902, "logits/rejected": -1.1540286540985107, "logps/chosen": -140.7026824951172, "logps/rejected": -279.0506591796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 5.138612270355225, "rewards/margins": 9.859617233276367, "rewards/rejected": -4.721005439758301, "step": 5510 }, { "epoch": 1.22, "learning_rate": 8.47417992595174e-06, "logits/chosen": -1.2441385984420776, "logits/rejected": -1.2441385984420776, "logps/chosen": -261.096435546875, "logps/rejected": -261.096435546875, "loss": 0.3469, "rewards/accuracies": 0.0, "rewards/chosen": -11.450568199157715, "rewards/margins": 0.0, "rewards/rejected": -11.450568199157715, "step": 5511 }, { "epoch": 1.22, "learning_rate": 8.472890717343391e-06, "logits/chosen": -1.3800733089447021, "logits/rejected": -1.3956775665283203, "logps/chosen": -101.93684387207031, "logps/rejected": -169.97894287109375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.8375701904296875, "rewards/margins": 6.611512660980225, "rewards/rejected": -7.449082851409912, "step": 5512 }, { "epoch": 1.22, "learning_rate": 8.471601062476933e-06, "logits/chosen": -1.1124582290649414, "logits/rejected": -1.1431317329406738, "logps/chosen": -120.92803955078125, "logps/rejected": -219.8080596923828, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.2711021900177002, "rewards/margins": 10.01821517944336, "rewards/rejected": -11.28931713104248, "step": 5513 }, { "epoch": 1.22, "learning_rate": 8.470310961518085e-06, "logits/chosen": -1.0453433990478516, "logits/rejected": -1.034152865409851, "logps/chosen": -91.94356536865234, "logps/rejected": -244.92330932617188, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -3.0666587352752686, "rewards/margins": 4.222776412963867, "rewards/rejected": -7.289434909820557, "step": 5514 }, { "epoch": 1.22, "learning_rate": 8.469020414632619e-06, "logits/chosen": -1.2915362119674683, "logits/rejected": -1.2166069746017456, "logps/chosen": -124.14421081542969, "logps/rejected": -254.18869018554688, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": 0.7456756830215454, "rewards/margins": 2.9500246047973633, "rewards/rejected": -2.2043488025665283, "step": 5515 }, { "epoch": 1.22, "learning_rate": 8.467729421986371e-06, "logits/chosen": -1.6890225410461426, "logits/rejected": -1.6236507892608643, "logps/chosen": -82.65638732910156, "logps/rejected": -187.7222442626953, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.15741196274757385, "rewards/margins": 4.806835174560547, "rewards/rejected": -4.964247226715088, "step": 5516 }, { "epoch": 1.22, "learning_rate": 8.466437983745227e-06, "logits/chosen": -1.240518569946289, "logits/rejected": -1.1248326301574707, "logps/chosen": -165.64236450195312, "logps/rejected": -269.6012878417969, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 3.144282579421997, "rewards/margins": 5.53729248046875, "rewards/rejected": -2.393009901046753, "step": 5517 }, { "epoch": 1.22, "learning_rate": 8.465146100075136e-06, "logits/chosen": -0.964562177658081, "logits/rejected": -0.9735127687454224, "logps/chosen": -154.09854125976562, "logps/rejected": -263.29345703125, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": 0.961560070514679, "rewards/margins": 13.722625732421875, "rewards/rejected": -12.761065483093262, "step": 5518 }, { "epoch": 1.22, "learning_rate": 8.4638537711421e-06, "logits/chosen": -1.498045802116394, "logits/rejected": -1.5120755434036255, "logps/chosen": -119.67231750488281, "logps/rejected": -94.37837219238281, "loss": 0.1759, "rewards/accuracies": 1.0, "rewards/chosen": -1.7221908569335938, "rewards/margins": 0.8852982521057129, "rewards/rejected": -2.6074891090393066, "step": 5519 }, { "epoch": 1.22, "learning_rate": 8.462560997112184e-06, "logits/chosen": -1.7008724212646484, "logits/rejected": -1.0287647247314453, "logps/chosen": -209.8768768310547, "logps/rejected": -775.1665649414062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.565207481384277, "rewards/margins": 59.40203857421875, "rewards/rejected": -67.96724700927734, "step": 5520 }, { "epoch": 1.22, "learning_rate": 8.4612677781515e-06, "logits/chosen": -0.8816126585006714, "logits/rejected": -0.8670648336410522, "logps/chosen": -94.20709991455078, "logps/rejected": -106.55264282226562, "loss": 0.1911, "rewards/accuracies": 1.0, "rewards/chosen": -4.339137554168701, "rewards/margins": 0.8243961334228516, "rewards/rejected": -5.163533687591553, "step": 5521 }, { "epoch": 1.22, "learning_rate": 8.45997411442623e-06, "logits/chosen": -1.186313509941101, "logits/rejected": -1.1266064643859863, "logps/chosen": -154.43893432617188, "logps/rejected": -217.58302307128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.6424118280410767, "rewards/margins": 9.916141510009766, "rewards/rejected": -9.27372932434082, "step": 5522 }, { "epoch": 1.22, "learning_rate": 8.458680006102602e-06, "logits/chosen": -1.4476295709609985, "logits/rejected": -1.433021068572998, "logps/chosen": -129.85926818847656, "logps/rejected": -140.1836395263672, "loss": 0.0963, "rewards/accuracies": 1.0, "rewards/chosen": -4.314770698547363, "rewards/margins": 1.54996919631958, "rewards/rejected": -5.864739894866943, "step": 5523 }, { "epoch": 1.22, "learning_rate": 8.45738545334691e-06, "logits/chosen": -1.6401156187057495, "logits/rejected": -1.1538138389587402, "logps/chosen": -178.89193725585938, "logps/rejected": -206.28176879882812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.33897095918655396, "rewards/margins": 11.213858604431152, "rewards/rejected": -11.55282974243164, "step": 5524 }, { "epoch": 1.22, "learning_rate": 8.456090456325496e-06, "logits/chosen": -1.405447244644165, "logits/rejected": -1.473359227180481, "logps/chosen": -135.95050048828125, "logps/rejected": -150.0476531982422, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -3.5988152027130127, "rewards/margins": 7.173740386962891, "rewards/rejected": -10.772555351257324, "step": 5525 }, { "epoch": 1.22, "learning_rate": 8.454795015204767e-06, "logits/chosen": -1.1555157899856567, "logits/rejected": -1.2030001878738403, "logps/chosen": -162.53379821777344, "logps/rejected": -129.1793212890625, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/chosen": 0.0038406371604651213, "rewards/margins": 2.627009630203247, "rewards/rejected": -2.6231689453125, "step": 5526 }, { "epoch": 1.22, "learning_rate": 8.453499130151183e-06, "logits/chosen": -1.421139121055603, "logits/rejected": -1.3826764822006226, "logps/chosen": -124.50761413574219, "logps/rejected": -184.6352081298828, "loss": 0.0484, "rewards/accuracies": 1.0, "rewards/chosen": -0.3462692201137543, "rewards/margins": 2.285665988922119, "rewards/rejected": -2.6319351196289062, "step": 5527 }, { "epoch": 1.22, "learning_rate": 8.452202801331265e-06, "logits/chosen": -1.377025842666626, "logits/rejected": -1.2922124862670898, "logps/chosen": -195.92999267578125, "logps/rejected": -236.1181640625, "loss": 0.3431, "rewards/accuracies": 1.0, "rewards/chosen": -5.146345615386963, "rewards/margins": 0.01539468765258789, "rewards/rejected": -5.161740303039551, "step": 5528 }, { "epoch": 1.22, "learning_rate": 8.450906028911585e-06, "logits/chosen": -1.3221642971038818, "logits/rejected": -1.3221642971038818, "logps/chosen": -222.55752563476562, "logps/rejected": -222.55752563476562, "loss": 0.352, "rewards/accuracies": 0.0, "rewards/chosen": -7.178915500640869, "rewards/margins": 0.0, "rewards/rejected": -7.178915500640869, "step": 5529 }, { "epoch": 1.22, "learning_rate": 8.449608813058776e-06, "logits/chosen": -1.4182331562042236, "logits/rejected": -1.4650824069976807, "logps/chosen": -122.16975402832031, "logps/rejected": -129.40020751953125, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": 0.07276153564453125, "rewards/margins": 3.8515305519104004, "rewards/rejected": -3.778769016265869, "step": 5530 }, { "epoch": 1.22, "learning_rate": 8.448311153939527e-06, "logits/chosen": -1.5345196723937988, "logits/rejected": -1.4715486764907837, "logps/chosen": -192.16464233398438, "logps/rejected": -317.2037658691406, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.0412094593048096, "rewards/margins": 5.207432746887207, "rewards/rejected": -6.2486419677734375, "step": 5531 }, { "epoch": 1.22, "learning_rate": 8.447013051720585e-06, "logits/chosen": -1.0927079916000366, "logits/rejected": -1.0856605768203735, "logps/chosen": -114.17869567871094, "logps/rejected": -121.91088104248047, "loss": 0.112, "rewards/accuracies": 1.0, "rewards/chosen": -2.319448947906494, "rewards/margins": 1.3818542957305908, "rewards/rejected": -3.701303243637085, "step": 5532 }, { "epoch": 1.22, "learning_rate": 8.445714506568751e-06, "logits/chosen": -1.3639944791793823, "logits/rejected": -1.356452465057373, "logps/chosen": -118.4684066772461, "logps/rejected": -202.78717041015625, "loss": 0.4205, "rewards/accuracies": 0.0, "rewards/chosen": -5.807880878448486, "rewards/margins": -0.27667808532714844, "rewards/rejected": -5.531202793121338, "step": 5533 }, { "epoch": 1.22, "learning_rate": 8.444415518650887e-06, "logits/chosen": -1.6141633987426758, "logits/rejected": -1.552925705909729, "logps/chosen": -87.51496124267578, "logps/rejected": -254.0537109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9475647211074829, "rewards/margins": 12.241442680358887, "rewards/rejected": -13.189007759094238, "step": 5534 }, { "epoch": 1.23, "learning_rate": 8.443116088133908e-06, "logits/chosen": -1.0173439979553223, "logits/rejected": -1.155053973197937, "logps/chosen": -216.97830200195312, "logps/rejected": -83.57273864746094, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.9443939328193665, "rewards/margins": 6.871799945831299, "rewards/rejected": -5.927405834197998, "step": 5535 }, { "epoch": 1.23, "learning_rate": 8.44181621518479e-06, "logits/chosen": -1.0766299962997437, "logits/rejected": -0.9805042743682861, "logps/chosen": -178.9683380126953, "logps/rejected": -240.21597290039062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.798266649246216, "rewards/margins": 9.866793632507324, "rewards/rejected": -6.0685272216796875, "step": 5536 }, { "epoch": 1.23, "learning_rate": 8.440515899970561e-06, "logits/chosen": -1.403062105178833, "logits/rejected": -1.3810853958129883, "logps/chosen": -79.37774658203125, "logps/rejected": -129.4193878173828, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -0.19153748452663422, "rewards/margins": 2.959015130996704, "rewards/rejected": -3.15055251121521, "step": 5537 }, { "epoch": 1.23, "learning_rate": 8.43921514265831e-06, "logits/chosen": -1.2551201581954956, "logits/rejected": -1.260054111480713, "logps/chosen": -165.2795867919922, "logps/rejected": -129.3028106689453, "loss": 0.0542, "rewards/accuracies": 1.0, "rewards/chosen": -1.8155487775802612, "rewards/margins": 4.37279748916626, "rewards/rejected": -6.1883463859558105, "step": 5538 }, { "epoch": 1.23, "learning_rate": 8.437913943415181e-06, "logits/chosen": -1.3611598014831543, "logits/rejected": -1.3711384534835815, "logps/chosen": -127.9925537109375, "logps/rejected": -221.5886688232422, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.478872776031494, "rewards/margins": 5.941061496734619, "rewards/rejected": -8.419934272766113, "step": 5539 }, { "epoch": 1.23, "learning_rate": 8.436612302408376e-06, "logits/chosen": -1.1917189359664917, "logits/rejected": -1.173257827758789, "logps/chosen": -132.15994262695312, "logps/rejected": -143.07386779785156, "loss": 0.0369, "rewards/accuracies": 1.0, "rewards/chosen": -1.2407249212265015, "rewards/margins": 2.6262941360473633, "rewards/rejected": -3.867018938064575, "step": 5540 }, { "epoch": 1.23, "learning_rate": 8.43531021980515e-06, "logits/chosen": -1.3879886865615845, "logits/rejected": -1.4162780046463013, "logps/chosen": -226.0296630859375, "logps/rejected": -220.53111267089844, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.62298583984375, "rewards/margins": 3.41070556640625, "rewards/rejected": -4.03369140625, "step": 5541 }, { "epoch": 1.23, "learning_rate": 8.434007695772819e-06, "logits/chosen": -1.756121277809143, "logits/rejected": -1.766235113143921, "logps/chosen": -127.45790100097656, "logps/rejected": -278.478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0068376064300537, "rewards/margins": 11.078774452209473, "rewards/rejected": -13.085612297058105, "step": 5542 }, { "epoch": 1.23, "learning_rate": 8.432704730478756e-06, "logits/chosen": -1.2980828285217285, "logits/rejected": -1.2074155807495117, "logps/chosen": -175.44749450683594, "logps/rejected": -300.9485168457031, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": 0.6641983389854431, "rewards/margins": 6.864686965942383, "rewards/rejected": -6.200488567352295, "step": 5543 }, { "epoch": 1.23, "learning_rate": 8.431401324090384e-06, "logits/chosen": -1.44896399974823, "logits/rejected": -1.4417436122894287, "logps/chosen": -130.45201110839844, "logps/rejected": -275.6788635253906, "loss": 2.5228, "rewards/accuracies": 1.0, "rewards/chosen": -2.253126621246338, "rewards/margins": 12.954750061035156, "rewards/rejected": -15.207877159118652, "step": 5544 }, { "epoch": 1.23, "learning_rate": 8.430097476775194e-06, "logits/chosen": -1.5102283954620361, "logits/rejected": -1.5156179666519165, "logps/chosen": -151.1439208984375, "logps/rejected": -91.25173950195312, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -3.255746603012085, "rewards/margins": 3.1377756595611572, "rewards/rejected": -6.393522262573242, "step": 5545 }, { "epoch": 1.23, "learning_rate": 8.428793188700722e-06, "logits/chosen": -1.1586384773254395, "logits/rejected": -1.1572439670562744, "logps/chosen": -91.52627563476562, "logps/rejected": -160.1088104248047, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -0.5284439325332642, "rewards/margins": 3.030735969543457, "rewards/rejected": -3.5591797828674316, "step": 5546 }, { "epoch": 1.23, "learning_rate": 8.427488460034567e-06, "logits/chosen": -1.3259742259979248, "logits/rejected": -1.3568394184112549, "logps/chosen": -99.21725463867188, "logps/rejected": -144.73284912109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.18277588486671448, "rewards/margins": 8.068429946899414, "rewards/rejected": -7.885653972625732, "step": 5547 }, { "epoch": 1.23, "learning_rate": 8.426183290944387e-06, "logits/chosen": -1.3310105800628662, "logits/rejected": -0.6573918461799622, "logps/chosen": -344.05633544921875, "logps/rejected": -768.787353515625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.17928467690944672, "rewards/margins": 60.13827896118164, "rewards/rejected": -60.317562103271484, "step": 5548 }, { "epoch": 1.23, "learning_rate": 8.424877681597889e-06, "logits/chosen": -1.0367499589920044, "logits/rejected": -1.0316251516342163, "logps/chosen": -149.80563354492188, "logps/rejected": -266.9916687011719, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 1.9332367181777954, "rewards/margins": 4.812330722808838, "rewards/rejected": -2.879093885421753, "step": 5549 }, { "epoch": 1.23, "learning_rate": 8.423571632162843e-06, "logits/chosen": -1.3152576684951782, "logits/rejected": -1.2915081977844238, "logps/chosen": -101.91043853759766, "logps/rejected": -79.91980743408203, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -2.7053277492523193, "rewards/margins": 3.510852098464966, "rewards/rejected": -6.216179847717285, "step": 5550 }, { "epoch": 1.23, "learning_rate": 8.422265142807071e-06, "logits/chosen": -1.1706970930099487, "logits/rejected": -1.2154499292373657, "logps/chosen": -150.42678833007812, "logps/rejected": -106.111083984375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": 1.8309783935546875, "rewards/margins": 4.668316841125488, "rewards/rejected": -2.8373382091522217, "step": 5551 }, { "epoch": 1.23, "learning_rate": 8.420958213698455e-06, "logits/chosen": -1.041324257850647, "logits/rejected": -1.0226223468780518, "logps/chosen": -96.09288787841797, "logps/rejected": -151.01254272460938, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -1.2806023359298706, "rewards/margins": 3.422152042388916, "rewards/rejected": -4.702754497528076, "step": 5552 }, { "epoch": 1.23, "learning_rate": 8.419650845004932e-06, "logits/chosen": -1.4821252822875977, "logits/rejected": -1.574332356452942, "logps/chosen": -208.81576538085938, "logps/rejected": -136.14532470703125, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": 1.0793899297714233, "rewards/margins": 3.207977294921875, "rewards/rejected": -2.128587484359741, "step": 5553 }, { "epoch": 1.23, "learning_rate": 8.418343036894497e-06, "logits/chosen": -1.0549836158752441, "logits/rejected": -1.041932463645935, "logps/chosen": -141.12530517578125, "logps/rejected": -164.64871215820312, "loss": 0.1123, "rewards/accuracies": 1.0, "rewards/chosen": -1.0333236455917358, "rewards/margins": 4.472023010253906, "rewards/rejected": -5.505346775054932, "step": 5554 }, { "epoch": 1.23, "learning_rate": 8.4170347895352e-06, "logits/chosen": -1.2364405393600464, "logits/rejected": -1.2013856172561646, "logps/chosen": -107.26213073730469, "logps/rejected": -126.48348999023438, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.7715644836425781, "rewards/margins": 4.086050510406494, "rewards/rejected": -5.857614994049072, "step": 5555 }, { "epoch": 1.23, "learning_rate": 8.415726103095146e-06, "logits/chosen": -1.3533406257629395, "logits/rejected": -1.3075186014175415, "logps/chosen": -135.09341430664062, "logps/rejected": -282.88665771484375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 6.079077243804932, "rewards/margins": 8.984045028686523, "rewards/rejected": -2.90496826171875, "step": 5556 }, { "epoch": 1.23, "learning_rate": 8.414416977742498e-06, "logits/chosen": -1.209883451461792, "logits/rejected": -1.2082823514938354, "logps/chosen": -139.30801391601562, "logps/rejected": -169.09141540527344, "loss": 0.3426, "rewards/accuracies": 1.0, "rewards/chosen": -5.257625579833984, "rewards/margins": 0.01822376251220703, "rewards/rejected": -5.275849342346191, "step": 5557 }, { "epoch": 1.23, "learning_rate": 8.413107413645477e-06, "logits/chosen": -1.5465271472930908, "logits/rejected": -1.4165124893188477, "logps/chosen": -153.75718688964844, "logps/rejected": -318.3330993652344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.7760818004608154, "rewards/margins": 9.237645149230957, "rewards/rejected": -6.4615631103515625, "step": 5558 }, { "epoch": 1.23, "learning_rate": 8.411797410972358e-06, "logits/chosen": -1.130715250968933, "logits/rejected": -1.2485207319259644, "logps/chosen": -150.60760498046875, "logps/rejected": -127.15026092529297, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 3.3038132190704346, "rewards/margins": 12.425968170166016, "rewards/rejected": -9.12215518951416, "step": 5559 }, { "epoch": 1.23, "learning_rate": 8.410486969891475e-06, "logits/chosen": -1.410559058189392, "logits/rejected": -1.4167640209197998, "logps/chosen": -122.3050537109375, "logps/rejected": -185.60195922851562, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 0.5565239191055298, "rewards/margins": 5.07268762588501, "rewards/rejected": -4.5161638259887695, "step": 5560 }, { "epoch": 1.23, "learning_rate": 8.409176090571214e-06, "logits/chosen": -1.438578724861145, "logits/rejected": -1.460459589958191, "logps/chosen": -118.7369384765625, "logps/rejected": -136.70506286621094, "loss": 0.3124, "rewards/accuracies": 1.0, "rewards/chosen": -1.071345567703247, "rewards/margins": 4.240744590759277, "rewards/rejected": -5.3120903968811035, "step": 5561 }, { "epoch": 1.23, "learning_rate": 8.40786477318002e-06, "logits/chosen": -1.2094730138778687, "logits/rejected": -1.172495722770691, "logps/chosen": -94.71015930175781, "logps/rejected": -91.19905853271484, "loss": 0.5158, "rewards/accuracies": 1.0, "rewards/chosen": -0.03095245361328125, "rewards/margins": 6.616075038909912, "rewards/rejected": -6.647027492523193, "step": 5562 }, { "epoch": 1.23, "learning_rate": 8.406553017886397e-06, "logits/chosen": -1.241885781288147, "logits/rejected": -1.1595033407211304, "logps/chosen": -235.40774536132812, "logps/rejected": -200.65277099609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2287750244140625, "rewards/margins": 12.754145622253418, "rewards/rejected": -13.98292064666748, "step": 5563 }, { "epoch": 1.23, "learning_rate": 8.405240824858898e-06, "logits/chosen": -0.9735977649688721, "logits/rejected": -0.9735977649688721, "logps/chosen": -141.411865234375, "logps/rejected": -141.411865234375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.4503021240234375, "rewards/margins": 0.0, "rewards/rejected": -5.4503021240234375, "step": 5564 }, { "epoch": 1.23, "learning_rate": 8.40392819426614e-06, "logits/chosen": -1.5801175832748413, "logits/rejected": -1.5600080490112305, "logps/chosen": -133.89129638671875, "logps/rejected": -179.6650848388672, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.0800262689590454, "rewards/margins": 8.340579986572266, "rewards/rejected": -7.26055383682251, "step": 5565 }, { "epoch": 1.23, "learning_rate": 8.402615126276792e-06, "logits/chosen": -1.1270447969436646, "logits/rejected": -1.032530665397644, "logps/chosen": -255.11544799804688, "logps/rejected": -307.5701904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.844036817550659, "rewards/margins": 9.508028030395508, "rewards/rejected": -6.6639909744262695, "step": 5566 }, { "epoch": 1.23, "learning_rate": 8.40130162105958e-06, "logits/chosen": -1.306426763534546, "logits/rejected": -1.2450469732284546, "logps/chosen": -88.12886810302734, "logps/rejected": -156.20472717285156, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.3442604541778564, "rewards/margins": 4.15095329284668, "rewards/rejected": -5.495213508605957, "step": 5567 }, { "epoch": 1.23, "learning_rate": 8.399987678783285e-06, "logits/chosen": -1.602297306060791, "logits/rejected": -1.6609952449798584, "logps/chosen": -124.39056396484375, "logps/rejected": -166.90512084960938, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.552503228187561, "rewards/margins": 4.519728183746338, "rewards/rejected": -5.072231292724609, "step": 5568 }, { "epoch": 1.23, "learning_rate": 8.398673299616747e-06, "logits/chosen": -1.3255548477172852, "logits/rejected": -1.2766591310501099, "logps/chosen": -100.36416625976562, "logps/rejected": -202.6759490966797, "loss": 0.489, "rewards/accuracies": 1.0, "rewards/chosen": -0.8780197501182556, "rewards/margins": 6.393956184387207, "rewards/rejected": -7.271975994110107, "step": 5569 }, { "epoch": 1.23, "learning_rate": 8.397358483728861e-06, "logits/chosen": -0.9713102579116821, "logits/rejected": -0.9713102579116821, "logps/chosen": -143.16610717773438, "logps/rejected": -143.16610717773438, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -2.249788761138916, "rewards/margins": 0.0, "rewards/rejected": -2.249788761138916, "step": 5570 }, { "epoch": 1.23, "learning_rate": 8.396043231288577e-06, "logits/chosen": -1.6205896139144897, "logits/rejected": -1.5768309831619263, "logps/chosen": -80.59593200683594, "logps/rejected": -188.4110107421875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 0.0007522582891397178, "rewards/margins": 6.210489749908447, "rewards/rejected": -6.209737300872803, "step": 5571 }, { "epoch": 1.23, "learning_rate": 8.3947275424649e-06, "logits/chosen": -1.0832502841949463, "logits/rejected": -1.1316816806793213, "logps/chosen": -91.2950668334961, "logps/rejected": -62.055660247802734, "loss": 0.0514, "rewards/accuracies": 1.0, "rewards/chosen": 0.6488968133926392, "rewards/margins": 2.2224206924438477, "rewards/rejected": -1.573523759841919, "step": 5572 }, { "epoch": 1.23, "learning_rate": 8.393411417426895e-06, "logits/chosen": -0.9502995014190674, "logits/rejected": -0.9502995014190674, "logps/chosen": -112.78334045410156, "logps/rejected": -112.78334045410156, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -3.235487461090088, "rewards/margins": 0.0, "rewards/rejected": -3.235487461090088, "step": 5573 }, { "epoch": 1.23, "learning_rate": 8.392094856343682e-06, "logits/chosen": -1.3539097309112549, "logits/rejected": -1.3031729459762573, "logps/chosen": -119.1288833618164, "logps/rejected": -223.7467498779297, "loss": 0.0613, "rewards/accuracies": 1.0, "rewards/chosen": -1.450081706047058, "rewards/margins": 5.8030781745910645, "rewards/rejected": -7.253159999847412, "step": 5574 }, { "epoch": 1.23, "learning_rate": 8.390777859384434e-06, "logits/chosen": -1.5300153493881226, "logits/rejected": -1.5086897611618042, "logps/chosen": -126.05245971679688, "logps/rejected": -199.23361206054688, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -2.503730058670044, "rewards/margins": 4.998994827270508, "rewards/rejected": -7.502725124359131, "step": 5575 }, { "epoch": 1.23, "learning_rate": 8.38946042671838e-06, "logits/chosen": -1.566348910331726, "logits/rejected": -1.566216230392456, "logps/chosen": -101.7291259765625, "logps/rejected": -173.88662719726562, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.97079610824585, "rewards/margins": 6.422125339508057, "rewards/rejected": -13.392921447753906, "step": 5576 }, { "epoch": 1.23, "learning_rate": 8.388142558514811e-06, "logits/chosen": -1.1018059253692627, "logits/rejected": -0.9309563636779785, "logps/chosen": -99.28582763671875, "logps/rejected": -153.04136657714844, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.9383010864257812, "rewards/margins": 6.978161811828613, "rewards/rejected": -7.9164628982543945, "step": 5577 }, { "epoch": 1.23, "learning_rate": 8.38682425494307e-06, "logits/chosen": -1.3643336296081543, "logits/rejected": -1.4556841850280762, "logps/chosen": -200.83987426757812, "logps/rejected": -236.48831176757812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.933789074420929, "rewards/margins": 11.399014472961426, "rewards/rejected": -10.465225219726562, "step": 5578 }, { "epoch": 1.23, "learning_rate": 8.38550551617255e-06, "logits/chosen": -1.522156834602356, "logits/rejected": -1.518638253211975, "logps/chosen": -106.1246566772461, "logps/rejected": -110.48159790039062, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": 0.3344589173793793, "rewards/margins": 3.2130126953125, "rewards/rejected": -2.878553867340088, "step": 5579 }, { "epoch": 1.24, "learning_rate": 8.384186342372711e-06, "logits/chosen": -1.161844253540039, "logits/rejected": -1.1111844778060913, "logps/chosen": -171.1463165283203, "logps/rejected": -176.07400512695312, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": 1.7238448858261108, "rewards/margins": 4.369259834289551, "rewards/rejected": -2.6454148292541504, "step": 5580 }, { "epoch": 1.24, "learning_rate": 8.382866733713064e-06, "logits/chosen": -1.810957908630371, "logits/rejected": -1.6999074220657349, "logps/chosen": -77.98971557617188, "logps/rejected": -166.91578674316406, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.25646209716796875, "rewards/margins": 7.273508548736572, "rewards/rejected": -7.0170464515686035, "step": 5581 }, { "epoch": 1.24, "learning_rate": 8.381546690363174e-06, "logits/chosen": -1.1811456680297852, "logits/rejected": -1.0916600227355957, "logps/chosen": -90.4205322265625, "logps/rejected": -150.14645385742188, "loss": 0.0816, "rewards/accuracies": 1.0, "rewards/chosen": -2.8190619945526123, "rewards/margins": 4.625913619995117, "rewards/rejected": -7.44497537612915, "step": 5582 }, { "epoch": 1.24, "learning_rate": 8.380226212492661e-06, "logits/chosen": -1.130285620689392, "logits/rejected": -1.1280343532562256, "logps/chosen": -114.1824951171875, "logps/rejected": -127.42852783203125, "loss": 0.1317, "rewards/accuracies": 1.0, "rewards/chosen": -1.0544013977050781, "rewards/margins": 1.2009332180023193, "rewards/rejected": -2.2553346157073975, "step": 5583 }, { "epoch": 1.24, "learning_rate": 8.378905300271207e-06, "logits/chosen": -1.4845572710037231, "logits/rejected": -1.4108902215957642, "logps/chosen": -152.83352661132812, "logps/rejected": -210.11367797851562, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 2.4854843616485596, "rewards/margins": 8.040270805358887, "rewards/rejected": -5.554786682128906, "step": 5584 }, { "epoch": 1.24, "learning_rate": 8.377583953868545e-06, "logits/chosen": -1.256676197052002, "logits/rejected": -1.2464573383331299, "logps/chosen": -198.0003662109375, "logps/rejected": -140.5388946533203, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/chosen": -6.678709506988525, "rewards/margins": 2.507927417755127, "rewards/rejected": -9.186636924743652, "step": 5585 }, { "epoch": 1.24, "learning_rate": 8.376262173454464e-06, "logits/chosen": -1.4396530389785767, "logits/rejected": -1.3111387491226196, "logps/chosen": -122.40108489990234, "logps/rejected": -207.93075561523438, "loss": 0.9108, "rewards/accuracies": 0.0, "rewards/chosen": -2.2846062183380127, "rewards/margins": -1.645032525062561, "rewards/rejected": -0.6395736932754517, "step": 5586 }, { "epoch": 1.24, "learning_rate": 8.374939959198809e-06, "logits/chosen": -1.4568523168563843, "logits/rejected": -1.4448026418685913, "logps/chosen": -120.49378967285156, "logps/rejected": -91.17796325683594, "loss": 0.1877, "rewards/accuracies": 1.0, "rewards/chosen": 0.10071410983800888, "rewards/margins": 0.785992443561554, "rewards/rejected": -0.6852783560752869, "step": 5587 }, { "epoch": 1.24, "learning_rate": 8.373617311271483e-06, "logits/chosen": -1.4716150760650635, "logits/rejected": -1.6079812049865723, "logps/chosen": -176.1229705810547, "logps/rejected": -83.53288269042969, "loss": 0.0437, "rewards/accuracies": 1.0, "rewards/chosen": -0.7191116213798523, "rewards/margins": 2.4097037315368652, "rewards/rejected": -3.1288154125213623, "step": 5588 }, { "epoch": 1.24, "learning_rate": 8.372294229842442e-06, "logits/chosen": -1.284903883934021, "logits/rejected": -1.3429896831512451, "logps/chosen": -137.65158081054688, "logps/rejected": -182.8622589111328, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.0963456630706787, "rewards/margins": 5.929976463317871, "rewards/rejected": -8.026322364807129, "step": 5589 }, { "epoch": 1.24, "learning_rate": 8.3709707150817e-06, "logits/chosen": -0.9489996433258057, "logits/rejected": -0.8265445232391357, "logps/chosen": -113.3733901977539, "logps/rejected": -580.055908203125, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": 4.466187477111816, "rewards/margins": 33.40287780761719, "rewards/rejected": -28.936689376831055, "step": 5590 }, { "epoch": 1.24, "learning_rate": 8.369646767159325e-06, "logits/chosen": -1.5566651821136475, "logits/rejected": -1.5977058410644531, "logps/chosen": -159.83595275878906, "logps/rejected": -132.333251953125, "loss": 1.2667, "rewards/accuracies": 0.0, "rewards/chosen": -4.478975772857666, "rewards/margins": -2.450087070465088, "rewards/rejected": -2.028888702392578, "step": 5591 }, { "epoch": 1.24, "learning_rate": 8.36832238624544e-06, "logits/chosen": -1.3118019104003906, "logits/rejected": -1.4180541038513184, "logps/chosen": -222.18429565429688, "logps/rejected": -96.90155029296875, "loss": 0.49, "rewards/accuracies": 0.0, "rewards/chosen": -1.914556860923767, "rewards/margins": -0.5092315673828125, "rewards/rejected": -1.4053252935409546, "step": 5592 }, { "epoch": 1.24, "learning_rate": 8.366997572510228e-06, "logits/chosen": -1.1859502792358398, "logits/rejected": -1.1511905193328857, "logps/chosen": -249.73190307617188, "logps/rejected": -293.0978698730469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.036651611328125, "rewards/margins": 13.684916496276855, "rewards/rejected": -13.72156810760498, "step": 5593 }, { "epoch": 1.24, "learning_rate": 8.365672326123918e-06, "logits/chosen": -1.5602760314941406, "logits/rejected": -1.5484143495559692, "logps/chosen": -206.224609375, "logps/rejected": -233.93405151367188, "loss": 0.0675, "rewards/accuracies": 1.0, "rewards/chosen": -7.232138156890869, "rewards/margins": 1.9386143684387207, "rewards/rejected": -9.17075252532959, "step": 5594 }, { "epoch": 1.24, "learning_rate": 8.364346647256808e-06, "logits/chosen": -1.6578232049942017, "logits/rejected": -1.682366967201233, "logps/chosen": -112.20999908447266, "logps/rejected": -205.69972229003906, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.8372459411621094, "rewards/margins": 8.026972770690918, "rewards/rejected": -8.864218711853027, "step": 5595 }, { "epoch": 1.24, "learning_rate": 8.36302053607924e-06, "logits/chosen": -1.5685597658157349, "logits/rejected": -1.5233310461044312, "logps/chosen": -88.88610076904297, "logps/rejected": -172.96267700195312, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3310508728027344, "rewards/margins": 6.453902721405029, "rewards/rejected": -7.784953594207764, "step": 5596 }, { "epoch": 1.24, "learning_rate": 8.361693992761617e-06, "logits/chosen": -1.3108714818954468, "logits/rejected": -1.368267297744751, "logps/chosen": -120.78327941894531, "logps/rejected": -96.86540222167969, "loss": 0.0715, "rewards/accuracies": 1.0, "rewards/chosen": -1.3446457386016846, "rewards/margins": 1.9080758094787598, "rewards/rejected": -3.2527215480804443, "step": 5597 }, { "epoch": 1.24, "learning_rate": 8.360367017474398e-06, "logits/chosen": -1.3767670392990112, "logits/rejected": -1.3605718612670898, "logps/chosen": -99.81903839111328, "logps/rejected": -124.77024841308594, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": -2.364572286605835, "rewards/margins": 2.6041791439056396, "rewards/rejected": -4.968751430511475, "step": 5598 }, { "epoch": 1.24, "learning_rate": 8.359039610388096e-06, "logits/chosen": -1.3983402252197266, "logits/rejected": -1.3802605867385864, "logps/chosen": -85.18086242675781, "logps/rejected": -188.99864196777344, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -3.1408538818359375, "rewards/margins": 6.721898078918457, "rewards/rejected": -9.862751960754395, "step": 5599 }, { "epoch": 1.24, "learning_rate": 8.357711771673278e-06, "logits/chosen": -1.308586597442627, "logits/rejected": -1.3343634605407715, "logps/chosen": -182.71484375, "logps/rejected": -117.11589050292969, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.935046374797821, "rewards/margins": 5.747208118438721, "rewards/rejected": -4.812161922454834, "step": 5600 }, { "epoch": 1.24, "learning_rate": 8.35638350150057e-06, "logits/chosen": -1.2642582654953003, "logits/rejected": -1.2994719743728638, "logps/chosen": -128.4610595703125, "logps/rejected": -114.53996276855469, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.8647674918174744, "rewards/margins": 7.559549808502197, "rewards/rejected": -8.424317359924316, "step": 5601 }, { "epoch": 1.24, "learning_rate": 8.35505480004065e-06, "logits/chosen": -1.3452510833740234, "logits/rejected": -1.2562135457992554, "logps/chosen": -183.042724609375, "logps/rejected": -271.3183288574219, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.1444061994552612, "rewards/margins": 5.977550983428955, "rewards/rejected": -7.121957302093506, "step": 5602 }, { "epoch": 1.24, "learning_rate": 8.353725667464254e-06, "logits/chosen": -1.1918718814849854, "logits/rejected": -1.1663905382156372, "logps/chosen": -216.36399841308594, "logps/rejected": -307.3625183105469, "loss": 0.1445, "rewards/accuracies": 1.0, "rewards/chosen": 1.8146408796310425, "rewards/margins": 1.0961685180664062, "rewards/rejected": 0.7184723019599915, "step": 5603 }, { "epoch": 1.24, "learning_rate": 8.352396103942171e-06, "logits/chosen": -1.0323824882507324, "logits/rejected": -1.1073459386825562, "logps/chosen": -221.88067626953125, "logps/rejected": -211.7331085205078, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -1.033911108970642, "rewards/margins": 7.4779276847839355, "rewards/rejected": -8.511838912963867, "step": 5604 }, { "epoch": 1.24, "learning_rate": 8.351066109645248e-06, "logits/chosen": -1.5073373317718506, "logits/rejected": -1.5399786233901978, "logps/chosen": -171.70594787597656, "logps/rejected": -90.6795883178711, "loss": 0.0703, "rewards/accuracies": 1.0, "rewards/chosen": -4.894297122955322, "rewards/margins": 1.8927664756774902, "rewards/rejected": -6.7870635986328125, "step": 5605 }, { "epoch": 1.24, "learning_rate": 8.349735684744385e-06, "logits/chosen": -1.337661623954773, "logits/rejected": -1.2664695978164673, "logps/chosen": -196.40609741210938, "logps/rejected": -313.19940185546875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.25758057832717896, "rewards/margins": 6.190476894378662, "rewards/rejected": -6.448057651519775, "step": 5606 }, { "epoch": 1.24, "learning_rate": 8.34840482941054e-06, "logits/chosen": -1.4569885730743408, "logits/rejected": -1.4895069599151611, "logps/chosen": -153.46803283691406, "logps/rejected": -106.27609252929688, "loss": 0.1145, "rewards/accuracies": 1.0, "rewards/chosen": -6.40048360824585, "rewards/margins": 1.392369270324707, "rewards/rejected": -7.792852878570557, "step": 5607 }, { "epoch": 1.24, "learning_rate": 8.347073543814723e-06, "logits/chosen": -1.306469202041626, "logits/rejected": -1.3316997289657593, "logps/chosen": -101.51032257080078, "logps/rejected": -170.4020538330078, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.8037223815917969, "rewards/margins": 5.715858459472656, "rewards/rejected": -6.519580841064453, "step": 5608 }, { "epoch": 1.24, "learning_rate": 8.345741828128003e-06, "logits/chosen": -1.341145634651184, "logits/rejected": -1.3370827436447144, "logps/chosen": -103.78762817382812, "logps/rejected": -173.4893798828125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.2424392700195312, "rewards/margins": 5.663998603820801, "rewards/rejected": -6.906437873840332, "step": 5609 }, { "epoch": 1.24, "learning_rate": 8.344409682521499e-06, "logits/chosen": -1.1596158742904663, "logits/rejected": -1.1513007879257202, "logps/chosen": -85.74234008789062, "logps/rejected": -186.0782012939453, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.3125, "rewards/margins": 4.179957866668701, "rewards/rejected": -4.492457866668701, "step": 5610 }, { "epoch": 1.24, "learning_rate": 8.343077107166394e-06, "logits/chosen": -2.259803056716919, "logits/rejected": -1.9233949184417725, "logps/chosen": -179.39356994628906, "logps/rejected": -407.8502197265625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -5.074580669403076, "rewards/margins": 5.454817295074463, "rewards/rejected": -10.529397964477539, "step": 5611 }, { "epoch": 1.24, "learning_rate": 8.341744102233916e-06, "logits/chosen": -1.5555527210235596, "logits/rejected": -1.593645691871643, "logps/chosen": -150.1441192626953, "logps/rejected": -157.99952697753906, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.2882125973701477, "rewards/margins": 5.854444980621338, "rewards/rejected": -6.14265775680542, "step": 5612 }, { "epoch": 1.24, "learning_rate": 8.340410667895352e-06, "logits/chosen": -1.265549659729004, "logits/rejected": -1.2476131916046143, "logps/chosen": -106.75071716308594, "logps/rejected": -190.10125732421875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 1.13214111328125, "rewards/margins": 10.034029960632324, "rewards/rejected": -8.901888847351074, "step": 5613 }, { "epoch": 1.24, "learning_rate": 8.339076804322048e-06, "logits/chosen": -1.392768144607544, "logits/rejected": -1.377609133720398, "logps/chosen": -124.4301986694336, "logps/rejected": -207.32949829101562, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.349234014749527, "rewards/margins": 5.2765960693359375, "rewards/rejected": -5.625830173492432, "step": 5614 }, { "epoch": 1.24, "learning_rate": 8.337742511685403e-06, "logits/chosen": -1.0505720376968384, "logits/rejected": -1.0936199426651, "logps/chosen": -186.75271606445312, "logps/rejected": -137.02822875976562, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.08841095119714737, "rewards/margins": 5.142070293426514, "rewards/rejected": -5.230481147766113, "step": 5615 }, { "epoch": 1.24, "learning_rate": 8.336407790156868e-06, "logits/chosen": -1.202674388885498, "logits/rejected": -1.2406024932861328, "logps/chosen": -214.35223388671875, "logps/rejected": -210.97328186035156, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 0.7014923095703125, "rewards/margins": 4.878239631652832, "rewards/rejected": -4.1767473220825195, "step": 5616 }, { "epoch": 1.24, "learning_rate": 8.335072639907953e-06, "logits/chosen": -1.324205756187439, "logits/rejected": -1.3726928234100342, "logps/chosen": -205.6996612548828, "logps/rejected": -271.16326904296875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.4406051635742188, "rewards/margins": 7.4908599853515625, "rewards/rejected": -9.931465148925781, "step": 5617 }, { "epoch": 1.24, "learning_rate": 8.33373706111022e-06, "logits/chosen": -1.1436687707901, "logits/rejected": -1.1032969951629639, "logps/chosen": -138.4656524658203, "logps/rejected": -581.4847412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.13652192056179047, "rewards/margins": 39.64121627807617, "rewards/rejected": -39.504695892333984, "step": 5618 }, { "epoch": 1.24, "learning_rate": 8.332401053935288e-06, "logits/chosen": -0.8992720246315002, "logits/rejected": -0.8992720246315002, "logps/chosen": -163.70852661132812, "logps/rejected": -163.70852661132812, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -6.929994106292725, "rewards/margins": 0.0, "rewards/rejected": -6.929994106292725, "step": 5619 }, { "epoch": 1.24, "learning_rate": 8.331064618554834e-06, "logits/chosen": -0.9825964570045471, "logits/rejected": -0.9620456695556641, "logps/chosen": -197.0316925048828, "logps/rejected": -156.41786193847656, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 1.48857581615448, "rewards/margins": 7.080732345581055, "rewards/rejected": -5.592156410217285, "step": 5620 }, { "epoch": 1.24, "learning_rate": 8.329727755140584e-06, "logits/chosen": -1.2205535173416138, "logits/rejected": -1.1833453178405762, "logps/chosen": -109.787353515625, "logps/rejected": -124.00189208984375, "loss": 0.0607, "rewards/accuracies": 1.0, "rewards/chosen": -3.607428789138794, "rewards/margins": 2.0662996768951416, "rewards/rejected": -5.6737284660339355, "step": 5621 }, { "epoch": 1.24, "learning_rate": 8.32839046386432e-06, "logits/chosen": -1.1124882698059082, "logits/rejected": -1.091611385345459, "logps/chosen": -113.35133361816406, "logps/rejected": -180.66322326660156, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.844456434249878, "rewards/margins": 5.8003950119018555, "rewards/rejected": -8.644851684570312, "step": 5622 }, { "epoch": 1.24, "learning_rate": 8.327052744897883e-06, "logits/chosen": -1.111526370048523, "logits/rejected": -1.0657683610916138, "logps/chosen": -167.43299865722656, "logps/rejected": -171.6819610595703, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 0.16528168320655823, "rewards/margins": 4.918542861938477, "rewards/rejected": -4.753261089324951, "step": 5623 }, { "epoch": 1.24, "learning_rate": 8.325714598413169e-06, "logits/chosen": -1.427291989326477, "logits/rejected": -1.3407926559448242, "logps/chosen": -110.10684204101562, "logps/rejected": -215.68858337402344, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 0.16632996499538422, "rewards/margins": 6.790480136871338, "rewards/rejected": -6.624150276184082, "step": 5624 }, { "epoch": 1.25, "learning_rate": 8.32437602458212e-06, "logits/chosen": -1.3466569185256958, "logits/rejected": -1.3206673860549927, "logps/chosen": -78.13662719726562, "logps/rejected": -109.09405517578125, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 0.01565857045352459, "rewards/margins": 4.26108980178833, "rewards/rejected": -4.245431423187256, "step": 5625 }, { "epoch": 1.25, "learning_rate": 8.323037023576745e-06, "logits/chosen": -1.7284228801727295, "logits/rejected": -1.720499038696289, "logps/chosen": -82.84584045410156, "logps/rejected": -131.6319580078125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.9256851077079773, "rewards/margins": 4.557231426239014, "rewards/rejected": -5.482916355133057, "step": 5626 }, { "epoch": 1.25, "learning_rate": 8.3216975955691e-06, "logits/chosen": -1.2821476459503174, "logits/rejected": -1.2692475318908691, "logps/chosen": -67.91937255859375, "logps/rejected": -53.17152786254883, "loss": 0.4697, "rewards/accuracies": 0.0, "rewards/chosen": -1.2664101123809814, "rewards/margins": -0.43288731575012207, "rewards/rejected": -0.8335227966308594, "step": 5627 }, { "epoch": 1.25, "learning_rate": 8.320357740731302e-06, "logits/chosen": -0.959540605545044, "logits/rejected": -0.8503384590148926, "logps/chosen": -124.03704833984375, "logps/rejected": -210.34567260742188, "loss": 0.1168, "rewards/accuracies": 1.0, "rewards/chosen": -3.331381320953369, "rewards/margins": 2.317172050476074, "rewards/rejected": -5.648553371429443, "step": 5628 }, { "epoch": 1.25, "learning_rate": 8.319017459235515e-06, "logits/chosen": -1.1541227102279663, "logits/rejected": -1.1541227102279663, "logps/chosen": -168.5721435546875, "logps/rejected": -168.5721435546875, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -7.629615306854248, "rewards/margins": 0.0, "rewards/rejected": -7.629615306854248, "step": 5629 }, { "epoch": 1.25, "learning_rate": 8.317676751253961e-06, "logits/chosen": -1.1764823198318481, "logits/rejected": -1.1460797786712646, "logps/chosen": -97.68380737304688, "logps/rejected": -144.8146514892578, "loss": 0.1988, "rewards/accuracies": 1.0, "rewards/chosen": -2.723170518875122, "rewards/margins": 0.956207275390625, "rewards/rejected": -3.679377794265747, "step": 5630 }, { "epoch": 1.25, "learning_rate": 8.316335616958922e-06, "logits/chosen": -0.9970429539680481, "logits/rejected": -1.0109888315200806, "logps/chosen": -83.91598510742188, "logps/rejected": -153.63034057617188, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.7307373285293579, "rewards/margins": 5.7548112869262695, "rewards/rejected": -6.485548496246338, "step": 5631 }, { "epoch": 1.25, "learning_rate": 8.314994056522727e-06, "logits/chosen": -1.4127165079116821, "logits/rejected": -1.3438750505447388, "logps/chosen": -137.90000915527344, "logps/rejected": -223.43048095703125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.076097249984741, "rewards/margins": 6.356649398803711, "rewards/rejected": -8.432746887207031, "step": 5632 }, { "epoch": 1.25, "learning_rate": 8.313652070117765e-06, "logits/chosen": -1.203550934791565, "logits/rejected": -1.535804271697998, "logps/chosen": -228.88148498535156, "logps/rejected": -171.8192596435547, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8047943115234375, "rewards/margins": 12.783258438110352, "rewards/rejected": -13.588052749633789, "step": 5633 }, { "epoch": 1.25, "learning_rate": 8.31230965791648e-06, "logits/chosen": -1.3732160329818726, "logits/rejected": -1.4637691974639893, "logps/chosen": -214.22515869140625, "logps/rejected": -209.11993408203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.641400158405304, "rewards/margins": 7.39479398727417, "rewards/rejected": -6.753393650054932, "step": 5634 }, { "epoch": 1.25, "learning_rate": 8.310966820091364e-06, "logits/chosen": -0.986162543296814, "logits/rejected": -0.9913415908813477, "logps/chosen": -133.75653076171875, "logps/rejected": -225.93478393554688, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -3.760942220687866, "rewards/margins": 4.046815872192383, "rewards/rejected": -7.80775785446167, "step": 5635 }, { "epoch": 1.25, "learning_rate": 8.309623556814972e-06, "logits/chosen": -1.3778992891311646, "logits/rejected": -1.4013917446136475, "logps/chosen": -177.13916015625, "logps/rejected": -157.64639282226562, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -2.600755453109741, "rewards/margins": 3.030529737472534, "rewards/rejected": -5.631285190582275, "step": 5636 }, { "epoch": 1.25, "learning_rate": 8.30827986825991e-06, "logits/chosen": -1.0295336246490479, "logits/rejected": -0.9434948563575745, "logps/chosen": -228.38169860839844, "logps/rejected": -118.82904052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.9165542125701904, "rewards/margins": 11.31342887878418, "rewards/rejected": -8.39687442779541, "step": 5637 }, { "epoch": 1.25, "learning_rate": 8.306935754598838e-06, "logits/chosen": -1.4631260633468628, "logits/rejected": -1.4174379110336304, "logps/chosen": -76.06851196289062, "logps/rejected": -67.61427307128906, "loss": 0.7954, "rewards/accuracies": 0.0, "rewards/chosen": -1.757710337638855, "rewards/margins": -1.306552529335022, "rewards/rejected": -0.4511577785015106, "step": 5638 }, { "epoch": 1.25, "learning_rate": 8.305591216004468e-06, "logits/chosen": -1.0150851011276245, "logits/rejected": -0.9929617047309875, "logps/chosen": -187.11676025390625, "logps/rejected": -237.88925170898438, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 1.216455101966858, "rewards/margins": 9.446686744689941, "rewards/rejected": -8.230231285095215, "step": 5639 }, { "epoch": 1.25, "learning_rate": 8.304246252649574e-06, "logits/chosen": -1.488415002822876, "logits/rejected": -1.5363101959228516, "logps/chosen": -192.97544860839844, "logps/rejected": -150.73667907714844, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.522778332233429, "rewards/margins": 7.009345531463623, "rewards/rejected": -6.48656702041626, "step": 5640 }, { "epoch": 1.25, "learning_rate": 8.302900864706982e-06, "logits/chosen": -1.7294416427612305, "logits/rejected": -1.7189300060272217, "logps/chosen": -104.1949462890625, "logps/rejected": -133.29263305664062, "loss": 0.0942, "rewards/accuracies": 1.0, "rewards/chosen": -1.9103988409042358, "rewards/margins": 1.5793763399124146, "rewards/rejected": -3.4897751808166504, "step": 5641 }, { "epoch": 1.25, "learning_rate": 8.301555052349567e-06, "logits/chosen": -1.4191229343414307, "logits/rejected": -1.4399547576904297, "logps/chosen": -83.31230926513672, "logps/rejected": -79.49622344970703, "loss": 0.1228, "rewards/accuracies": 1.0, "rewards/chosen": -1.9222313165664673, "rewards/margins": 1.282361626625061, "rewards/rejected": -3.2045929431915283, "step": 5642 }, { "epoch": 1.25, "learning_rate": 8.300208815750266e-06, "logits/chosen": -1.2656593322753906, "logits/rejected": -1.2177082300186157, "logps/chosen": -116.15179443359375, "logps/rejected": -214.32762145996094, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.2198753356933594, "rewards/margins": 6.919464111328125, "rewards/rejected": -9.139339447021484, "step": 5643 }, { "epoch": 1.25, "learning_rate": 8.298862155082065e-06, "logits/chosen": -1.2062913179397583, "logits/rejected": -1.2471469640731812, "logps/chosen": -155.44692993164062, "logps/rejected": -162.6799774169922, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 4.834593296051025, "rewards/margins": 7.251765251159668, "rewards/rejected": -2.4171721935272217, "step": 5644 }, { "epoch": 1.25, "learning_rate": 8.297515070518008e-06, "logits/chosen": -1.247523546218872, "logits/rejected": -1.247523546218872, "logps/chosen": -328.68792724609375, "logps/rejected": -328.68792724609375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -10.094769477844238, "rewards/margins": 0.0, "rewards/rejected": -10.094769477844238, "step": 5645 }, { "epoch": 1.25, "learning_rate": 8.296167562231192e-06, "logits/chosen": -1.4309260845184326, "logits/rejected": -1.3660694360733032, "logps/chosen": -102.4107666015625, "logps/rejected": -191.991943359375, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.0031814575195312, "rewards/margins": 4.7623796463012695, "rewards/rejected": -5.765561103820801, "step": 5646 }, { "epoch": 1.25, "learning_rate": 8.294819630394767e-06, "logits/chosen": -1.3868621587753296, "logits/rejected": -1.3269968032836914, "logps/chosen": -113.52841186523438, "logps/rejected": -203.48834228515625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.34599992632865906, "rewards/margins": 4.37272310256958, "rewards/rejected": -4.718722820281982, "step": 5647 }, { "epoch": 1.25, "learning_rate": 8.293471275181938e-06, "logits/chosen": -1.0931906700134277, "logits/rejected": -1.0931906700134277, "logps/chosen": -166.07754516601562, "logps/rejected": -166.07754516601562, "loss": 0.35, "rewards/accuracies": 0.0, "rewards/chosen": -3.9690232276916504, "rewards/margins": 0.0, "rewards/rejected": -3.9690232276916504, "step": 5648 }, { "epoch": 1.25, "learning_rate": 8.292122496765969e-06, "logits/chosen": -1.5949020385742188, "logits/rejected": -1.607359528541565, "logps/chosen": -142.65435791015625, "logps/rejected": -131.04052734375, "loss": 0.0979, "rewards/accuracies": 1.0, "rewards/chosen": -0.6618332266807556, "rewards/margins": 1.5625700950622559, "rewards/rejected": -2.2244033813476562, "step": 5649 }, { "epoch": 1.25, "learning_rate": 8.290773295320173e-06, "logits/chosen": -1.056671142578125, "logits/rejected": -0.9742566347122192, "logps/chosen": -150.38409423828125, "logps/rejected": -205.6750030517578, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.1639404296875, "rewards/margins": 5.4678497314453125, "rewards/rejected": -6.6317901611328125, "step": 5650 }, { "epoch": 1.25, "learning_rate": 8.28942367101792e-06, "logits/chosen": -1.2840343713760376, "logits/rejected": -0.4291290044784546, "logps/chosen": -54.90234375, "logps/rejected": -505.41455078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.31981393694877625, "rewards/margins": 35.47422790527344, "rewards/rejected": -35.154415130615234, "step": 5651 }, { "epoch": 1.25, "learning_rate": 8.288073624032634e-06, "logits/chosen": -1.2554609775543213, "logits/rejected": -1.2504632472991943, "logps/chosen": -90.95960235595703, "logps/rejected": -165.1743621826172, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -2.4370248317718506, "rewards/margins": 6.506053924560547, "rewards/rejected": -8.943078994750977, "step": 5652 }, { "epoch": 1.25, "learning_rate": 8.28672315453779e-06, "logits/chosen": -0.8885374665260315, "logits/rejected": -0.914004385471344, "logps/chosen": -116.97845458984375, "logps/rejected": -138.9759063720703, "loss": 0.1934, "rewards/accuracies": 1.0, "rewards/chosen": -2.7738113403320312, "rewards/margins": 0.7508080005645752, "rewards/rejected": -3.5246193408966064, "step": 5653 }, { "epoch": 1.25, "learning_rate": 8.285372262706922e-06, "logits/chosen": -1.1960110664367676, "logits/rejected": -1.1927224397659302, "logps/chosen": -232.6510467529297, "logps/rejected": -203.54734802246094, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.9683243036270142, "rewards/margins": 8.352726936340332, "rewards/rejected": -9.321051597595215, "step": 5654 }, { "epoch": 1.25, "learning_rate": 8.284020948713615e-06, "logits/chosen": -1.2500147819519043, "logits/rejected": -1.3498892784118652, "logps/chosen": -260.3502197265625, "logps/rejected": -153.01829528808594, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.08831787109375, "rewards/margins": 5.030433654785156, "rewards/rejected": -6.118751525878906, "step": 5655 }, { "epoch": 1.25, "learning_rate": 8.282669212731511e-06, "logits/chosen": -1.4729297161102295, "logits/rejected": -1.2065659761428833, "logps/chosen": -157.91253662109375, "logps/rejected": -406.6794738769531, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -4.332150936126709, "rewards/margins": 3.799193859100342, "rewards/rejected": -8.13134479522705, "step": 5656 }, { "epoch": 1.25, "learning_rate": 8.281317054934306e-06, "logits/chosen": -1.3513904809951782, "logits/rejected": -1.3365447521209717, "logps/chosen": -215.4102783203125, "logps/rejected": -380.4819641113281, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.2718856930732727, "rewards/margins": 7.432984828948975, "rewards/rejected": -7.704870700836182, "step": 5657 }, { "epoch": 1.25, "learning_rate": 8.279964475495745e-06, "logits/chosen": -1.4584124088287354, "logits/rejected": -1.4787869453430176, "logps/chosen": -131.41184997558594, "logps/rejected": -125.33983612060547, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9144355654716492, "rewards/margins": 8.747127532958984, "rewards/rejected": -9.6615629196167, "step": 5658 }, { "epoch": 1.25, "learning_rate": 8.278611474589635e-06, "logits/chosen": -1.3462899923324585, "logits/rejected": -1.2601672410964966, "logps/chosen": -101.57917785644531, "logps/rejected": -256.3489990234375, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -2.1095504760742188, "rewards/margins": 9.09770393371582, "rewards/rejected": -11.207254409790039, "step": 5659 }, { "epoch": 1.25, "learning_rate": 8.277258052389834e-06, "logits/chosen": -1.2164292335510254, "logits/rejected": -1.205876111984253, "logps/chosen": -98.46967315673828, "logps/rejected": -278.00848388671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.1128852367401123, "rewards/margins": 8.144771575927734, "rewards/rejected": -10.257657051086426, "step": 5660 }, { "epoch": 1.25, "learning_rate": 8.27590420907025e-06, "logits/chosen": -1.0305784940719604, "logits/rejected": -1.0575706958770752, "logps/chosen": -86.09156036376953, "logps/rejected": -127.29048919677734, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -1.8301239013671875, "rewards/margins": 5.847192287445068, "rewards/rejected": -7.677316188812256, "step": 5661 }, { "epoch": 1.25, "learning_rate": 8.27454994480485e-06, "logits/chosen": -1.2810484170913696, "logits/rejected": -1.2810484170913696, "logps/chosen": -171.5342559814453, "logps/rejected": -171.5342559814453, "loss": 0.3485, "rewards/accuracies": 0.0, "rewards/chosen": -7.623152256011963, "rewards/margins": 0.0, "rewards/rejected": -7.623152256011963, "step": 5662 }, { "epoch": 1.25, "learning_rate": 8.273195259767653e-06, "logits/chosen": -1.5037120580673218, "logits/rejected": -1.4927505254745483, "logps/chosen": -125.41371154785156, "logps/rejected": -224.75131225585938, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.11195679008960724, "rewards/margins": 9.9937162399292, "rewards/rejected": -9.881759643554688, "step": 5663 }, { "epoch": 1.25, "learning_rate": 8.271840154132736e-06, "logits/chosen": -1.042061686515808, "logits/rejected": -1.0483092069625854, "logps/chosen": -191.83909606933594, "logps/rejected": -200.217529296875, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.16034698486328125, "rewards/margins": 3.621232748031616, "rewards/rejected": -3.7815797328948975, "step": 5664 }, { "epoch": 1.25, "learning_rate": 8.270484628074222e-06, "logits/chosen": -1.6138739585876465, "logits/rejected": -1.635232925415039, "logps/chosen": -149.65359497070312, "logps/rejected": -156.57833862304688, "loss": 0.3746, "rewards/accuracies": 1.0, "rewards/chosen": -7.201670169830322, "rewards/margins": 0.07896280288696289, "rewards/rejected": -7.280632972717285, "step": 5665 }, { "epoch": 1.25, "learning_rate": 8.269128681766296e-06, "logits/chosen": -1.2468127012252808, "logits/rejected": -1.2391337156295776, "logps/chosen": -111.5678939819336, "logps/rejected": -125.25096130371094, "loss": 0.0656, "rewards/accuracies": 1.0, "rewards/chosen": -1.5489342212677002, "rewards/margins": 2.2984108924865723, "rewards/rejected": -3.8473451137542725, "step": 5666 }, { "epoch": 1.25, "learning_rate": 8.267772315383195e-06, "logits/chosen": -0.7969796061515808, "logits/rejected": -0.8334873914718628, "logps/chosen": -219.580322265625, "logps/rejected": -118.23970794677734, "loss": 0.1145, "rewards/accuracies": 1.0, "rewards/chosen": -4.0839385986328125, "rewards/margins": 1.4816889762878418, "rewards/rejected": -5.565627574920654, "step": 5667 }, { "epoch": 1.25, "learning_rate": 8.266415529099205e-06, "logits/chosen": -1.2955198287963867, "logits/rejected": -1.3086278438568115, "logps/chosen": -148.28778076171875, "logps/rejected": -172.8649139404297, "loss": 0.0542, "rewards/accuracies": 1.0, "rewards/chosen": -5.166571140289307, "rewards/margins": 2.192002773284912, "rewards/rejected": -7.358573913574219, "step": 5668 }, { "epoch": 1.25, "learning_rate": 8.265058323088673e-06, "logits/chosen": -1.0956250429153442, "logits/rejected": -1.1411068439483643, "logps/chosen": -144.57064819335938, "logps/rejected": -145.39276123046875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 2.66800856590271, "rewards/margins": 4.619549751281738, "rewards/rejected": -1.9515411853790283, "step": 5669 }, { "epoch": 1.25, "learning_rate": 8.263700697525994e-06, "logits/chosen": -1.5112749338150024, "logits/rejected": -1.5600937604904175, "logps/chosen": -166.69952392578125, "logps/rejected": -191.199462890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.4212982654571533, "rewards/margins": 9.052909851074219, "rewards/rejected": -11.474207878112793, "step": 5670 }, { "epoch": 1.26, "learning_rate": 8.262342652585621e-06, "logits/chosen": -1.141501545906067, "logits/rejected": -1.1857903003692627, "logps/chosen": -259.08770751953125, "logps/rejected": -236.27178955078125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.43954774737358093, "rewards/margins": 5.010708808898926, "rewards/rejected": -5.45025634765625, "step": 5671 }, { "epoch": 1.26, "learning_rate": 8.260984188442063e-06, "logits/chosen": -1.319700002670288, "logits/rejected": -1.3147950172424316, "logps/chosen": -81.22093963623047, "logps/rejected": -83.3209457397461, "loss": 2.3488, "rewards/accuracies": 1.0, "rewards/chosen": -1.983144760131836, "rewards/margins": 4.188252925872803, "rewards/rejected": -6.171397686004639, "step": 5672 }, { "epoch": 1.26, "learning_rate": 8.259625305269873e-06, "logits/chosen": -1.2906773090362549, "logits/rejected": -1.2410407066345215, "logps/chosen": -140.28631591796875, "logps/rejected": -268.21728515625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.6791183352470398, "rewards/margins": 7.6468353271484375, "rewards/rejected": -8.325953483581543, "step": 5673 }, { "epoch": 1.26, "learning_rate": 8.258266003243667e-06, "logits/chosen": -0.7710800766944885, "logits/rejected": -0.7710800766944885, "logps/chosen": -107.80110168457031, "logps/rejected": -107.80110168457031, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -7.8401384353637695, "rewards/margins": 0.0, "rewards/rejected": -7.8401384353637695, "step": 5674 }, { "epoch": 1.26, "learning_rate": 8.256906282538113e-06, "logits/chosen": -1.1754966974258423, "logits/rejected": -1.367496132850647, "logps/chosen": -154.6842803955078, "logps/rejected": -134.50914001464844, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 1.958471655845642, "rewards/margins": 9.559539794921875, "rewards/rejected": -7.601068019866943, "step": 5675 }, { "epoch": 1.26, "learning_rate": 8.25554614332793e-06, "logits/chosen": -1.1806330680847168, "logits/rejected": -1.3223363161087036, "logps/chosen": -215.63525390625, "logps/rejected": -112.03218078613281, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.0084381103515625, "rewards/margins": 7.653273582458496, "rewards/rejected": -8.661711692810059, "step": 5676 }, { "epoch": 1.26, "learning_rate": 8.254185585787895e-06, "logits/chosen": -1.1285573244094849, "logits/rejected": -1.1634340286254883, "logps/chosen": -161.92440795898438, "logps/rejected": -140.67169189453125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": 1.5006027221679688, "rewards/margins": 4.789325714111328, "rewards/rejected": -3.2887229919433594, "step": 5677 }, { "epoch": 1.26, "learning_rate": 8.252824610092835e-06, "logits/chosen": -1.0210946798324585, "logits/rejected": -0.9920923113822937, "logps/chosen": -131.15658569335938, "logps/rejected": -239.34507751464844, "loss": 0.0596, "rewards/accuracies": 1.0, "rewards/chosen": -1.8156890869140625, "rewards/margins": 4.170726299285889, "rewards/rejected": -5.986415386199951, "step": 5678 }, { "epoch": 1.26, "learning_rate": 8.251463216417632e-06, "logits/chosen": -1.3318358659744263, "logits/rejected": -1.236816644668579, "logps/chosen": -93.2410888671875, "logps/rejected": -177.04660034179688, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -4.735736846923828, "rewards/margins": 4.060046195983887, "rewards/rejected": -8.795783042907715, "step": 5679 }, { "epoch": 1.26, "learning_rate": 8.250101404937223e-06, "logits/chosen": -1.4020413160324097, "logits/rejected": -1.4250253438949585, "logps/chosen": -183.3699188232422, "logps/rejected": -205.13548278808594, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": 0.8462112545967102, "rewards/margins": 5.844876289367676, "rewards/rejected": -4.998664855957031, "step": 5680 }, { "epoch": 1.26, "learning_rate": 8.248739175826594e-06, "logits/chosen": -1.1145625114440918, "logits/rejected": -0.8078896999359131, "logps/chosen": -243.81414794921875, "logps/rejected": -329.53460693359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.7215301990509033, "rewards/margins": 18.03557777404785, "rewards/rejected": -16.31404685974121, "step": 5681 }, { "epoch": 1.26, "learning_rate": 8.247376529260793e-06, "logits/chosen": -1.329563856124878, "logits/rejected": -1.329563856124878, "logps/chosen": -153.28164672851562, "logps/rejected": -153.28164672851562, "loss": 0.3492, "rewards/accuracies": 0.0, "rewards/chosen": -11.047039985656738, "rewards/margins": 0.0, "rewards/rejected": -11.047039985656738, "step": 5682 }, { "epoch": 1.26, "learning_rate": 8.246013465414914e-06, "logits/chosen": -1.5790940523147583, "logits/rejected": -1.5829691886901855, "logps/chosen": -60.701560974121094, "logps/rejected": -72.52979278564453, "loss": 0.135, "rewards/accuracies": 1.0, "rewards/chosen": -1.9167617559432983, "rewards/margins": 1.1718753576278687, "rewards/rejected": -3.088637113571167, "step": 5683 }, { "epoch": 1.26, "learning_rate": 8.244649984464109e-06, "logits/chosen": -1.1283891201019287, "logits/rejected": -1.0388721227645874, "logps/chosen": -127.55801391601562, "logps/rejected": -299.01806640625, "loss": 0.0905, "rewards/accuracies": 1.0, "rewards/chosen": -1.1937042474746704, "rewards/margins": 1.6228209733963013, "rewards/rejected": -2.8165252208709717, "step": 5684 }, { "epoch": 1.26, "learning_rate": 8.243286086583577e-06, "logits/chosen": -0.8447037935256958, "logits/rejected": -0.7921236753463745, "logps/chosen": -197.25701904296875, "logps/rejected": -503.82830810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5070099234580994, "rewards/margins": 27.163846969604492, "rewards/rejected": -27.670856475830078, "step": 5685 }, { "epoch": 1.26, "learning_rate": 8.241921771948583e-06, "logits/chosen": -1.072847843170166, "logits/rejected": -1.0340009927749634, "logps/chosen": -87.02300262451172, "logps/rejected": -136.445556640625, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/chosen": -1.2729133367538452, "rewards/margins": 2.4665274620056152, "rewards/rejected": -3.73944091796875, "step": 5686 }, { "epoch": 1.26, "learning_rate": 8.240557040734434e-06, "logits/chosen": -1.3574302196502686, "logits/rejected": -1.4555162191390991, "logps/chosen": -274.4347229003906, "logps/rejected": -218.02224731445312, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 1.1688110828399658, "rewards/margins": 8.556780815124512, "rewards/rejected": -7.387969970703125, "step": 5687 }, { "epoch": 1.26, "learning_rate": 8.239191893116494e-06, "logits/chosen": -1.4638526439666748, "logits/rejected": -1.3927720785140991, "logps/chosen": -108.15742492675781, "logps/rejected": -208.53750610351562, "loss": 0.2729, "rewards/accuracies": 1.0, "rewards/chosen": -0.5536346435546875, "rewards/margins": 0.32679444551467896, "rewards/rejected": -0.8804290890693665, "step": 5688 }, { "epoch": 1.26, "learning_rate": 8.237826329270183e-06, "logits/chosen": -1.304007649421692, "logits/rejected": -1.286138892173767, "logps/chosen": -136.95233154296875, "logps/rejected": -176.64254760742188, "loss": 0.4159, "rewards/accuracies": 1.0, "rewards/chosen": -0.6247955560684204, "rewards/margins": 4.2012176513671875, "rewards/rejected": -4.826013088226318, "step": 5689 }, { "epoch": 1.26, "learning_rate": 8.236460349370972e-06, "logits/chosen": -1.1856915950775146, "logits/rejected": -1.1713213920593262, "logps/chosen": -144.4723358154297, "logps/rejected": -155.1927032470703, "loss": 0.166, "rewards/accuracies": 1.0, "rewards/chosen": -1.4599746465682983, "rewards/margins": 0.9432984590530396, "rewards/rejected": -2.403273105621338, "step": 5690 }, { "epoch": 1.26, "learning_rate": 8.235093953594387e-06, "logits/chosen": -1.3726705312728882, "logits/rejected": -1.4242221117019653, "logps/chosen": -157.7194366455078, "logps/rejected": -165.20477294921875, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 0.4526992738246918, "rewards/margins": 5.706747531890869, "rewards/rejected": -5.2540483474731445, "step": 5691 }, { "epoch": 1.26, "learning_rate": 8.233727142116007e-06, "logits/chosen": -1.0774502754211426, "logits/rejected": -1.0770936012268066, "logps/chosen": -77.81825256347656, "logps/rejected": -99.70681762695312, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -0.15740127861499786, "rewards/margins": 4.272017955780029, "rewards/rejected": -4.429419040679932, "step": 5692 }, { "epoch": 1.26, "learning_rate": 8.232359915111462e-06, "logits/chosen": -0.9514486789703369, "logits/rejected": -0.9696036577224731, "logps/chosen": -153.85751342773438, "logps/rejected": -46.61266326904297, "loss": 0.4374, "rewards/accuracies": 0.0, "rewards/chosen": -3.406768798828125, "rewards/margins": -0.3351132869720459, "rewards/rejected": -3.071655511856079, "step": 5693 }, { "epoch": 1.26, "learning_rate": 8.230992272756438e-06, "logits/chosen": -1.3071256875991821, "logits/rejected": -1.3973543643951416, "logps/chosen": -158.38922119140625, "logps/rejected": -152.70285034179688, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.4817627668380737, "rewards/margins": 10.125656127929688, "rewards/rejected": -11.60741901397705, "step": 5694 }, { "epoch": 1.26, "learning_rate": 8.229624215226675e-06, "logits/chosen": -0.9136452674865723, "logits/rejected": -0.9493169784545898, "logps/chosen": -246.5347137451172, "logps/rejected": -171.69752502441406, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 3.992649793624878, "rewards/margins": 12.048074722290039, "rewards/rejected": -8.055424690246582, "step": 5695 }, { "epoch": 1.26, "learning_rate": 8.228255742697962e-06, "logits/chosen": -0.8194692134857178, "logits/rejected": -0.8487529754638672, "logps/chosen": -71.00730895996094, "logps/rejected": -84.48532104492188, "loss": 0.15, "rewards/accuracies": 1.0, "rewards/chosen": -0.16421203315258026, "rewards/margins": 1.0632081031799316, "rewards/rejected": -1.2274200916290283, "step": 5696 }, { "epoch": 1.26, "learning_rate": 8.226886855346148e-06, "logits/chosen": -0.8076356649398804, "logits/rejected": -0.7606936693191528, "logps/chosen": -138.01165771484375, "logps/rejected": -245.71096801757812, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 0.5877304077148438, "rewards/margins": 4.295646667480469, "rewards/rejected": -3.707916259765625, "step": 5697 }, { "epoch": 1.26, "learning_rate": 8.225517553347132e-06, "logits/chosen": -1.0472915172576904, "logits/rejected": -0.19029034674167633, "logps/chosen": -65.97251892089844, "logps/rejected": -453.5502624511719, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.3792232573032379, "rewards/margins": 37.132667541503906, "rewards/rejected": -37.51189041137695, "step": 5698 }, { "epoch": 1.26, "learning_rate": 8.224147836876861e-06, "logits/chosen": -1.6212724447250366, "logits/rejected": -1.6933728456497192, "logps/chosen": -109.91334533691406, "logps/rejected": -114.93029022216797, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -0.2752273678779602, "rewards/margins": 4.961193561553955, "rewards/rejected": -5.23642110824585, "step": 5699 }, { "epoch": 1.26, "learning_rate": 8.222777706111345e-06, "logits/chosen": -0.8155277371406555, "logits/rejected": -0.7153127789497375, "logps/chosen": -79.4726791381836, "logps/rejected": -294.172119140625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.962702989578247, "rewards/margins": 4.861630439758301, "rewards/rejected": -6.824333190917969, "step": 5700 }, { "epoch": 1.26, "learning_rate": 8.221407161226641e-06, "logits/chosen": -1.3328204154968262, "logits/rejected": -1.1775288581848145, "logps/chosen": -137.3968505859375, "logps/rejected": -350.552734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 2.5471084117889404, "rewards/margins": 7.595222473144531, "rewards/rejected": -5.04811429977417, "step": 5701 }, { "epoch": 1.26, "learning_rate": 8.220036202398861e-06, "logits/chosen": -1.7562077045440674, "logits/rejected": -1.7735328674316406, "logps/chosen": -136.54232788085938, "logps/rejected": -176.14163208007812, "loss": 0.3935, "rewards/accuracies": 1.0, "rewards/chosen": -1.0593055486679077, "rewards/margins": 2.3190622329711914, "rewards/rejected": -3.3783676624298096, "step": 5702 }, { "epoch": 1.26, "learning_rate": 8.21866482980417e-06, "logits/chosen": -1.29574716091156, "logits/rejected": -1.2462326288223267, "logps/chosen": -96.01555633544922, "logps/rejected": -105.8153076171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.6435950994491577, "rewards/margins": 6.046486854553223, "rewards/rejected": -7.69008207321167, "step": 5703 }, { "epoch": 1.26, "learning_rate": 8.217293043618786e-06, "logits/chosen": -1.0585911273956299, "logits/rejected": -0.9930144548416138, "logps/chosen": -188.67294311523438, "logps/rejected": -278.33642578125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.366018682718277, "rewards/margins": 6.2618513107299805, "rewards/rejected": -6.627870082855225, "step": 5704 }, { "epoch": 1.26, "learning_rate": 8.21592084401898e-06, "logits/chosen": -1.0063055753707886, "logits/rejected": -1.0179672241210938, "logps/chosen": -188.5811767578125, "logps/rejected": -216.03684997558594, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.2433273792266846, "rewards/margins": 6.909219741821289, "rewards/rejected": -5.665892124176025, "step": 5705 }, { "epoch": 1.26, "learning_rate": 8.214548231181077e-06, "logits/chosen": -1.0410678386688232, "logits/rejected": -1.0499171018600464, "logps/chosen": -148.38267517089844, "logps/rejected": -182.20706176757812, "loss": 0.0695, "rewards/accuracies": 1.0, "rewards/chosen": -1.80241858959198, "rewards/margins": 1.9040879011154175, "rewards/rejected": -3.7065064907073975, "step": 5706 }, { "epoch": 1.26, "learning_rate": 8.213175205281451e-06, "logits/chosen": -1.3618590831756592, "logits/rejected": -1.3612029552459717, "logps/chosen": -147.239501953125, "logps/rejected": -161.8194580078125, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": 2.069021701812744, "rewards/margins": 3.268136739730835, "rewards/rejected": -1.1991150379180908, "step": 5707 }, { "epoch": 1.26, "learning_rate": 8.211801766496537e-06, "logits/chosen": -0.8565359115600586, "logits/rejected": -0.7917986512184143, "logps/chosen": -195.26382446289062, "logps/rejected": -269.40838623046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.3913055658340454, "rewards/margins": 7.770285129547119, "rewards/rejected": -6.378979682922363, "step": 5708 }, { "epoch": 1.26, "learning_rate": 8.210427915002819e-06, "logits/chosen": -0.887992799282074, "logits/rejected": -0.754601001739502, "logps/chosen": -189.37437438964844, "logps/rejected": -538.5430908203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 2.424913167953491, "rewards/margins": 16.324953079223633, "rewards/rejected": -13.900039672851562, "step": 5709 }, { "epoch": 1.26, "learning_rate": 8.20905365097683e-06, "logits/chosen": -1.1287399530410767, "logits/rejected": -1.1028988361358643, "logps/chosen": -99.73495483398438, "logps/rejected": -185.83340454101562, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": -0.932879626750946, "rewards/margins": 7.595521926879883, "rewards/rejected": -8.528401374816895, "step": 5710 }, { "epoch": 1.26, "learning_rate": 8.20767897459516e-06, "logits/chosen": -1.3201000690460205, "logits/rejected": -0.7412636876106262, "logps/chosen": -149.95477294921875, "logps/rejected": -596.409423828125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.3713897466659546, "rewards/margins": 46.43659973144531, "rewards/rejected": -47.80799102783203, "step": 5711 }, { "epoch": 1.26, "learning_rate": 8.206303886034455e-06, "logits/chosen": -0.7955452799797058, "logits/rejected": -0.8085145950317383, "logps/chosen": -181.41336059570312, "logps/rejected": -205.836669921875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 2.4055542945861816, "rewards/margins": 8.776800155639648, "rewards/rejected": -6.371246337890625, "step": 5712 }, { "epoch": 1.26, "learning_rate": 8.204928385471406e-06, "logits/chosen": -1.3208794593811035, "logits/rejected": -1.3734421730041504, "logps/chosen": -134.54347229003906, "logps/rejected": -194.7605743408203, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.12449341267347336, "rewards/margins": 14.886260986328125, "rewards/rejected": -14.761767387390137, "step": 5713 }, { "epoch": 1.26, "learning_rate": 8.203552473082766e-06, "logits/chosen": -1.2181223630905151, "logits/rejected": -1.2107175588607788, "logps/chosen": -201.24346923828125, "logps/rejected": -255.77542114257812, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -2.2664856910705566, "rewards/margins": 4.592900276184082, "rewards/rejected": -6.859385967254639, "step": 5714 }, { "epoch": 1.26, "learning_rate": 8.202176149045334e-06, "logits/chosen": -1.412716031074524, "logits/rejected": -1.4122999906539917, "logps/chosen": -91.02058410644531, "logps/rejected": -120.11749267578125, "loss": 0.2416, "rewards/accuracies": 1.0, "rewards/chosen": -1.875115990638733, "rewards/margins": 0.8587325811386108, "rewards/rejected": -2.7338485717773438, "step": 5715 }, { "epoch": 1.27, "learning_rate": 8.200799413535962e-06, "logits/chosen": -1.3889821767807007, "logits/rejected": -1.3831931352615356, "logps/chosen": -104.3141860961914, "logps/rejected": -92.1044921875, "loss": 0.1932, "rewards/accuracies": 1.0, "rewards/chosen": -1.5953606367111206, "rewards/margins": 0.9874290227890015, "rewards/rejected": -2.582789659500122, "step": 5716 }, { "epoch": 1.27, "learning_rate": 8.199422266731563e-06, "logits/chosen": -1.353350043296814, "logits/rejected": -1.5752971172332764, "logps/chosen": -269.1224670410156, "logps/rejected": -116.88551330566406, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": 1.1058319807052612, "rewards/margins": 7.71821403503418, "rewards/rejected": -6.612381935119629, "step": 5717 }, { "epoch": 1.27, "learning_rate": 8.198044708809094e-06, "logits/chosen": -1.2240517139434814, "logits/rejected": -1.2894948720932007, "logps/chosen": -77.26084899902344, "logps/rejected": -79.8648681640625, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -1.6208702325820923, "rewards/margins": 2.9751687049865723, "rewards/rejected": -4.596038818359375, "step": 5718 }, { "epoch": 1.27, "learning_rate": 8.196666739945566e-06, "logits/chosen": -1.0608878135681152, "logits/rejected": -1.0854319334030151, "logps/chosen": -212.02609252929688, "logps/rejected": -202.32159423828125, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -3.2895874977111816, "rewards/margins": 3.569169521331787, "rewards/rejected": -6.858757019042969, "step": 5719 }, { "epoch": 1.27, "learning_rate": 8.195288360318048e-06, "logits/chosen": -1.1276253461837769, "logits/rejected": -1.096532940864563, "logps/chosen": -96.53836059570312, "logps/rejected": -137.3169403076172, "loss": 0.0623, "rewards/accuracies": 1.0, "rewards/chosen": 0.5034546256065369, "rewards/margins": 5.3415751457214355, "rewards/rejected": -4.838120460510254, "step": 5720 }, { "epoch": 1.27, "learning_rate": 8.193909570103656e-06, "logits/chosen": -0.9138087034225464, "logits/rejected": -0.9586142897605896, "logps/chosen": -163.57139587402344, "logps/rejected": -221.83901977539062, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": 5.395329475402832, "rewards/margins": 5.521257400512695, "rewards/rejected": -0.12592773139476776, "step": 5721 }, { "epoch": 1.27, "learning_rate": 8.192530369479562e-06, "logits/chosen": -1.2920455932617188, "logits/rejected": -1.3408924341201782, "logps/chosen": -106.67903137207031, "logps/rejected": -108.1380615234375, "loss": 0.2752, "rewards/accuracies": 1.0, "rewards/chosen": -2.212345838546753, "rewards/margins": 0.3104705810546875, "rewards/rejected": -2.5228164196014404, "step": 5722 }, { "epoch": 1.27, "learning_rate": 8.191150758622991e-06, "logits/chosen": -1.6063590049743652, "logits/rejected": -1.5475703477859497, "logps/chosen": -94.76036071777344, "logps/rejected": -152.5478515625, "loss": 0.4798, "rewards/accuracies": 0.0, "rewards/chosen": -1.6018295288085938, "rewards/margins": -0.47650909423828125, "rewards/rejected": -1.1253204345703125, "step": 5723 }, { "epoch": 1.27, "learning_rate": 8.189770737711218e-06, "logits/chosen": -1.3089509010314941, "logits/rejected": -1.3626985549926758, "logps/chosen": -169.88182067871094, "logps/rejected": -206.57269287109375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 1.0246384143829346, "rewards/margins": 7.87009334564209, "rewards/rejected": -6.845454692840576, "step": 5724 }, { "epoch": 1.27, "learning_rate": 8.188390306921574e-06, "logits/chosen": -1.4212610721588135, "logits/rejected": -0.8148677349090576, "logps/chosen": -142.49472045898438, "logps/rejected": -631.9231567382812, "loss": 0.3667, "rewards/accuracies": 1.0, "rewards/chosen": -2.435356855392456, "rewards/margins": 23.990806579589844, "rewards/rejected": -26.426162719726562, "step": 5725 }, { "epoch": 1.27, "learning_rate": 8.18700946643144e-06, "logits/chosen": -0.9018007516860962, "logits/rejected": -0.8908796906471252, "logps/chosen": -82.97359466552734, "logps/rejected": -73.32295989990234, "loss": 0.0875, "rewards/accuracies": 1.0, "rewards/chosen": 0.316314697265625, "rewards/margins": 1.6666061878204346, "rewards/rejected": -1.3502914905548096, "step": 5726 }, { "epoch": 1.27, "learning_rate": 8.18562821641825e-06, "logits/chosen": -1.1104384660720825, "logits/rejected": -1.1297017335891724, "logps/chosen": -164.9820556640625, "logps/rejected": -75.22801208496094, "loss": 0.1594, "rewards/accuracies": 1.0, "rewards/chosen": -5.038572788238525, "rewards/margins": 1.0001220703125, "rewards/rejected": -6.038694858551025, "step": 5727 }, { "epoch": 1.27, "learning_rate": 8.184246557059493e-06, "logits/chosen": -1.0904121398925781, "logits/rejected": -1.089568018913269, "logps/chosen": -120.05181121826172, "logps/rejected": -143.696044921875, "loss": 0.3524, "rewards/accuracies": 1.0, "rewards/chosen": -2.4197967052459717, "rewards/margins": 4.449558258056641, "rewards/rejected": -6.869355201721191, "step": 5728 }, { "epoch": 1.27, "learning_rate": 8.182864488532707e-06, "logits/chosen": -1.0224930047988892, "logits/rejected": -1.001166820526123, "logps/chosen": -116.00846862792969, "logps/rejected": -125.4273681640625, "loss": 1.0583, "rewards/accuracies": 0.0, "rewards/chosen": -2.7691361904144287, "rewards/margins": -1.9829957485198975, "rewards/rejected": -0.7861404418945312, "step": 5729 }, { "epoch": 1.27, "learning_rate": 8.181482011015488e-06, "logits/chosen": -1.0194447040557861, "logits/rejected": -0.8580924272537231, "logps/chosen": -111.9661865234375, "logps/rejected": -315.1341857910156, "loss": 0.0338, "rewards/accuracies": 1.0, "rewards/chosen": 0.6799217462539673, "rewards/margins": 5.04681921005249, "rewards/rejected": -4.3668975830078125, "step": 5730 }, { "epoch": 1.27, "learning_rate": 8.180099124685476e-06, "logits/chosen": -1.0855071544647217, "logits/rejected": -1.0609170198440552, "logps/chosen": -121.57258605957031, "logps/rejected": -111.63127136230469, "loss": 0.1056, "rewards/accuracies": 1.0, "rewards/chosen": -0.6116912961006165, "rewards/margins": 3.6122920513153076, "rewards/rejected": -4.223983287811279, "step": 5731 }, { "epoch": 1.27, "learning_rate": 8.178715829720374e-06, "logits/chosen": -1.181834101676941, "logits/rejected": -1.2375900745391846, "logps/chosen": -226.34397888183594, "logps/rejected": -118.58462524414062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.06908264011144638, "rewards/margins": 8.294564247131348, "rewards/rejected": -8.363646507263184, "step": 5732 }, { "epoch": 1.27, "learning_rate": 8.177332126297928e-06, "logits/chosen": -1.4498573541641235, "logits/rejected": -1.3505005836486816, "logps/chosen": -123.43133544921875, "logps/rejected": -156.83001708984375, "loss": 0.7594, "rewards/accuracies": 0.0, "rewards/chosen": -2.1237709522247314, "rewards/margins": -1.2717690467834473, "rewards/rejected": -0.852001965045929, "step": 5733 }, { "epoch": 1.27, "learning_rate": 8.175948014595942e-06, "logits/chosen": -1.086568832397461, "logits/rejected": -1.1797116994857788, "logps/chosen": -144.6883087158203, "logps/rejected": -126.45247650146484, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 2.1658341884613037, "rewards/margins": 6.050417423248291, "rewards/rejected": -3.8845832347869873, "step": 5734 }, { "epoch": 1.27, "learning_rate": 8.17456349479227e-06, "logits/chosen": -0.8790814280509949, "logits/rejected": -0.9021450877189636, "logps/chosen": -217.49636840820312, "logps/rejected": -205.31622314453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.29856276512146, "rewards/margins": 8.115824699401855, "rewards/rejected": -5.817262172698975, "step": 5735 }, { "epoch": 1.27, "learning_rate": 8.17317856706482e-06, "logits/chosen": -1.5353361368179321, "logits/rejected": -1.539336919784546, "logps/chosen": -61.805397033691406, "logps/rejected": -148.71597290039062, "loss": 0.4272, "rewards/accuracies": 1.0, "rewards/chosen": -0.6074302792549133, "rewards/margins": 5.555446624755859, "rewards/rejected": -6.162877082824707, "step": 5736 }, { "epoch": 1.27, "learning_rate": 8.171793231591553e-06, "logits/chosen": -1.307172417640686, "logits/rejected": -1.2920814752578735, "logps/chosen": -111.24478149414062, "logps/rejected": -126.65906524658203, "loss": 0.4126, "rewards/accuracies": 0.0, "rewards/chosen": -0.2945747375488281, "rewards/margins": -0.13063812255859375, "rewards/rejected": -0.16393661499023438, "step": 5737 }, { "epoch": 1.27, "learning_rate": 8.170407488550482e-06, "logits/chosen": -1.0505421161651611, "logits/rejected": -0.8817481994628906, "logps/chosen": -238.40713500976562, "logps/rejected": -305.3958435058594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 1.397619605064392, "rewards/margins": 7.371649265289307, "rewards/rejected": -5.974029541015625, "step": 5738 }, { "epoch": 1.27, "learning_rate": 8.169021338119669e-06, "logits/chosen": -0.746376097202301, "logits/rejected": -0.7674880027770996, "logps/chosen": -93.97271728515625, "logps/rejected": -122.8796615600586, "loss": 0.4086, "rewards/accuracies": 1.0, "rewards/chosen": -0.403604120016098, "rewards/margins": 3.525641679763794, "rewards/rejected": -3.929245710372925, "step": 5739 }, { "epoch": 1.27, "learning_rate": 8.167634780477231e-06, "logits/chosen": -1.11127769947052, "logits/rejected": -1.0889226198196411, "logps/chosen": -193.29461669921875, "logps/rejected": -207.85780334472656, "loss": 0.0518, "rewards/accuracies": 1.0, "rewards/chosen": 0.849871814250946, "rewards/margins": 2.2151458263397217, "rewards/rejected": -1.3652740716934204, "step": 5740 }, { "epoch": 1.27, "learning_rate": 8.16624781580134e-06, "logits/chosen": -1.5045119524002075, "logits/rejected": -1.5195287466049194, "logps/chosen": -143.21917724609375, "logps/rejected": -199.61822509765625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.9278267621994019, "rewards/margins": 4.071939945220947, "rewards/rejected": -5.999766826629639, "step": 5741 }, { "epoch": 1.27, "learning_rate": 8.164860444270217e-06, "logits/chosen": -1.3795589208602905, "logits/rejected": -1.2604886293411255, "logps/chosen": -131.70510864257812, "logps/rejected": -267.18682861328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 1.7227295637130737, "rewards/margins": 7.82921028137207, "rewards/rejected": -6.106480598449707, "step": 5742 }, { "epoch": 1.27, "learning_rate": 8.163472666062133e-06, "logits/chosen": -1.3498231172561646, "logits/rejected": -1.381831407546997, "logps/chosen": -169.89097595214844, "logps/rejected": -188.64512634277344, "loss": 0.0454, "rewards/accuracies": 1.0, "rewards/chosen": -4.510565280914307, "rewards/margins": 2.37357234954834, "rewards/rejected": -6.8841376304626465, "step": 5743 }, { "epoch": 1.27, "learning_rate": 8.162084481355418e-06, "logits/chosen": -1.158298134803772, "logits/rejected": -1.1795872449874878, "logps/chosen": -101.82986450195312, "logps/rejected": -133.14947509765625, "loss": 0.1587, "rewards/accuracies": 1.0, "rewards/chosen": -1.4461463689804077, "rewards/margins": 0.987810492515564, "rewards/rejected": -2.4339568614959717, "step": 5744 }, { "epoch": 1.27, "learning_rate": 8.160695890328448e-06, "logits/chosen": -1.120429515838623, "logits/rejected": -1.1319167613983154, "logps/chosen": -117.7298583984375, "logps/rejected": -160.3600616455078, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": -0.21426086127758026, "rewards/margins": 2.1477677822113037, "rewards/rejected": -2.3620285987854004, "step": 5745 }, { "epoch": 1.27, "learning_rate": 8.159306893159652e-06, "logits/chosen": -1.2824411392211914, "logits/rejected": -1.2311536073684692, "logps/chosen": -160.40225219726562, "logps/rejected": -239.0537872314453, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": -3.920518636703491, "rewards/margins": 3.7068984508514404, "rewards/rejected": -7.627417087554932, "step": 5746 }, { "epoch": 1.27, "learning_rate": 8.157917490027518e-06, "logits/chosen": -1.3369457721710205, "logits/rejected": -1.3895647525787354, "logps/chosen": -171.5767822265625, "logps/rejected": -78.82627868652344, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 0.2670135498046875, "rewards/margins": 5.767056465148926, "rewards/rejected": -5.500042915344238, "step": 5747 }, { "epoch": 1.27, "learning_rate": 8.156527681110576e-06, "logits/chosen": -1.2646492719650269, "logits/rejected": -1.2562247514724731, "logps/chosen": -129.8682403564453, "logps/rejected": -183.15621948242188, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7832199335098267, "rewards/margins": 4.835090160369873, "rewards/rejected": -6.61830997467041, "step": 5748 }, { "epoch": 1.27, "learning_rate": 8.155137466587415e-06, "logits/chosen": -1.2766374349594116, "logits/rejected": -1.2638425827026367, "logps/chosen": -114.66217803955078, "logps/rejected": -141.0216827392578, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.1921669244766235, "rewards/margins": 4.387951850891113, "rewards/rejected": -5.580118656158447, "step": 5749 }, { "epoch": 1.27, "learning_rate": 8.153746846636675e-06, "logits/chosen": -1.079544186592102, "logits/rejected": -1.0826948881149292, "logps/chosen": -212.99728393554688, "logps/rejected": -215.58578491210938, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 2.7292938232421875, "rewards/margins": 5.352113723754883, "rewards/rejected": -2.622819662094116, "step": 5750 }, { "epoch": 1.27, "learning_rate": 8.152355821437048e-06, "logits/chosen": -1.453015685081482, "logits/rejected": -1.3388662338256836, "logps/chosen": -192.58447265625, "logps/rejected": -217.89178466796875, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": 1.933929443359375, "rewards/margins": 4.684206962585449, "rewards/rejected": -2.7502777576446533, "step": 5751 }, { "epoch": 1.27, "learning_rate": 8.150964391167273e-06, "logits/chosen": -1.1652549505233765, "logits/rejected": -1.1773561239242554, "logps/chosen": -113.02996826171875, "logps/rejected": -158.9033203125, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.3237472772598267, "rewards/margins": 3.9826226234436035, "rewards/rejected": -5.306369781494141, "step": 5752 }, { "epoch": 1.27, "learning_rate": 8.149572556006151e-06, "logits/chosen": -1.472274899482727, "logits/rejected": -1.4829905033111572, "logps/chosen": -202.1829071044922, "logps/rejected": -262.7451171875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 3.9806900024414062, "rewards/margins": 11.216409683227539, "rewards/rejected": -7.235719203948975, "step": 5753 }, { "epoch": 1.27, "learning_rate": 8.148180316132526e-06, "logits/chosen": -1.4373273849487305, "logits/rejected": -1.427229404449463, "logps/chosen": -118.11146545410156, "logps/rejected": -218.31336975097656, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.4101097285747528, "rewards/margins": 5.903326034545898, "rewards/rejected": -6.3134355545043945, "step": 5754 }, { "epoch": 1.27, "learning_rate": 8.146787671725299e-06, "logits/chosen": -1.3690776824951172, "logits/rejected": -1.3834623098373413, "logps/chosen": -90.17044067382812, "logps/rejected": -165.6607666015625, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -1.0153976678848267, "rewards/margins": 11.021327018737793, "rewards/rejected": -12.036725044250488, "step": 5755 }, { "epoch": 1.27, "learning_rate": 8.14539462296342e-06, "logits/chosen": -1.208369255065918, "logits/rejected": -1.208369255065918, "logps/chosen": -195.66110229492188, "logps/rejected": -195.66110229492188, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -4.1583404541015625, "rewards/margins": 0.0, "rewards/rejected": -4.1583404541015625, "step": 5756 }, { "epoch": 1.27, "learning_rate": 8.144001170025894e-06, "logits/chosen": -1.3303632736206055, "logits/rejected": -1.224258303642273, "logps/chosen": -125.05157470703125, "logps/rejected": -234.46279907226562, "loss": 0.129, "rewards/accuracies": 1.0, "rewards/chosen": -3.626696825027466, "rewards/margins": 1.32340407371521, "rewards/rejected": -4.950100898742676, "step": 5757 }, { "epoch": 1.27, "learning_rate": 8.142607313091775e-06, "logits/chosen": -1.2361798286437988, "logits/rejected": -1.2361798286437988, "logps/chosen": -106.35879516601562, "logps/rejected": -106.35879516601562, "loss": 0.3896, "rewards/accuracies": 0.0, "rewards/chosen": -0.6431472897529602, "rewards/margins": 0.0, "rewards/rejected": -0.6431472897529602, "step": 5758 }, { "epoch": 1.27, "learning_rate": 8.141213052340171e-06, "logits/chosen": -1.1115769147872925, "logits/rejected": -1.1115769147872925, "logps/chosen": -117.13540649414062, "logps/rejected": -117.13540649414062, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.379263401031494, "rewards/margins": 0.0, "rewards/rejected": -4.379263401031494, "step": 5759 }, { "epoch": 1.27, "learning_rate": 8.13981838795024e-06, "logits/chosen": -1.0236098766326904, "logits/rejected": -0.9954012632369995, "logps/chosen": -207.736572265625, "logps/rejected": -203.35675048828125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.6192520260810852, "rewards/margins": 5.734123229980469, "rewards/rejected": -6.353375434875488, "step": 5760 }, { "epoch": 1.28, "learning_rate": 8.138423320101196e-06, "logits/chosen": -1.0892935991287231, "logits/rejected": -1.1639240980148315, "logps/chosen": -156.38693237304688, "logps/rejected": -141.11077880859375, "loss": 0.131, "rewards/accuracies": 1.0, "rewards/chosen": 0.4937988221645355, "rewards/margins": 4.988592624664307, "rewards/rejected": -4.494793891906738, "step": 5761 }, { "epoch": 1.28, "learning_rate": 8.1370278489723e-06, "logits/chosen": -1.050340175628662, "logits/rejected": -0.9224744439125061, "logps/chosen": -201.63497924804688, "logps/rejected": -385.9573974609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.6966370344161987, "rewards/margins": 6.979235649108887, "rewards/rejected": -8.675872802734375, "step": 5762 }, { "epoch": 1.28, "learning_rate": 8.135631974742863e-06, "logits/chosen": -1.0928115844726562, "logits/rejected": -1.0198581218719482, "logps/chosen": -99.94740295410156, "logps/rejected": -228.77542114257812, "loss": 0.2652, "rewards/accuracies": 1.0, "rewards/chosen": -1.4671906232833862, "rewards/margins": 0.3972228765487671, "rewards/rejected": -1.8644134998321533, "step": 5763 }, { "epoch": 1.28, "learning_rate": 8.13423569759226e-06, "logits/chosen": -1.1041722297668457, "logits/rejected": -1.102114200592041, "logps/chosen": -94.61788940429688, "logps/rejected": -135.94314575195312, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": -1.139556884765625, "rewards/margins": 3.0341949462890625, "rewards/rejected": -4.1737518310546875, "step": 5764 }, { "epoch": 1.28, "learning_rate": 8.132839017699901e-06, "logits/chosen": -1.4624755382537842, "logits/rejected": -1.6120930910110474, "logps/chosen": -149.84007263183594, "logps/rejected": -146.62045288085938, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.4764267206192017, "rewards/margins": 9.862950325012207, "rewards/rejected": -11.339377403259277, "step": 5765 }, { "epoch": 1.28, "learning_rate": 8.131441935245261e-06, "logits/chosen": -1.0174816846847534, "logits/rejected": -0.9634992480278015, "logps/chosen": -108.15582275390625, "logps/rejected": -158.67718505859375, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/chosen": -1.6825439929962158, "rewards/margins": 2.3693153858184814, "rewards/rejected": -4.051859378814697, "step": 5766 }, { "epoch": 1.28, "learning_rate": 8.13004445040786e-06, "logits/chosen": -1.366388201713562, "logits/rejected": -1.3859937191009521, "logps/chosen": -150.75619506835938, "logps/rejected": -325.02911376953125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 0.21928711235523224, "rewards/margins": 16.84571075439453, "rewards/rejected": -16.626422882080078, "step": 5767 }, { "epoch": 1.28, "learning_rate": 8.128646563367271e-06, "logits/chosen": -1.0168336629867554, "logits/rejected": -1.0159056186676025, "logps/chosen": -57.66543197631836, "logps/rejected": -133.09063720703125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.3826007843017578, "rewards/margins": 7.134252548217773, "rewards/rejected": -7.516853332519531, "step": 5768 }, { "epoch": 1.28, "learning_rate": 8.12724827430312e-06, "logits/chosen": -0.9987095594406128, "logits/rejected": -0.43570366501808167, "logps/chosen": -101.69297790527344, "logps/rejected": -688.536376953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.001763939857483, "rewards/margins": 36.67119216918945, "rewards/rejected": -37.67295455932617, "step": 5769 }, { "epoch": 1.28, "learning_rate": 8.125849583395083e-06, "logits/chosen": -1.2144372463226318, "logits/rejected": -1.1952109336853027, "logps/chosen": -220.94126892089844, "logps/rejected": -289.9519958496094, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -7.909379482269287, "rewards/margins": 3.813711643218994, "rewards/rejected": -11.723091125488281, "step": 5770 }, { "epoch": 1.28, "learning_rate": 8.124450490822889e-06, "logits/chosen": -1.4025954008102417, "logits/rejected": -1.381891131401062, "logps/chosen": -97.38540649414062, "logps/rejected": -194.84646606445312, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.3485527038574219, "rewards/margins": 4.2786126136779785, "rewards/rejected": -5.6271653175354, "step": 5771 }, { "epoch": 1.28, "learning_rate": 8.123050996766317e-06, "logits/chosen": -1.319266676902771, "logits/rejected": -1.2947933673858643, "logps/chosen": -128.02806091308594, "logps/rejected": -235.6753692626953, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.2628585994243622, "rewards/margins": 11.602466583251953, "rewards/rejected": -11.339608192443848, "step": 5772 }, { "epoch": 1.28, "learning_rate": 8.121651101405202e-06, "logits/chosen": -1.4450684785842896, "logits/rejected": -1.4981446266174316, "logps/chosen": -118.99728393554688, "logps/rejected": -65.4184799194336, "loss": 0.0665, "rewards/accuracies": 1.0, "rewards/chosen": -2.1608338356018066, "rewards/margins": 1.9496803283691406, "rewards/rejected": -4.110514163970947, "step": 5773 }, { "epoch": 1.28, "learning_rate": 8.120250804919424e-06, "logits/chosen": -1.1131432056427002, "logits/rejected": -1.0935630798339844, "logps/chosen": -87.21054077148438, "logps/rejected": -109.45603942871094, "loss": 0.0846, "rewards/accuracies": 1.0, "rewards/chosen": -0.6015083193778992, "rewards/margins": 1.6994469165802002, "rewards/rejected": -2.300955295562744, "step": 5774 }, { "epoch": 1.28, "learning_rate": 8.118850107488916e-06, "logits/chosen": -1.3578431606292725, "logits/rejected": -1.4406858682632446, "logps/chosen": -177.2559814453125, "logps/rejected": -174.79763793945312, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.4133743345737457, "rewards/margins": 6.481648921966553, "rewards/rejected": -6.895023345947266, "step": 5775 }, { "epoch": 1.28, "learning_rate": 8.117449009293668e-06, "logits/chosen": -1.0362000465393066, "logits/rejected": -0.9297998547554016, "logps/chosen": -200.63372802734375, "logps/rejected": -297.9302673339844, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/chosen": -4.070056438446045, "rewards/margins": 5.374352931976318, "rewards/rejected": -9.444409370422363, "step": 5776 }, { "epoch": 1.28, "learning_rate": 8.116047510513718e-06, "logits/chosen": -1.2467154264450073, "logits/rejected": -1.0966631174087524, "logps/chosen": -265.87030029296875, "logps/rejected": -300.717529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.6605957746505737, "rewards/margins": 11.117003440856934, "rewards/rejected": -9.45640754699707, "step": 5777 }, { "epoch": 1.28, "learning_rate": 8.114645611329152e-06, "logits/chosen": -1.1968516111373901, "logits/rejected": -1.268044114112854, "logps/chosen": -186.94998168945312, "logps/rejected": -263.1776123046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.10495300590991974, "rewards/margins": 11.044811248779297, "rewards/rejected": -11.149764060974121, "step": 5778 }, { "epoch": 1.28, "learning_rate": 8.113243311920113e-06, "logits/chosen": -1.0716973543167114, "logits/rejected": -1.0102310180664062, "logps/chosen": -100.82450866699219, "logps/rejected": -233.10751342773438, "loss": 0.0903, "rewards/accuracies": 1.0, "rewards/chosen": -1.4110863208770752, "rewards/margins": 1.621405839920044, "rewards/rejected": -3.032492160797119, "step": 5779 }, { "epoch": 1.28, "learning_rate": 8.111840612466792e-06, "logits/chosen": -0.9684078693389893, "logits/rejected": -0.9529158473014832, "logps/chosen": -89.46624755859375, "logps/rejected": -88.96331787109375, "loss": 0.1809, "rewards/accuracies": 1.0, "rewards/chosen": -2.244671583175659, "rewards/margins": 0.8330941200256348, "rewards/rejected": -3.077765703201294, "step": 5780 }, { "epoch": 1.28, "learning_rate": 8.110437513149433e-06, "logits/chosen": -1.4799985885620117, "logits/rejected": -1.1119354963302612, "logps/chosen": -62.38609313964844, "logps/rejected": -273.90032958984375, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.20413780212402344, "rewards/margins": 4.130359172821045, "rewards/rejected": -4.334496974945068, "step": 5781 }, { "epoch": 1.28, "learning_rate": 8.109034014148331e-06, "logits/chosen": -1.3449095487594604, "logits/rejected": -1.196104884147644, "logps/chosen": -233.18441772460938, "logps/rejected": -345.2719421386719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.805960178375244, "rewards/margins": 12.550260543823242, "rewards/rejected": -8.74429988861084, "step": 5782 }, { "epoch": 1.28, "learning_rate": 8.107630115643832e-06, "logits/chosen": -1.512689232826233, "logits/rejected": -1.4017467498779297, "logps/chosen": -136.66744995117188, "logps/rejected": -331.83673095703125, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": 1.9765137434005737, "rewards/margins": 2.862509250640869, "rewards/rejected": -0.8859955072402954, "step": 5783 }, { "epoch": 1.28, "learning_rate": 8.106225817816333e-06, "logits/chosen": -1.1661672592163086, "logits/rejected": -1.2140332460403442, "logps/chosen": -189.60215759277344, "logps/rejected": -164.33131408691406, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": 0.826629638671875, "rewards/margins": 4.034502029418945, "rewards/rejected": -3.207872152328491, "step": 5784 }, { "epoch": 1.28, "learning_rate": 8.104821120846287e-06, "logits/chosen": -1.3841444253921509, "logits/rejected": -1.3439983129501343, "logps/chosen": -180.91732788085938, "logps/rejected": -296.2377624511719, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.0222489833831787, "rewards/margins": 5.444967269897461, "rewards/rejected": -8.467216491699219, "step": 5785 }, { "epoch": 1.28, "learning_rate": 8.103416024914186e-06, "logits/chosen": -1.4592852592468262, "logits/rejected": -1.4523112773895264, "logps/chosen": -103.03196716308594, "logps/rejected": -226.679931640625, "loss": 0.5864, "rewards/accuracies": 1.0, "rewards/chosen": -0.9381133913993835, "rewards/margins": 12.690462112426758, "rewards/rejected": -13.628575325012207, "step": 5786 }, { "epoch": 1.28, "learning_rate": 8.102010530200589e-06, "logits/chosen": -1.081525444984436, "logits/rejected": -1.0829895734786987, "logps/chosen": -84.0988998413086, "logps/rejected": -142.6289520263672, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -1.1892356872558594, "rewards/margins": 6.409873962402344, "rewards/rejected": -7.599109649658203, "step": 5787 }, { "epoch": 1.28, "learning_rate": 8.100604636886095e-06, "logits/chosen": -1.6278176307678223, "logits/rejected": -1.6297080516815186, "logps/chosen": -102.33966064453125, "logps/rejected": -129.0238494873047, "loss": 0.4729, "rewards/accuracies": 1.0, "rewards/chosen": -4.977915287017822, "rewards/margins": 1.2466368675231934, "rewards/rejected": -6.224552154541016, "step": 5788 }, { "epoch": 1.28, "learning_rate": 8.09919834515136e-06, "logits/chosen": -1.0115134716033936, "logits/rejected": -0.8802618384361267, "logps/chosen": -282.43780517578125, "logps/rejected": -336.9996337890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.6117005348205566, "rewards/margins": 7.85397481918335, "rewards/rejected": -10.465675354003906, "step": 5789 }, { "epoch": 1.28, "learning_rate": 8.097791655177085e-06, "logits/chosen": -1.6121989488601685, "logits/rejected": -1.6231211423873901, "logps/chosen": -106.39700317382812, "logps/rejected": -166.99209594726562, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.31440964341163635, "rewards/margins": 12.953888893127441, "rewards/rejected": -12.639479637145996, "step": 5790 }, { "epoch": 1.28, "learning_rate": 8.096384567144033e-06, "logits/chosen": -1.1938424110412598, "logits/rejected": -1.1938424110412598, "logps/chosen": -126.67902374267578, "logps/rejected": -126.67902374267578, "loss": 0.349, "rewards/accuracies": 0.0, "rewards/chosen": -4.0456438064575195, "rewards/margins": 0.0, "rewards/rejected": -4.0456438064575195, "step": 5791 }, { "epoch": 1.28, "learning_rate": 8.094977081233006e-06, "logits/chosen": -0.9771866798400879, "logits/rejected": -1.0064315795898438, "logps/chosen": -218.87808227539062, "logps/rejected": -158.7906494140625, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.68585205078125, "rewards/margins": 9.49604320526123, "rewards/rejected": -10.18189525604248, "step": 5792 }, { "epoch": 1.28, "learning_rate": 8.093569197624864e-06, "logits/chosen": -1.093314290046692, "logits/rejected": -1.123731017112732, "logps/chosen": -207.55984497070312, "logps/rejected": -143.12799072265625, "loss": 0.1647, "rewards/accuracies": 1.0, "rewards/chosen": -2.3598861694335938, "rewards/margins": 0.9708237648010254, "rewards/rejected": -3.330709934234619, "step": 5793 }, { "epoch": 1.28, "learning_rate": 8.092160916500515e-06, "logits/chosen": -1.2955371141433716, "logits/rejected": -1.3128701448440552, "logps/chosen": -154.98529052734375, "logps/rejected": -141.71945190429688, "loss": 0.6488, "rewards/accuracies": 1.0, "rewards/chosen": -1.9605530500411987, "rewards/margins": 0.4603065252304077, "rewards/rejected": -2.4208595752716064, "step": 5794 }, { "epoch": 1.28, "learning_rate": 8.090752238040925e-06, "logits/chosen": -1.319300651550293, "logits/rejected": -1.4271612167358398, "logps/chosen": -219.423828125, "logps/rejected": -144.96478271484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.9484802484512329, "rewards/margins": 9.864801406860352, "rewards/rejected": -8.91632080078125, "step": 5795 }, { "epoch": 1.28, "learning_rate": 8.0893431624271e-06, "logits/chosen": -1.0379917621612549, "logits/rejected": -1.0509436130523682, "logps/chosen": -117.08245849609375, "logps/rejected": -120.49264526367188, "loss": 0.0428, "rewards/accuracies": 1.0, "rewards/chosen": -2.031031847000122, "rewards/margins": 2.846914052963257, "rewards/rejected": -4.877945899963379, "step": 5796 }, { "epoch": 1.28, "learning_rate": 8.087933689840107e-06, "logits/chosen": -1.5011783838272095, "logits/rejected": -0.8740867972373962, "logps/chosen": -170.6436767578125, "logps/rejected": -783.5856323242188, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.08588104695081711, "rewards/margins": 63.82280731201172, "rewards/rejected": -63.908687591552734, "step": 5797 }, { "epoch": 1.28, "learning_rate": 8.086523820461057e-06, "logits/chosen": -1.2040022611618042, "logits/rejected": -1.168821930885315, "logps/chosen": -79.48063659667969, "logps/rejected": -191.66845703125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.3110671937465668, "rewards/margins": 5.295244216918945, "rewards/rejected": -5.606311321258545, "step": 5798 }, { "epoch": 1.28, "learning_rate": 8.085113554471115e-06, "logits/chosen": -1.256542444229126, "logits/rejected": -1.2949752807617188, "logps/chosen": -215.2487335205078, "logps/rejected": -210.56207275390625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -2.020369052886963, "rewards/margins": 4.651780605316162, "rewards/rejected": -6.672149658203125, "step": 5799 }, { "epoch": 1.28, "learning_rate": 8.083702892051499e-06, "logits/chosen": -1.5820337533950806, "logits/rejected": -1.5810530185699463, "logps/chosen": -145.97048950195312, "logps/rejected": -159.75128173828125, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -2.372631072998047, "rewards/margins": 3.1560096740722656, "rewards/rejected": -5.5286407470703125, "step": 5800 }, { "epoch": 1.28, "learning_rate": 8.082291833383475e-06, "logits/chosen": -1.30176842212677, "logits/rejected": -1.3056310415267944, "logps/chosen": -174.49267578125, "logps/rejected": -211.1459503173828, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0433349609375, "rewards/margins": 10.294782638549805, "rewards/rejected": -11.338117599487305, "step": 5801 }, { "epoch": 1.28, "learning_rate": 8.080880378648359e-06, "logits/chosen": -1.5046762228012085, "logits/rejected": -1.4623641967773438, "logps/chosen": -160.14276123046875, "logps/rejected": -295.654052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5813310146331787, "rewards/margins": 9.285070419311523, "rewards/rejected": -12.866401672363281, "step": 5802 }, { "epoch": 1.28, "learning_rate": 8.079468528027519e-06, "logits/chosen": -1.4972580671310425, "logits/rejected": -1.294231653213501, "logps/chosen": -52.61778259277344, "logps/rejected": -250.71026611328125, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": 1.5243606567382812, "rewards/margins": 5.801246643066406, "rewards/rejected": -4.276885986328125, "step": 5803 }, { "epoch": 1.28, "learning_rate": 8.078056281702378e-06, "logits/chosen": -1.5766422748565674, "logits/rejected": -1.5897951126098633, "logps/chosen": -71.452880859375, "logps/rejected": -99.85559844970703, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.10051691532135, "rewards/margins": 4.054442405700684, "rewards/rejected": -5.154959201812744, "step": 5804 }, { "epoch": 1.28, "learning_rate": 8.076643639854405e-06, "logits/chosen": -1.4668288230895996, "logits/rejected": -1.53516685962677, "logps/chosen": -163.1865234375, "logps/rejected": -161.54013061523438, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 1.566931128501892, "rewards/margins": 12.767447471618652, "rewards/rejected": -11.200516700744629, "step": 5805 }, { "epoch": 1.29, "learning_rate": 8.075230602665118e-06, "logits/chosen": -1.4556549787521362, "logits/rejected": -1.5369126796722412, "logps/chosen": -119.24797058105469, "logps/rejected": -91.87075805664062, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/chosen": 0.3653427064418793, "rewards/margins": 7.198530673980713, "rewards/rejected": -6.833188056945801, "step": 5806 }, { "epoch": 1.29, "learning_rate": 8.073817170316093e-06, "logits/chosen": -1.53013014793396, "logits/rejected": -1.5492424964904785, "logps/chosen": -153.07168579101562, "logps/rejected": -149.55645751953125, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -3.108477830886841, "rewards/margins": 3.103083848953247, "rewards/rejected": -6.211561679840088, "step": 5807 }, { "epoch": 1.29, "learning_rate": 8.07240334298895e-06, "logits/chosen": -1.0553609132766724, "logits/rejected": -0.9922338724136353, "logps/chosen": -97.91758728027344, "logps/rejected": -220.72183227539062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.72630774974823, "rewards/margins": 9.32580280303955, "rewards/rejected": -11.05211067199707, "step": 5808 }, { "epoch": 1.29, "learning_rate": 8.070989120865362e-06, "logits/chosen": -1.5116654634475708, "logits/rejected": -1.5306529998779297, "logps/chosen": -176.8515167236328, "logps/rejected": -118.99555206298828, "loss": 0.7712, "rewards/accuracies": 0.0, "rewards/chosen": -9.764603614807129, "rewards/margins": -1.3016681671142578, "rewards/rejected": -8.462935447692871, "step": 5809 }, { "epoch": 1.29, "learning_rate": 8.069574504127058e-06, "logits/chosen": -1.4748674631118774, "logits/rejected": -1.4569404125213623, "logps/chosen": -96.62785339355469, "logps/rejected": -95.40187072753906, "loss": 0.1242, "rewards/accuracies": 1.0, "rewards/chosen": -1.5929092168807983, "rewards/margins": 2.64357852935791, "rewards/rejected": -4.236487865447998, "step": 5810 }, { "epoch": 1.29, "learning_rate": 8.068159492955806e-06, "logits/chosen": -1.3347446918487549, "logits/rejected": -1.2821824550628662, "logps/chosen": -82.38143157958984, "logps/rejected": -166.9337158203125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.734204888343811, "rewards/margins": 6.502208709716797, "rewards/rejected": -7.236413478851318, "step": 5811 }, { "epoch": 1.29, "learning_rate": 8.066744087533436e-06, "logits/chosen": -1.6273462772369385, "logits/rejected": -1.623524785041809, "logps/chosen": -118.1276626586914, "logps/rejected": -134.40847778320312, "loss": 0.1063, "rewards/accuracies": 1.0, "rewards/chosen": -3.05985951423645, "rewards/margins": 1.4481523036956787, "rewards/rejected": -4.508011817932129, "step": 5812 }, { "epoch": 1.29, "learning_rate": 8.065328288041823e-06, "logits/chosen": -1.3010823726654053, "logits/rejected": -1.3781449794769287, "logps/chosen": -307.3720397949219, "logps/rejected": -176.6839599609375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.38949891924858093, "rewards/margins": 5.047089576721191, "rewards/rejected": -5.436588287353516, "step": 5813 }, { "epoch": 1.29, "learning_rate": 8.063912094662893e-06, "logits/chosen": -1.2487705945968628, "logits/rejected": -1.2517088651657104, "logps/chosen": -169.07347106933594, "logps/rejected": -73.30467987060547, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 1.772059679031372, "rewards/margins": 6.256671905517578, "rewards/rejected": -4.484612464904785, "step": 5814 }, { "epoch": 1.29, "learning_rate": 8.062495507578628e-06, "logits/chosen": -1.2320725917816162, "logits/rejected": -0.9811151623725891, "logps/chosen": -153.29318237304688, "logps/rejected": -903.6217041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.369886875152588, "rewards/margins": 55.86911392211914, "rewards/rejected": -59.2390022277832, "step": 5815 }, { "epoch": 1.29, "learning_rate": 8.061078526971048e-06, "logits/chosen": -1.1158019304275513, "logits/rejected": -1.0995168685913086, "logps/chosen": -243.76751708984375, "logps/rejected": -289.197998046875, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": 0.5773651003837585, "rewards/margins": 6.477618217468262, "rewards/rejected": -5.9002532958984375, "step": 5816 }, { "epoch": 1.29, "learning_rate": 8.059661153022236e-06, "logits/chosen": -1.3255943059921265, "logits/rejected": -1.3281034231185913, "logps/chosen": -238.478271484375, "logps/rejected": -234.0689697265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.103399634361267, "rewards/margins": 7.870906352996826, "rewards/rejected": -8.974306106567383, "step": 5817 }, { "epoch": 1.29, "learning_rate": 8.058243385914324e-06, "logits/chosen": -1.2358505725860596, "logits/rejected": -1.2358505725860596, "logps/chosen": -125.69131469726562, "logps/rejected": -125.69131469726562, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -7.041928768157959, "rewards/margins": 0.0, "rewards/rejected": -7.041928768157959, "step": 5818 }, { "epoch": 1.29, "learning_rate": 8.056825225829486e-06, "logits/chosen": -1.1438175439834595, "logits/rejected": -1.5159887075424194, "logps/chosen": -142.15811157226562, "logps/rejected": -450.8788146972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5305763483047485, "rewards/margins": 29.376033782958984, "rewards/rejected": -30.9066104888916, "step": 5819 }, { "epoch": 1.29, "learning_rate": 8.055406672949957e-06, "logits/chosen": -0.998307466506958, "logits/rejected": -0.9919266104698181, "logps/chosen": -148.36566162109375, "logps/rejected": -120.42132568359375, "loss": 0.1855, "rewards/accuracies": 1.0, "rewards/chosen": -3.653764486312866, "rewards/margins": 0.8003289699554443, "rewards/rejected": -4.4540934562683105, "step": 5820 }, { "epoch": 1.29, "learning_rate": 8.053987727458013e-06, "logits/chosen": -1.0788623094558716, "logits/rejected": -1.5062799453735352, "logps/chosen": -184.81143188476562, "logps/rejected": -601.1170043945312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.931901454925537, "rewards/margins": 45.562705993652344, "rewards/rejected": -51.494606018066406, "step": 5821 }, { "epoch": 1.29, "learning_rate": 8.05256838953599e-06, "logits/chosen": -1.342028260231018, "logits/rejected": -1.3515547513961792, "logps/chosen": -139.27398681640625, "logps/rejected": -97.57380676269531, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -3.7070891857147217, "rewards/margins": 3.5788042545318604, "rewards/rejected": -7.285893440246582, "step": 5822 }, { "epoch": 1.29, "learning_rate": 8.051148659366265e-06, "logits/chosen": -0.8394516110420227, "logits/rejected": -0.8446448445320129, "logps/chosen": -190.9588165283203, "logps/rejected": -193.52566528320312, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 2.0262131690979004, "rewards/margins": 8.792922973632812, "rewards/rejected": -6.766709804534912, "step": 5823 }, { "epoch": 1.29, "learning_rate": 8.049728537131275e-06, "logits/chosen": -0.815260112285614, "logits/rejected": -0.8246263861656189, "logps/chosen": -112.42512512207031, "logps/rejected": -134.9617919921875, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -3.299187421798706, "rewards/margins": 2.6370551586151123, "rewards/rejected": -5.936242580413818, "step": 5824 }, { "epoch": 1.29, "learning_rate": 8.048308023013498e-06, "logits/chosen": -1.1988494396209717, "logits/rejected": -1.254035234451294, "logps/chosen": -213.35202026367188, "logps/rejected": -189.45846557617188, "loss": 0.0984, "rewards/accuracies": 1.0, "rewards/chosen": -3.698394775390625, "rewards/margins": 1.5836520195007324, "rewards/rejected": -5.282046794891357, "step": 5825 }, { "epoch": 1.29, "learning_rate": 8.046887117195467e-06, "logits/chosen": -1.2986118793487549, "logits/rejected": -1.2504477500915527, "logps/chosen": -98.75515747070312, "logps/rejected": -139.81785583496094, "loss": 0.1634, "rewards/accuracies": 1.0, "rewards/chosen": -2.0974717140197754, "rewards/margins": 0.9507858753204346, "rewards/rejected": -3.04825758934021, "step": 5826 }, { "epoch": 1.29, "learning_rate": 8.045465819859766e-06, "logits/chosen": -1.561300277709961, "logits/rejected": -1.529812216758728, "logps/chosen": -109.28852844238281, "logps/rejected": -208.7089080810547, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -2.7053070068359375, "rewards/margins": 6.291272163391113, "rewards/rejected": -8.99657917022705, "step": 5827 }, { "epoch": 1.29, "learning_rate": 8.044044131189029e-06, "logits/chosen": -0.8163689970970154, "logits/rejected": -0.8116790652275085, "logps/chosen": -132.93482971191406, "logps/rejected": -207.3807373046875, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": -3.6341819763183594, "rewards/margins": 2.7695717811584473, "rewards/rejected": -6.403753757476807, "step": 5828 }, { "epoch": 1.29, "learning_rate": 8.042622051365938e-06, "logits/chosen": -1.3200825452804565, "logits/rejected": -1.361474871635437, "logps/chosen": -139.60214233398438, "logps/rejected": -209.37994384765625, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.5850052237510681, "rewards/margins": 9.52342700958252, "rewards/rejected": -10.108431816101074, "step": 5829 }, { "epoch": 1.29, "learning_rate": 8.041199580573229e-06, "logits/chosen": -1.1894145011901855, "logits/rejected": -1.0960824489593506, "logps/chosen": -210.219482421875, "logps/rejected": -295.239990234375, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": 1.1020889282226562, "rewards/margins": 7.7108964920043945, "rewards/rejected": -6.608807563781738, "step": 5830 }, { "epoch": 1.29, "learning_rate": 8.039776718993683e-06, "logits/chosen": -1.4343944787979126, "logits/rejected": -1.4391629695892334, "logps/chosen": -106.26958465576172, "logps/rejected": -108.17546081542969, "loss": 0.272, "rewards/accuracies": 1.0, "rewards/chosen": -8.206339836120605, "rewards/margins": 0.39408302307128906, "rewards/rejected": -8.600422859191895, "step": 5831 }, { "epoch": 1.29, "learning_rate": 8.038353466810137e-06, "logits/chosen": -1.1092580556869507, "logits/rejected": -1.149226427078247, "logps/chosen": -184.21279907226562, "logps/rejected": -148.08749389648438, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.691027820110321, "rewards/margins": 5.602247714996338, "rewards/rejected": -4.911220073699951, "step": 5832 }, { "epoch": 1.29, "learning_rate": 8.036929824205476e-06, "logits/chosen": -1.336097002029419, "logits/rejected": -1.3278297185897827, "logps/chosen": -176.50289916992188, "logps/rejected": -240.71803283691406, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -2.3220276832580566, "rewards/margins": 4.7727556228637695, "rewards/rejected": -7.094783306121826, "step": 5833 }, { "epoch": 1.29, "learning_rate": 8.03550579136263e-06, "logits/chosen": -1.2896531820297241, "logits/rejected": -1.2736127376556396, "logps/chosen": -90.02731323242188, "logps/rejected": -196.94418334960938, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.6911819577217102, "rewards/margins": 9.271838188171387, "rewards/rejected": -9.963020324707031, "step": 5834 }, { "epoch": 1.29, "learning_rate": 8.03408136846459e-06, "logits/chosen": -1.2430607080459595, "logits/rejected": -1.3926348686218262, "logps/chosen": -196.4685516357422, "logps/rejected": -154.69100952148438, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 1.164753794670105, "rewards/margins": 6.04683256149292, "rewards/rejected": -4.882078647613525, "step": 5835 }, { "epoch": 1.29, "learning_rate": 8.032656555694388e-06, "logits/chosen": -1.3413435220718384, "logits/rejected": -1.3394482135772705, "logps/chosen": -102.79491424560547, "logps/rejected": -131.70779418945312, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": 0.21303482353687286, "rewards/margins": 3.0076844692230225, "rewards/rejected": -2.794649600982666, "step": 5836 }, { "epoch": 1.29, "learning_rate": 8.031231353235104e-06, "logits/chosen": -0.9462396502494812, "logits/rejected": -0.9409651756286621, "logps/chosen": -204.39666748046875, "logps/rejected": -126.87205505371094, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -1.3635681867599487, "rewards/margins": 3.4713058471679688, "rewards/rejected": -4.834874153137207, "step": 5837 }, { "epoch": 1.29, "learning_rate": 8.029805761269881e-06, "logits/chosen": -1.5138698816299438, "logits/rejected": -1.4379868507385254, "logps/chosen": -100.15885925292969, "logps/rejected": -197.7449188232422, "loss": 0.3646, "rewards/accuracies": 0.0, "rewards/chosen": -1.5744308233261108, "rewards/margins": -0.06998443603515625, "rewards/rejected": -1.5044463872909546, "step": 5838 }, { "epoch": 1.29, "learning_rate": 8.028379779981902e-06, "logits/chosen": -1.4986062049865723, "logits/rejected": -1.494926929473877, "logps/chosen": -136.61732482910156, "logps/rejected": -177.8626708984375, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.1939918994903564, "rewards/margins": 3.8329522609710693, "rewards/rejected": -5.026944160461426, "step": 5839 }, { "epoch": 1.29, "learning_rate": 8.026953409554402e-06, "logits/chosen": -1.5561530590057373, "logits/rejected": -1.5522404909133911, "logps/chosen": -103.23969268798828, "logps/rejected": -115.09239196777344, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -1.2688026428222656, "rewards/margins": 2.9027915000915527, "rewards/rejected": -4.171594142913818, "step": 5840 }, { "epoch": 1.29, "learning_rate": 8.025526650170665e-06, "logits/chosen": -1.3677380084991455, "logits/rejected": -1.3720465898513794, "logps/chosen": -222.7245330810547, "logps/rejected": -202.1966094970703, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -1.3578293323516846, "rewards/margins": 4.405431747436523, "rewards/rejected": -5.763261318206787, "step": 5841 }, { "epoch": 1.29, "learning_rate": 8.024099502014024e-06, "logits/chosen": -1.297969102859497, "logits/rejected": -1.3041276931762695, "logps/chosen": -208.32876586914062, "logps/rejected": -181.90093994140625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.154486060142517, "rewards/margins": 7.04658842086792, "rewards/rejected": -8.201074600219727, "step": 5842 }, { "epoch": 1.29, "learning_rate": 8.02267196526787e-06, "logits/chosen": -1.1134910583496094, "logits/rejected": -1.1032356023788452, "logps/chosen": -193.77342224121094, "logps/rejected": -277.61962890625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.0455948114395142, "rewards/margins": 10.81336784362793, "rewards/rejected": -11.858963012695312, "step": 5843 }, { "epoch": 1.29, "learning_rate": 8.021244040115634e-06, "logits/chosen": -0.9439200162887573, "logits/rejected": -0.8950391411781311, "logps/chosen": -75.10243225097656, "logps/rejected": -169.81692504882812, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.2245110273361206, "rewards/margins": 4.524184226989746, "rewards/rejected": -5.748695373535156, "step": 5844 }, { "epoch": 1.29, "learning_rate": 8.019815726740801e-06, "logits/chosen": -1.0637084245681763, "logits/rejected": -1.0784140825271606, "logps/chosen": -158.98587036132812, "logps/rejected": -135.475341796875, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/chosen": -3.143514394760132, "rewards/margins": 2.372912645339966, "rewards/rejected": -5.516427040100098, "step": 5845 }, { "epoch": 1.29, "learning_rate": 8.018387025326906e-06, "logits/chosen": -1.1244674921035767, "logits/rejected": -1.1378257274627686, "logps/chosen": -196.79278564453125, "logps/rejected": -211.5948486328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.476588487625122, "rewards/margins": 7.865379333496094, "rewards/rejected": -6.388791084289551, "step": 5846 }, { "epoch": 1.29, "learning_rate": 8.016957936057535e-06, "logits/chosen": -1.5658955574035645, "logits/rejected": -1.5694539546966553, "logps/chosen": -50.31121826171875, "logps/rejected": -68.50487518310547, "loss": 0.1989, "rewards/accuracies": 1.0, "rewards/chosen": -0.7049781680107117, "rewards/margins": 0.7232677340507507, "rewards/rejected": -1.4282459020614624, "step": 5847 }, { "epoch": 1.29, "learning_rate": 8.015528459116321e-06, "logits/chosen": -1.5081918239593506, "logits/rejected": -1.6067250967025757, "logps/chosen": -205.08042907714844, "logps/rejected": -170.2106475830078, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -4.605821132659912, "rewards/margins": 2.8465471267700195, "rewards/rejected": -7.452368259429932, "step": 5848 }, { "epoch": 1.29, "learning_rate": 8.014098594686951e-06, "logits/chosen": -1.265804648399353, "logits/rejected": -1.2473613023757935, "logps/chosen": -216.10092163085938, "logps/rejected": -217.0438690185547, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.2717025876045227, "rewards/margins": 4.857471942901611, "rewards/rejected": -5.129174709320068, "step": 5849 }, { "epoch": 1.29, "learning_rate": 8.012668342953155e-06, "logits/chosen": -1.5253489017486572, "logits/rejected": -1.5077767372131348, "logps/chosen": -102.27188110351562, "logps/rejected": -124.7037582397461, "loss": 0.1364, "rewards/accuracies": 1.0, "rewards/chosen": -0.4090438783168793, "rewards/margins": 1.1624664068222046, "rewards/rejected": -1.5715103149414062, "step": 5850 }, { "epoch": 1.3, "learning_rate": 8.011237704098721e-06, "logits/chosen": -1.2050367593765259, "logits/rejected": -1.181952714920044, "logps/chosen": -158.31826782226562, "logps/rejected": -159.70492553710938, "loss": 0.1575, "rewards/accuracies": 1.0, "rewards/chosen": -1.0713608264923096, "rewards/margins": 1.6732635498046875, "rewards/rejected": -2.744624376296997, "step": 5851 }, { "epoch": 1.3, "learning_rate": 8.00980667830748e-06, "logits/chosen": -1.3183186054229736, "logits/rejected": -1.3191910982131958, "logps/chosen": -187.2809600830078, "logps/rejected": -209.62612915039062, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -1.2960251569747925, "rewards/margins": 3.356001377105713, "rewards/rejected": -4.652026653289795, "step": 5852 }, { "epoch": 1.3, "learning_rate": 8.008375265763317e-06, "logits/chosen": -1.1836177110671997, "logits/rejected": -1.190131664276123, "logps/chosen": -81.3646240234375, "logps/rejected": -138.44760131835938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8043525815010071, "rewards/margins": 10.466713905334473, "rewards/rejected": -11.271066665649414, "step": 5853 }, { "epoch": 1.3, "learning_rate": 8.006943466650163e-06, "logits/chosen": -1.383773684501648, "logits/rejected": -1.3802205324172974, "logps/chosen": -100.26029968261719, "logps/rejected": -103.73046112060547, "loss": 0.2765, "rewards/accuracies": 1.0, "rewards/chosen": -1.6630516052246094, "rewards/margins": 0.30323636531829834, "rewards/rejected": -1.9662879705429077, "step": 5854 }, { "epoch": 1.3, "learning_rate": 8.005511281152004e-06, "logits/chosen": -1.3699227571487427, "logits/rejected": -1.3772705793380737, "logps/chosen": -96.80299377441406, "logps/rejected": -112.97328186035156, "loss": 0.596, "rewards/accuracies": 0.0, "rewards/chosen": -2.729771375656128, "rewards/margins": -0.82825767993927, "rewards/rejected": -1.901513695716858, "step": 5855 }, { "epoch": 1.3, "learning_rate": 8.004078709452869e-06, "logits/chosen": -1.0432900190353394, "logits/rejected": -1.0432900190353394, "logps/chosen": -174.57675170898438, "logps/rejected": -174.57675170898438, "loss": 0.3471, "rewards/accuracies": 0.0, "rewards/chosen": -5.063391208648682, "rewards/margins": 0.0, "rewards/rejected": -5.063391208648682, "step": 5856 }, { "epoch": 1.3, "learning_rate": 8.002645751736841e-06, "logits/chosen": -1.0741217136383057, "logits/rejected": -1.0395996570587158, "logps/chosen": -213.49520874023438, "logps/rejected": -239.6105499267578, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.02825927734375, "rewards/margins": 13.990202903747559, "rewards/rejected": -12.961943626403809, "step": 5857 }, { "epoch": 1.3, "learning_rate": 8.001212408188052e-06, "logits/chosen": -1.2935620546340942, "logits/rejected": -1.3370808362960815, "logps/chosen": -156.29798889160156, "logps/rejected": -112.843994140625, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/chosen": -0.31451722979545593, "rewards/margins": 2.727151393890381, "rewards/rejected": -3.041668653488159, "step": 5858 }, { "epoch": 1.3, "learning_rate": 7.999778678990685e-06, "logits/chosen": -0.9747611284255981, "logits/rejected": -0.9296733140945435, "logps/chosen": -100.12213134765625, "logps/rejected": -196.81240844726562, "loss": 0.0699, "rewards/accuracies": 1.0, "rewards/chosen": -1.5468895435333252, "rewards/margins": 2.3818717002868652, "rewards/rejected": -3.9287612438201904, "step": 5859 }, { "epoch": 1.3, "learning_rate": 7.998344564328967e-06, "logits/chosen": -1.2920018434524536, "logits/rejected": -1.227280855178833, "logps/chosen": -112.03583526611328, "logps/rejected": -119.70878601074219, "loss": 0.6914, "rewards/accuracies": 1.0, "rewards/chosen": -1.4003517627716064, "rewards/margins": 0.007030487060546875, "rewards/rejected": -1.4073822498321533, "step": 5860 }, { "epoch": 1.3, "learning_rate": 7.996910064387181e-06, "logits/chosen": -1.1962908506393433, "logits/rejected": -1.1340957880020142, "logps/chosen": -205.55856323242188, "logps/rejected": -354.8684997558594, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -2.707249402999878, "rewards/margins": 3.8157031536102295, "rewards/rejected": -6.522952556610107, "step": 5861 }, { "epoch": 1.3, "learning_rate": 7.995475179349657e-06, "logits/chosen": -1.2183494567871094, "logits/rejected": -1.2541530132293701, "logps/chosen": -121.10420227050781, "logps/rejected": -133.8094940185547, "loss": 0.2701, "rewards/accuracies": 1.0, "rewards/chosen": -0.13141785562038422, "rewards/margins": 0.35883331298828125, "rewards/rejected": -0.4902511537075043, "step": 5862 }, { "epoch": 1.3, "learning_rate": 7.994039909400773e-06, "logits/chosen": -1.619462490081787, "logits/rejected": -1.5560922622680664, "logps/chosen": -161.10977172851562, "logps/rejected": -219.92535400390625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.6182861328125, "rewards/margins": 7.062967300415039, "rewards/rejected": -8.681253433227539, "step": 5863 }, { "epoch": 1.3, "learning_rate": 7.992604254724957e-06, "logits/chosen": -0.9464390277862549, "logits/rejected": -0.9464390277862549, "logps/chosen": -135.0618438720703, "logps/rejected": -135.0618438720703, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -7.633479595184326, "rewards/margins": 0.0, "rewards/rejected": -7.633479595184326, "step": 5864 }, { "epoch": 1.3, "learning_rate": 7.991168215506688e-06, "logits/chosen": -1.0388944149017334, "logits/rejected": -1.0050472021102905, "logps/chosen": -96.34446716308594, "logps/rejected": -86.21653747558594, "loss": 1.371, "rewards/accuracies": 0.0, "rewards/chosen": -6.875221252441406, "rewards/margins": -2.6752734184265137, "rewards/rejected": -4.199947834014893, "step": 5865 }, { "epoch": 1.3, "learning_rate": 7.989731791930497e-06, "logits/chosen": -1.3669650554656982, "logits/rejected": -1.2549314498901367, "logps/chosen": -137.55319213867188, "logps/rejected": -438.05242919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3429787158966064, "rewards/margins": 11.66958236694336, "rewards/rejected": -13.012560844421387, "step": 5866 }, { "epoch": 1.3, "learning_rate": 7.988294984180956e-06, "logits/chosen": -1.4457776546478271, "logits/rejected": -1.473131537437439, "logps/chosen": -145.86619567871094, "logps/rejected": -155.13229370117188, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -2.761509656906128, "rewards/margins": 4.482693672180176, "rewards/rejected": -7.244203090667725, "step": 5867 }, { "epoch": 1.3, "learning_rate": 7.986857792442692e-06, "logits/chosen": -1.556574821472168, "logits/rejected": -1.5243922472000122, "logps/chosen": -159.7787322998047, "logps/rejected": -232.1341552734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.31750795245170593, "rewards/margins": 9.494012832641602, "rewards/rejected": -9.81152057647705, "step": 5868 }, { "epoch": 1.3, "learning_rate": 7.985420216900384e-06, "logits/chosen": -1.3618462085723877, "logits/rejected": -1.3058592081069946, "logps/chosen": -96.00515747070312, "logps/rejected": -230.59605407714844, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.2050026655197144, "rewards/margins": 4.945803165435791, "rewards/rejected": -6.150805950164795, "step": 5869 }, { "epoch": 1.3, "learning_rate": 7.983982257738752e-06, "logits/chosen": -1.498637080192566, "logits/rejected": -1.2621748447418213, "logps/chosen": -119.79337310791016, "logps/rejected": -326.32989501953125, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": -0.6689506769180298, "rewards/margins": 2.579730987548828, "rewards/rejected": -3.2486817836761475, "step": 5870 }, { "epoch": 1.3, "learning_rate": 7.982543915142575e-06, "logits/chosen": -1.7530112266540527, "logits/rejected": -1.7656774520874023, "logps/chosen": -121.57905578613281, "logps/rejected": -102.20343780517578, "loss": 0.2822, "rewards/accuracies": 1.0, "rewards/chosen": -1.660456895828247, "rewards/margins": 0.2764556407928467, "rewards/rejected": -1.9369125366210938, "step": 5871 }, { "epoch": 1.3, "learning_rate": 7.981105189296676e-06, "logits/chosen": -1.1846017837524414, "logits/rejected": -1.1586076021194458, "logps/chosen": -97.40567779541016, "logps/rejected": -190.21859741210938, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.0073082447052, "rewards/margins": 4.907297134399414, "rewards/rejected": -6.914605140686035, "step": 5872 }, { "epoch": 1.3, "learning_rate": 7.979666080385923e-06, "logits/chosen": -1.1007131338119507, "logits/rejected": -1.1007131338119507, "logps/chosen": -195.18014526367188, "logps/rejected": -195.18014526367188, "loss": 0.424, "rewards/accuracies": 0.0, "rewards/chosen": -5.964085578918457, "rewards/margins": 0.0, "rewards/rejected": -5.964085578918457, "step": 5873 }, { "epoch": 1.3, "learning_rate": 7.978226588595245e-06, "logits/chosen": -1.4731358289718628, "logits/rejected": -1.3344593048095703, "logps/chosen": -104.63081359863281, "logps/rejected": -271.6444396972656, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.3059463500976562, "rewards/margins": 3.955336093902588, "rewards/rejected": -5.261282444000244, "step": 5874 }, { "epoch": 1.3, "learning_rate": 7.976786714109608e-06, "logits/chosen": -0.8705533742904663, "logits/rejected": -0.8178645968437195, "logps/chosen": -176.8852996826172, "logps/rejected": -191.4960479736328, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.5220443606376648, "rewards/margins": 8.150049209594727, "rewards/rejected": -7.628004550933838, "step": 5875 }, { "epoch": 1.3, "learning_rate": 7.975346457114034e-06, "logits/chosen": -1.3105453252792358, "logits/rejected": -1.2742135524749756, "logps/chosen": -253.57847595214844, "logps/rejected": -231.14053344726562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.2650437355041504, "rewards/margins": 11.725883483886719, "rewards/rejected": -8.460840225219727, "step": 5876 }, { "epoch": 1.3, "learning_rate": 7.973905817793594e-06, "logits/chosen": -1.2421427965164185, "logits/rejected": -1.2895056009292603, "logps/chosen": -161.5589599609375, "logps/rejected": -135.63555908203125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -5.976595401763916, "rewards/margins": 5.438138484954834, "rewards/rejected": -11.41473388671875, "step": 5877 }, { "epoch": 1.3, "learning_rate": 7.972464796333408e-06, "logits/chosen": -1.1552467346191406, "logits/rejected": -1.2074953317642212, "logps/chosen": -220.2863311767578, "logps/rejected": -158.23373413085938, "loss": 0.1862, "rewards/accuracies": 1.0, "rewards/chosen": -2.9854233264923096, "rewards/margins": 0.7960898876190186, "rewards/rejected": -3.781513214111328, "step": 5878 }, { "epoch": 1.3, "learning_rate": 7.971023392918637e-06, "logits/chosen": -1.2120962142944336, "logits/rejected": -1.2067188024520874, "logps/chosen": -139.3795166015625, "logps/rejected": -111.67496490478516, "loss": 0.468, "rewards/accuracies": 0.0, "rewards/chosen": -3.261983633041382, "rewards/margins": -0.4375474452972412, "rewards/rejected": -2.8244361877441406, "step": 5879 }, { "epoch": 1.3, "learning_rate": 7.969581607734504e-06, "logits/chosen": -1.3325061798095703, "logits/rejected": -1.312890887260437, "logps/chosen": -110.95965576171875, "logps/rejected": -156.93295288085938, "loss": 0.0839, "rewards/accuracies": 1.0, "rewards/chosen": -1.1546516418457031, "rewards/margins": 1.700228214263916, "rewards/rejected": -2.854879856109619, "step": 5880 }, { "epoch": 1.3, "learning_rate": 7.968139440966271e-06, "logits/chosen": -1.5117675065994263, "logits/rejected": -1.4269070625305176, "logps/chosen": -158.0105743408203, "logps/rejected": -284.32867431640625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.1178131103515625, "rewards/margins": 5.550086975097656, "rewards/rejected": -6.667900085449219, "step": 5881 }, { "epoch": 1.3, "learning_rate": 7.966696892799257e-06, "logits/chosen": -1.517702579498291, "logits/rejected": -1.509402871131897, "logps/chosen": -121.21842956542969, "logps/rejected": -105.40322875976562, "loss": 0.4349, "rewards/accuracies": 1.0, "rewards/chosen": -2.833975315093994, "rewards/margins": 0.05023503303527832, "rewards/rejected": -2.8842103481292725, "step": 5882 }, { "epoch": 1.3, "learning_rate": 7.965253963418825e-06, "logits/chosen": -1.4472852945327759, "logits/rejected": -1.4689263105392456, "logps/chosen": -208.10357666015625, "logps/rejected": -220.16578674316406, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.203869581222534, "rewards/margins": 6.375129699707031, "rewards/rejected": -8.578999519348145, "step": 5883 }, { "epoch": 1.3, "learning_rate": 7.963810653010385e-06, "logits/chosen": -1.2791355848312378, "logits/rejected": -1.2618367671966553, "logps/chosen": -152.8453826904297, "logps/rejected": -194.39840698242188, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -2.1262528896331787, "rewards/margins": 4.415185928344727, "rewards/rejected": -6.541438579559326, "step": 5884 }, { "epoch": 1.3, "learning_rate": 7.962366961759402e-06, "logits/chosen": -1.356882929801941, "logits/rejected": -1.4354832172393799, "logps/chosen": -149.38491821289062, "logps/rejected": -116.2374496459961, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -2.2118728160858154, "rewards/margins": 4.277880668640137, "rewards/rejected": -6.489753246307373, "step": 5885 }, { "epoch": 1.3, "learning_rate": 7.960922889851386e-06, "logits/chosen": -1.2881731986999512, "logits/rejected": -1.2740931510925293, "logps/chosen": -114.15621948242188, "logps/rejected": -187.60733032226562, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -1.6073410511016846, "rewards/margins": 6.431096076965332, "rewards/rejected": -8.038436889648438, "step": 5886 }, { "epoch": 1.3, "learning_rate": 7.959478437471894e-06, "logits/chosen": -1.2889727354049683, "logits/rejected": -0.7773368954658508, "logps/chosen": -164.5621337890625, "logps/rejected": -401.54046630859375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.015853404998779, "rewards/margins": 26.551015853881836, "rewards/rejected": -31.566869735717773, "step": 5887 }, { "epoch": 1.3, "learning_rate": 7.95803360480654e-06, "logits/chosen": -0.8755329847335815, "logits/rejected": -0.8736719489097595, "logps/chosen": -71.76978302001953, "logps/rejected": -70.13601684570312, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -2.8383629322052, "rewards/margins": 2.808095693588257, "rewards/rejected": -5.646458625793457, "step": 5888 }, { "epoch": 1.3, "learning_rate": 7.956588392040978e-06, "logits/chosen": -1.0249191522598267, "logits/rejected": -1.0551115274429321, "logps/chosen": -119.29183959960938, "logps/rejected": -109.16674041748047, "loss": 0.1886, "rewards/accuracies": 1.0, "rewards/chosen": -5.915666580200195, "rewards/margins": 1.4986653327941895, "rewards/rejected": -7.414331912994385, "step": 5889 }, { "epoch": 1.3, "learning_rate": 7.955142799360914e-06, "logits/chosen": -1.2127797603607178, "logits/rejected": -1.2057369947433472, "logps/chosen": -252.1214599609375, "logps/rejected": -262.93536376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.47598573565483093, "rewards/margins": 14.766111373901367, "rewards/rejected": -14.290125846862793, "step": 5890 }, { "epoch": 1.3, "learning_rate": 7.953696826952106e-06, "logits/chosen": -1.4586514234542847, "logits/rejected": -1.4356181621551514, "logps/chosen": -68.802001953125, "logps/rejected": -145.0501708984375, "loss": 0.3499, "rewards/accuracies": 1.0, "rewards/chosen": -2.103973150253296, "rewards/margins": 4.994714736938477, "rewards/rejected": -7.098687648773193, "step": 5891 }, { "epoch": 1.3, "learning_rate": 7.952250475000354e-06, "logits/chosen": -1.167447805404663, "logits/rejected": -1.14999520778656, "logps/chosen": -138.0447998046875, "logps/rejected": -217.41049194335938, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -5.555768013000488, "rewards/margins": 3.1199750900268555, "rewards/rejected": -8.675743103027344, "step": 5892 }, { "epoch": 1.3, "learning_rate": 7.950803743691516e-06, "logits/chosen": -1.1744879484176636, "logits/rejected": -1.1260285377502441, "logps/chosen": -124.06651306152344, "logps/rejected": -128.53237915039062, "loss": 0.2922, "rewards/accuracies": 1.0, "rewards/chosen": -4.695775032043457, "rewards/margins": 0.2962784767150879, "rewards/rejected": -4.992053508758545, "step": 5893 }, { "epoch": 1.3, "learning_rate": 7.949356633211487e-06, "logits/chosen": -1.2815210819244385, "logits/rejected": -1.3144015073776245, "logps/chosen": -83.88372802734375, "logps/rejected": -87.9896240234375, "loss": 0.4042, "rewards/accuracies": 0.0, "rewards/chosen": -1.8085167407989502, "rewards/margins": -0.21846389770507812, "rewards/rejected": -1.590052843093872, "step": 5894 }, { "epoch": 1.3, "learning_rate": 7.947909143746221e-06, "logits/chosen": -1.5396039485931396, "logits/rejected": -1.621346116065979, "logps/chosen": -188.87503051757812, "logps/rejected": -164.2288360595703, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -3.0256271362304688, "rewards/margins": 5.7726240158081055, "rewards/rejected": -8.798251152038574, "step": 5895 }, { "epoch": 1.31, "learning_rate": 7.946461275481719e-06, "logits/chosen": -1.385786771774292, "logits/rejected": -1.385786771774292, "logps/chosen": -204.2930450439453, "logps/rejected": -204.2930450439453, "loss": 0.3471, "rewards/accuracies": 0.0, "rewards/chosen": -4.2144975662231445, "rewards/margins": 0.0, "rewards/rejected": -4.2144975662231445, "step": 5896 }, { "epoch": 1.31, "learning_rate": 7.945013028604026e-06, "logits/chosen": -1.3650976419448853, "logits/rejected": -1.3242013454437256, "logps/chosen": -155.22552490234375, "logps/rejected": -210.3240509033203, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.4153198301792145, "rewards/margins": 8.538559913635254, "rewards/rejected": -8.953879356384277, "step": 5897 }, { "epoch": 1.31, "learning_rate": 7.943564403299238e-06, "logits/chosen": -1.4194109439849854, "logits/rejected": -1.4130045175552368, "logps/chosen": -73.5158462524414, "logps/rejected": -119.18194580078125, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -3.567152738571167, "rewards/margins": 6.254510879516602, "rewards/rejected": -9.821663856506348, "step": 5898 }, { "epoch": 1.31, "learning_rate": 7.9421153997535e-06, "logits/chosen": -1.4661363363265991, "logits/rejected": -1.4560643434524536, "logps/chosen": -175.615234375, "logps/rejected": -82.5280990600586, "loss": 1.8173, "rewards/accuracies": 0.0, "rewards/chosen": -9.940299034118652, "rewards/margins": -3.5638318061828613, "rewards/rejected": -6.376467227935791, "step": 5899 }, { "epoch": 1.31, "learning_rate": 7.940666018153004e-06, "logits/chosen": -1.2352834939956665, "logits/rejected": -1.1590534448623657, "logps/chosen": -95.52857971191406, "logps/rejected": -216.19876098632812, "loss": 0.0965, "rewards/accuracies": 1.0, "rewards/chosen": -1.3741852045059204, "rewards/margins": 1.546855092048645, "rewards/rejected": -2.9210402965545654, "step": 5900 }, { "epoch": 1.31, "learning_rate": 7.939216258683997e-06, "logits/chosen": -1.3913947343826294, "logits/rejected": -1.4171240329742432, "logps/chosen": -90.78546142578125, "logps/rejected": -93.0201416015625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.5014686584472656, "rewards/margins": 4.806931495666504, "rewards/rejected": -7.3084001541137695, "step": 5901 }, { "epoch": 1.31, "learning_rate": 7.937766121532766e-06, "logits/chosen": -1.6865334510803223, "logits/rejected": -1.6405763626098633, "logps/chosen": -140.48065185546875, "logps/rejected": -247.38336181640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.456381320953369, "rewards/margins": 7.998318195343018, "rewards/rejected": -10.454699516296387, "step": 5902 }, { "epoch": 1.31, "learning_rate": 7.936315606885649e-06, "logits/chosen": -1.4719282388687134, "logits/rejected": -1.4929101467132568, "logps/chosen": -116.84149169921875, "logps/rejected": -166.62335205078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7668030261993408, "rewards/margins": 7.855496406555176, "rewards/rejected": -9.622299194335938, "step": 5903 }, { "epoch": 1.31, "learning_rate": 7.934864714929036e-06, "logits/chosen": -1.2986944913864136, "logits/rejected": -1.2953627109527588, "logps/chosen": -86.78813171386719, "logps/rejected": -95.77360534667969, "loss": 0.0608, "rewards/accuracies": 1.0, "rewards/chosen": -2.4006545543670654, "rewards/margins": 2.0484015941619873, "rewards/rejected": -4.449056148529053, "step": 5904 }, { "epoch": 1.31, "learning_rate": 7.933413445849361e-06, "logits/chosen": -1.3516658544540405, "logits/rejected": -1.2666919231414795, "logps/chosen": -196.58560180664062, "logps/rejected": -187.63768005371094, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 1.4346160888671875, "rewards/margins": 7.173887729644775, "rewards/rejected": -5.739271640777588, "step": 5905 }, { "epoch": 1.31, "learning_rate": 7.931961799833112e-06, "logits/chosen": -1.3586808443069458, "logits/rejected": -1.3052068948745728, "logps/chosen": -117.5418701171875, "logps/rejected": -239.42918395996094, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -3.230616807937622, "rewards/margins": 4.6483154296875, "rewards/rejected": -7.878932476043701, "step": 5906 }, { "epoch": 1.31, "learning_rate": 7.930509777066819e-06, "logits/chosen": -1.5922226905822754, "logits/rejected": -1.6095478534698486, "logps/chosen": -127.323974609375, "logps/rejected": -105.40856170654297, "loss": 0.2468, "rewards/accuracies": 1.0, "rewards/chosen": -4.7809882164001465, "rewards/margins": 0.4495811462402344, "rewards/rejected": -5.230569362640381, "step": 5907 }, { "epoch": 1.31, "learning_rate": 7.929057377737064e-06, "logits/chosen": -1.6321029663085938, "logits/rejected": -1.6321864128112793, "logps/chosen": -102.36073303222656, "logps/rejected": -75.36920166015625, "loss": 0.6959, "rewards/accuracies": 0.0, "rewards/chosen": -6.519652843475342, "rewards/margins": -0.010997772216796875, "rewards/rejected": -6.508655071258545, "step": 5908 }, { "epoch": 1.31, "learning_rate": 7.92760460203048e-06, "logits/chosen": -1.0728466510772705, "logits/rejected": -1.0703550577163696, "logps/chosen": -94.42646026611328, "logps/rejected": -170.94677734375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.4690109193325043, "rewards/margins": 6.639273166656494, "rewards/rejected": -7.108283996582031, "step": 5909 }, { "epoch": 1.31, "learning_rate": 7.926151450133738e-06, "logits/chosen": -1.277841329574585, "logits/rejected": -1.2616171836853027, "logps/chosen": -111.18317413330078, "logps/rejected": -94.54358673095703, "loss": 0.3212, "rewards/accuracies": 1.0, "rewards/chosen": -2.1249237060546875, "rewards/margins": 0.1040198802947998, "rewards/rejected": -2.2289435863494873, "step": 5910 }, { "epoch": 1.31, "learning_rate": 7.924697922233571e-06, "logits/chosen": -1.1497293710708618, "logits/rejected": -1.1497293710708618, "logps/chosen": -137.9325408935547, "logps/rejected": -137.9325408935547, "loss": 0.3508, "rewards/accuracies": 0.0, "rewards/chosen": -6.8090996742248535, "rewards/margins": 0.0, "rewards/rejected": -6.8090996742248535, "step": 5911 }, { "epoch": 1.31, "learning_rate": 7.923244018516751e-06, "logits/chosen": -1.1038893461227417, "logits/rejected": -1.1485767364501953, "logps/chosen": -263.63006591796875, "logps/rejected": -366.52020263671875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1743592023849487, "rewards/margins": 6.398504734039307, "rewards/rejected": -7.572864055633545, "step": 5912 }, { "epoch": 1.31, "learning_rate": 7.921789739170102e-06, "logits/chosen": -1.3788986206054688, "logits/rejected": -1.0307459831237793, "logps/chosen": -121.18424987792969, "logps/rejected": -710.6488037109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.570180654525757, "rewards/margins": 52.72991180419922, "rewards/rejected": -56.30009078979492, "step": 5913 }, { "epoch": 1.31, "learning_rate": 7.920335084380497e-06, "logits/chosen": -1.5860695838928223, "logits/rejected": -1.578177809715271, "logps/chosen": -194.38209533691406, "logps/rejected": -174.09043884277344, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -4.082040309906006, "rewards/margins": 2.6099328994750977, "rewards/rejected": -6.6919732093811035, "step": 5914 }, { "epoch": 1.31, "learning_rate": 7.918880054334853e-06, "logits/chosen": -1.2973662614822388, "logits/rejected": -1.2973662614822388, "logps/chosen": -151.17510986328125, "logps/rejected": -151.17510986328125, "loss": 0.359, "rewards/accuracies": 0.0, "rewards/chosen": -5.775152683258057, "rewards/margins": 0.0, "rewards/rejected": -5.775152683258057, "step": 5915 }, { "epoch": 1.31, "learning_rate": 7.91742464922014e-06, "logits/chosen": -1.5364419221878052, "logits/rejected": -1.5286799669265747, "logps/chosen": -100.81501007080078, "logps/rejected": -102.93150329589844, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -2.6217308044433594, "rewards/margins": 4.202583312988281, "rewards/rejected": -6.824314117431641, "step": 5916 }, { "epoch": 1.31, "learning_rate": 7.915968869223372e-06, "logits/chosen": -1.2034142017364502, "logits/rejected": -1.19667649269104, "logps/chosen": -287.269775390625, "logps/rejected": -229.99395751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.5102173089981079, "rewards/margins": 9.69163990020752, "rewards/rejected": -9.181422233581543, "step": 5917 }, { "epoch": 1.31, "learning_rate": 7.914512714531612e-06, "logits/chosen": -1.4875106811523438, "logits/rejected": -1.4418541193008423, "logps/chosen": -130.39999389648438, "logps/rejected": -127.04512786865234, "loss": 0.1906, "rewards/accuracies": 1.0, "rewards/chosen": -1.3882538080215454, "rewards/margins": 0.7687019109725952, "rewards/rejected": -2.1569557189941406, "step": 5918 }, { "epoch": 1.31, "learning_rate": 7.913056185331978e-06, "logits/chosen": -1.4146265983581543, "logits/rejected": -1.4146265983581543, "logps/chosen": -174.80149841308594, "logps/rejected": -174.80149841308594, "loss": 0.3491, "rewards/accuracies": 0.0, "rewards/chosen": -5.109200954437256, "rewards/margins": 0.0, "rewards/rejected": -5.109200954437256, "step": 5919 }, { "epoch": 1.31, "learning_rate": 7.911599281811624e-06, "logits/chosen": -1.1395342350006104, "logits/rejected": -1.2359228134155273, "logps/chosen": -118.43643188476562, "logps/rejected": -123.54037475585938, "loss": 0.3573, "rewards/accuracies": 0.0, "rewards/chosen": -3.4977707862854004, "rewards/margins": -0.039124250411987305, "rewards/rejected": -3.458646535873413, "step": 5920 }, { "epoch": 1.31, "learning_rate": 7.910142004157762e-06, "logits/chosen": -1.2550146579742432, "logits/rejected": -1.2050210237503052, "logps/chosen": -152.62490844726562, "logps/rejected": -208.3721923828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.3447509706020355, "rewards/margins": 8.034266471862793, "rewards/rejected": -8.37901782989502, "step": 5921 }, { "epoch": 1.31, "learning_rate": 7.90868435255765e-06, "logits/chosen": -1.3085181713104248, "logits/rejected": -1.3588241338729858, "logps/chosen": -152.44906616210938, "logps/rejected": -152.8203887939453, "loss": 0.2965, "rewards/accuracies": 1.0, "rewards/chosen": -4.599483489990234, "rewards/margins": 0.21221542358398438, "rewards/rejected": -4.811698913574219, "step": 5922 }, { "epoch": 1.31, "learning_rate": 7.90722632719859e-06, "logits/chosen": -1.5892348289489746, "logits/rejected": -1.6016197204589844, "logps/chosen": -127.08748626708984, "logps/rejected": -193.96881103515625, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -2.170861005783081, "rewards/margins": 6.699639320373535, "rewards/rejected": -8.870500564575195, "step": 5923 }, { "epoch": 1.31, "learning_rate": 7.905767928267936e-06, "logits/chosen": -1.4608676433563232, "logits/rejected": -1.4829206466674805, "logps/chosen": -149.9047393798828, "logps/rejected": -137.79225158691406, "loss": 0.2052, "rewards/accuracies": 1.0, "rewards/chosen": -3.15165638923645, "rewards/margins": 0.6784248352050781, "rewards/rejected": -3.8300812244415283, "step": 5924 }, { "epoch": 1.31, "learning_rate": 7.904309155953087e-06, "logits/chosen": -0.9783840179443359, "logits/rejected": -0.6139146685600281, "logps/chosen": -198.93777465820312, "logps/rejected": -417.99542236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.5727325677871704, "rewards/margins": 23.443387985229492, "rewards/rejected": -22.870655059814453, "step": 5925 }, { "epoch": 1.31, "learning_rate": 7.902850010441494e-06, "logits/chosen": -1.7666828632354736, "logits/rejected": -1.7728121280670166, "logps/chosen": -165.13339233398438, "logps/rejected": -184.90328979492188, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.03974304348230362, "rewards/margins": 11.965020179748535, "rewards/rejected": -11.925276756286621, "step": 5926 }, { "epoch": 1.31, "learning_rate": 7.901390491920655e-06, "logits/chosen": -1.1113754510879517, "logits/rejected": -1.1170275211334229, "logps/chosen": -125.78338623046875, "logps/rejected": -229.55453491210938, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.384871006011963, "rewards/margins": 6.011966228485107, "rewards/rejected": -8.39683723449707, "step": 5927 }, { "epoch": 1.31, "learning_rate": 7.899930600578112e-06, "logits/chosen": -1.0157746076583862, "logits/rejected": -0.9728959798812866, "logps/chosen": -122.71768188476562, "logps/rejected": -135.19007873535156, "loss": 0.0976, "rewards/accuracies": 1.0, "rewards/chosen": -4.492211818695068, "rewards/margins": 2.091279983520508, "rewards/rejected": -6.583491802215576, "step": 5928 }, { "epoch": 1.31, "learning_rate": 7.898470336601456e-06, "logits/chosen": -1.340032935142517, "logits/rejected": -1.363080382347107, "logps/chosen": -153.2122802734375, "logps/rejected": -149.89224243164062, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": -2.57187819480896, "rewards/margins": 2.5940053462982178, "rewards/rejected": -5.165883541107178, "step": 5929 }, { "epoch": 1.31, "learning_rate": 7.897009700178331e-06, "logits/chosen": -1.5546516180038452, "logits/rejected": -1.5546516180038452, "logps/chosen": -209.982666015625, "logps/rejected": -209.982666015625, "loss": 0.3474, "rewards/accuracies": 0.0, "rewards/chosen": -4.660177707672119, "rewards/margins": 0.0, "rewards/rejected": -4.660177707672119, "step": 5930 }, { "epoch": 1.31, "learning_rate": 7.895548691496421e-06, "logits/chosen": -1.2613065242767334, "logits/rejected": -1.2973171472549438, "logps/chosen": -138.49876403808594, "logps/rejected": -194.24728393554688, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -1.9691437482833862, "rewards/margins": 5.885244846343994, "rewards/rejected": -7.85438871383667, "step": 5931 }, { "epoch": 1.31, "learning_rate": 7.894087310743468e-06, "logits/chosen": -1.3269039392471313, "logits/rejected": -1.3327637910842896, "logps/chosen": -146.88339233398438, "logps/rejected": -215.15399169921875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.0501495599746704, "rewards/margins": 9.690629959106445, "rewards/rejected": -10.740779876708984, "step": 5932 }, { "epoch": 1.31, "learning_rate": 7.892625558107252e-06, "logits/chosen": -0.9991261959075928, "logits/rejected": -0.968361496925354, "logps/chosen": -192.76438903808594, "logps/rejected": -216.973388671875, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": 0.45491334795951843, "rewards/margins": 13.461265563964844, "rewards/rejected": -13.006352424621582, "step": 5933 }, { "epoch": 1.31, "learning_rate": 7.891163433775605e-06, "logits/chosen": -1.480406403541565, "logits/rejected": -1.479713797569275, "logps/chosen": -117.69844055175781, "logps/rejected": -135.90338134765625, "loss": 0.364, "rewards/accuracies": 0.0, "rewards/chosen": -2.6909148693084717, "rewards/margins": -0.06813573837280273, "rewards/rejected": -2.622779130935669, "step": 5934 }, { "epoch": 1.31, "learning_rate": 7.889700937936408e-06, "logits/chosen": -1.6881389617919922, "logits/rejected": -1.708516240119934, "logps/chosen": -118.99057006835938, "logps/rejected": -210.29998779296875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.4863221645355225, "rewards/margins": 7.673480033874512, "rewards/rejected": -10.159802436828613, "step": 5935 }, { "epoch": 1.31, "learning_rate": 7.888238070777586e-06, "logits/chosen": -1.365250825881958, "logits/rejected": -1.3623476028442383, "logps/chosen": -61.661888122558594, "logps/rejected": -86.60232543945312, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": 0.5925941467285156, "rewards/margins": 2.7122154235839844, "rewards/rejected": -2.1196212768554688, "step": 5936 }, { "epoch": 1.31, "learning_rate": 7.886774832487116e-06, "logits/chosen": -1.2261443138122559, "logits/rejected": -1.1928412914276123, "logps/chosen": -81.0995101928711, "logps/rejected": -122.08202362060547, "loss": 0.3173, "rewards/accuracies": 1.0, "rewards/chosen": -4.688095569610596, "rewards/margins": 0.16738605499267578, "rewards/rejected": -4.8554816246032715, "step": 5937 }, { "epoch": 1.31, "learning_rate": 7.885311223253018e-06, "logits/chosen": -1.2954071760177612, "logits/rejected": -1.2954071760177612, "logps/chosen": -229.24530029296875, "logps/rejected": -229.24530029296875, "loss": 0.3477, "rewards/accuracies": 0.0, "rewards/chosen": -6.398171901702881, "rewards/margins": 0.0, "rewards/rejected": -6.398171901702881, "step": 5938 }, { "epoch": 1.31, "learning_rate": 7.883847243263366e-06, "logits/chosen": -1.5749874114990234, "logits/rejected": -1.500370979309082, "logps/chosen": -173.82969665527344, "logps/rejected": -191.3929443359375, "loss": 3.082, "rewards/accuracies": 0.0, "rewards/chosen": -6.147244453430176, "rewards/margins": -6.158870220184326, "rewards/rejected": 0.01162567175924778, "step": 5939 }, { "epoch": 1.31, "learning_rate": 7.882382892706273e-06, "logits/chosen": -1.1689010858535767, "logits/rejected": -0.6147762537002563, "logps/chosen": -110.14450073242188, "logps/rejected": -693.5379638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.39324188232421875, "rewards/margins": 51.92093276977539, "rewards/rejected": -52.31417465209961, "step": 5940 }, { "epoch": 1.31, "learning_rate": 7.88091817176991e-06, "logits/chosen": -1.0943925380706787, "logits/rejected": -1.095357894897461, "logps/chosen": -234.90977478027344, "logps/rejected": -241.6951446533203, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.7714736461639404, "rewards/margins": 15.941047668457031, "rewards/rejected": -13.169573783874512, "step": 5941 }, { "epoch": 1.32, "learning_rate": 7.879453080642486e-06, "logits/chosen": -1.3520370721817017, "logits/rejected": -1.297682762145996, "logps/chosen": -160.31564331054688, "logps/rejected": -192.95875549316406, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.7076050043106079, "rewards/margins": 2.1453676223754883, "rewards/rejected": -1.4377624988555908, "step": 5942 }, { "epoch": 1.32, "learning_rate": 7.877987619512263e-06, "logits/chosen": -1.3938472270965576, "logits/rejected": -1.085801362991333, "logps/chosen": -164.5630645751953, "logps/rejected": -277.8546447753906, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 0.03790435940027237, "rewards/margins": 17.521682739257812, "rewards/rejected": -17.48377799987793, "step": 5943 }, { "epoch": 1.32, "learning_rate": 7.87652178856755e-06, "logits/chosen": -1.396061897277832, "logits/rejected": -1.357845664024353, "logps/chosen": -141.64219665527344, "logps/rejected": -235.4678192138672, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -5.122862339019775, "rewards/margins": 4.509667873382568, "rewards/rejected": -9.632530212402344, "step": 5944 }, { "epoch": 1.32, "learning_rate": 7.875055587996703e-06, "logits/chosen": -1.5908503532409668, "logits/rejected": -1.5824646949768066, "logps/chosen": -127.14649963378906, "logps/rejected": -169.0509490966797, "loss": 0.2739, "rewards/accuracies": 1.0, "rewards/chosen": -1.5042259693145752, "rewards/margins": 6.5884246826171875, "rewards/rejected": -8.092650413513184, "step": 5945 }, { "epoch": 1.32, "learning_rate": 7.873589017988124e-06, "logits/chosen": -1.3521649837493896, "logits/rejected": -1.3521649837493896, "logps/chosen": -213.06878662109375, "logps/rejected": -213.06878662109375, "loss": 0.3475, "rewards/accuracies": 0.0, "rewards/chosen": -9.513996124267578, "rewards/margins": 0.0, "rewards/rejected": -9.513996124267578, "step": 5946 }, { "epoch": 1.32, "learning_rate": 7.872122078730263e-06, "logits/chosen": -1.6761847734451294, "logits/rejected": -1.5729633569717407, "logps/chosen": -134.81959533691406, "logps/rejected": -266.20281982421875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.690603017807007, "rewards/margins": 5.968044281005859, "rewards/rejected": -9.658647537231445, "step": 5947 }, { "epoch": 1.32, "learning_rate": 7.87065477041162e-06, "logits/chosen": -1.186309576034546, "logits/rejected": -1.1194781064987183, "logps/chosen": -88.97904968261719, "logps/rejected": -212.210693359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.5358413457870483, "rewards/margins": 10.131927490234375, "rewards/rejected": -11.667768478393555, "step": 5948 }, { "epoch": 1.32, "learning_rate": 7.86918709322074e-06, "logits/chosen": -1.3161208629608154, "logits/rejected": -1.3347924947738647, "logps/chosen": -117.41886901855469, "logps/rejected": -166.8895721435547, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.758612871170044, "rewards/margins": 6.789542198181152, "rewards/rejected": -8.548154830932617, "step": 5949 }, { "epoch": 1.32, "learning_rate": 7.867719047346216e-06, "logits/chosen": -1.344866394996643, "logits/rejected": -1.2194093465805054, "logps/chosen": -162.29981994628906, "logps/rejected": -280.5404052734375, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": -2.2267684936523438, "rewards/margins": 2.128814697265625, "rewards/rejected": -4.355583190917969, "step": 5950 }, { "epoch": 1.32, "learning_rate": 7.86625063297669e-06, "logits/chosen": -1.5047694444656372, "logits/rejected": -1.5047694444656372, "logps/chosen": -147.0618896484375, "logps/rejected": -147.0618896484375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.2096710205078125, "rewards/margins": 0.0, "rewards/rejected": -4.2096710205078125, "step": 5951 }, { "epoch": 1.32, "learning_rate": 7.864781850300844e-06, "logits/chosen": -1.2735655307769775, "logits/rejected": -1.2246284484863281, "logps/chosen": -151.6876983642578, "logps/rejected": -228.714111328125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -3.4455108642578125, "rewards/margins": 5.985496520996094, "rewards/rejected": -9.431007385253906, "step": 5952 }, { "epoch": 1.32, "learning_rate": 7.863312699507419e-06, "logits/chosen": -1.0637246370315552, "logits/rejected": -0.9923622608184814, "logps/chosen": -116.56034851074219, "logps/rejected": -222.15682983398438, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.5153442621231079, "rewards/margins": 7.615087985992432, "rewards/rejected": -8.13043212890625, "step": 5953 }, { "epoch": 1.32, "learning_rate": 7.861843180785196e-06, "logits/chosen": -1.151272177696228, "logits/rejected": -1.0732334852218628, "logps/chosen": -122.56468200683594, "logps/rejected": -307.91180419921875, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -2.9378952980041504, "rewards/margins": 3.9318251609802246, "rewards/rejected": -6.869720458984375, "step": 5954 }, { "epoch": 1.32, "learning_rate": 7.860373294323002e-06, "logits/chosen": -1.5418460369110107, "logits/rejected": -1.5639848709106445, "logps/chosen": -126.10855102539062, "logps/rejected": -103.01570129394531, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -3.0474693775177, "rewards/margins": 4.703281402587891, "rewards/rejected": -7.75075101852417, "step": 5955 }, { "epoch": 1.32, "learning_rate": 7.858903040309717e-06, "logits/chosen": -1.2845903635025024, "logits/rejected": -1.3214114904403687, "logps/chosen": -120.41289520263672, "logps/rejected": -73.14710235595703, "loss": 0.5098, "rewards/accuracies": 0.0, "rewards/chosen": -5.717482566833496, "rewards/margins": -0.5537109375, "rewards/rejected": -5.163771629333496, "step": 5956 }, { "epoch": 1.32, "learning_rate": 7.857432418934264e-06, "logits/chosen": -1.162543773651123, "logits/rejected": -1.1039811372756958, "logps/chosen": -118.93208312988281, "logps/rejected": -189.26004028320312, "loss": 0.0523, "rewards/accuracies": 1.0, "rewards/chosen": 1.2944320440292358, "rewards/margins": 5.951911926269531, "rewards/rejected": -4.657479763031006, "step": 5957 }, { "epoch": 1.32, "learning_rate": 7.855961430385615e-06, "logits/chosen": -0.9848499894142151, "logits/rejected": -1.0609400272369385, "logps/chosen": -139.829833984375, "logps/rejected": -176.16693115234375, "loss": 0.0737, "rewards/accuracies": 1.0, "rewards/chosen": -6.800144195556641, "rewards/margins": 1.839564323425293, "rewards/rejected": -8.639708518981934, "step": 5958 }, { "epoch": 1.32, "learning_rate": 7.854490074852784e-06, "logits/chosen": -1.4410548210144043, "logits/rejected": -1.4575918912887573, "logps/chosen": -112.28407287597656, "logps/rejected": -92.21733093261719, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -4.5885210037231445, "rewards/margins": 2.633500576019287, "rewards/rejected": -7.222021579742432, "step": 5959 }, { "epoch": 1.32, "learning_rate": 7.853018352524845e-06, "logits/chosen": -1.4595749378204346, "logits/rejected": -1.5200016498565674, "logps/chosen": -162.71539306640625, "logps/rejected": -233.1930694580078, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.36763307452201843, "rewards/margins": 9.635807991027832, "rewards/rejected": -9.26817512512207, "step": 5960 }, { "epoch": 1.32, "learning_rate": 7.851546263590905e-06, "logits/chosen": -1.274163842201233, "logits/rejected": -1.3018637895584106, "logps/chosen": -76.15594482421875, "logps/rejected": -84.38821411132812, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.7186416983604431, "rewards/margins": 6.484702110290527, "rewards/rejected": -5.7660603523254395, "step": 5961 }, { "epoch": 1.32, "learning_rate": 7.850073808240125e-06, "logits/chosen": -1.2452718019485474, "logits/rejected": -1.1985552310943604, "logps/chosen": -90.8306884765625, "logps/rejected": -185.78257751464844, "loss": 0.3478, "rewards/accuracies": 1.0, "rewards/chosen": -1.010475993156433, "rewards/margins": 5.991302490234375, "rewards/rejected": -7.001778602600098, "step": 5962 }, { "epoch": 1.32, "learning_rate": 7.84860098666171e-06, "logits/chosen": -1.1341279745101929, "logits/rejected": -1.1199564933776855, "logps/chosen": -86.58386993408203, "logps/rejected": -93.556640625, "loss": 0.2151, "rewards/accuracies": 1.0, "rewards/chosen": -2.9717137813568115, "rewards/margins": 0.6207809448242188, "rewards/rejected": -3.5924947261810303, "step": 5963 }, { "epoch": 1.32, "learning_rate": 7.847127799044918e-06, "logits/chosen": -1.457167148590088, "logits/rejected": -0.879267156124115, "logps/chosen": -95.50570678710938, "logps/rejected": -989.7263793945312, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.028751373291015625, "rewards/margins": 81.72589111328125, "rewards/rejected": -81.754638671875, "step": 5964 }, { "epoch": 1.32, "learning_rate": 7.845654245579047e-06, "logits/chosen": -0.8957621455192566, "logits/rejected": -0.8957621455192566, "logps/chosen": -106.48320007324219, "logps/rejected": -106.48320007324219, "loss": 0.3471, "rewards/accuracies": 0.0, "rewards/chosen": -4.124847412109375, "rewards/margins": 0.0, "rewards/rejected": -4.124847412109375, "step": 5965 }, { "epoch": 1.32, "learning_rate": 7.844180326453447e-06, "logits/chosen": -0.7354082465171814, "logits/rejected": -0.7764208912849426, "logps/chosen": -164.58505249023438, "logps/rejected": -220.8929901123047, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": 2.8874619007110596, "rewards/margins": 3.975726366043091, "rewards/rejected": -1.0882644653320312, "step": 5966 }, { "epoch": 1.32, "learning_rate": 7.842706041857512e-06, "logits/chosen": -1.1413428783416748, "logits/rejected": -1.1575196981430054, "logps/chosen": -167.2850341796875, "logps/rejected": -174.76583862304688, "loss": 0.3462, "rewards/accuracies": 1.0, "rewards/chosen": -5.215342044830322, "rewards/margins": 0.0013480186462402344, "rewards/rejected": -5.2166900634765625, "step": 5967 }, { "epoch": 1.32, "learning_rate": 7.841231391980687e-06, "logits/chosen": -1.4511864185333252, "logits/rejected": -1.5622875690460205, "logps/chosen": -152.27127075195312, "logps/rejected": -158.09426879882812, "loss": 0.1698, "rewards/accuracies": 1.0, "rewards/chosen": -2.901844024658203, "rewards/margins": 8.574252128601074, "rewards/rejected": -11.476096153259277, "step": 5968 }, { "epoch": 1.32, "learning_rate": 7.839756377012453e-06, "logits/chosen": -1.4282481670379639, "logits/rejected": -1.5197609663009644, "logps/chosen": -145.73251342773438, "logps/rejected": -164.4082794189453, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -1.4404128789901733, "rewards/margins": 10.977005958557129, "rewards/rejected": -12.417418479919434, "step": 5969 }, { "epoch": 1.32, "learning_rate": 7.838280997142355e-06, "logits/chosen": -1.1901706457138062, "logits/rejected": -1.211977243423462, "logps/chosen": -124.31393432617188, "logps/rejected": -151.26751708984375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -2.084538221359253, "rewards/margins": 4.621421813964844, "rewards/rejected": -6.705960273742676, "step": 5970 }, { "epoch": 1.32, "learning_rate": 7.836805252559971e-06, "logits/chosen": -1.2106106281280518, "logits/rejected": -1.1733770370483398, "logps/chosen": -174.28823852539062, "logps/rejected": -222.17889404296875, "loss": 0.3851, "rewards/accuracies": 1.0, "rewards/chosen": -1.4907104969024658, "rewards/margins": 4.402128219604492, "rewards/rejected": -5.892838954925537, "step": 5971 }, { "epoch": 1.32, "learning_rate": 7.83532914345493e-06, "logits/chosen": -1.1945096254348755, "logits/rejected": -1.232672929763794, "logps/chosen": -311.44342041015625, "logps/rejected": -157.40182495117188, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.8852264881134033, "rewards/margins": 7.305549621582031, "rewards/rejected": -9.190775871276855, "step": 5972 }, { "epoch": 1.32, "learning_rate": 7.833852670016912e-06, "logits/chosen": -1.3873565196990967, "logits/rejected": -1.359053611755371, "logps/chosen": -132.75070190429688, "logps/rejected": -205.0496063232422, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -4.134471416473389, "rewards/margins": 4.58186674118042, "rewards/rejected": -8.716338157653809, "step": 5973 }, { "epoch": 1.32, "learning_rate": 7.832375832435637e-06, "logits/chosen": -1.479838490486145, "logits/rejected": -1.4937115907669067, "logps/chosen": -123.19729614257812, "logps/rejected": -138.0554656982422, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -2.7191712856292725, "rewards/margins": 2.6972548961639404, "rewards/rejected": -5.416426181793213, "step": 5974 }, { "epoch": 1.32, "learning_rate": 7.830898630900877e-06, "logits/chosen": -1.1720592975616455, "logits/rejected": -0.8651682138442993, "logps/chosen": -146.46768188476562, "logps/rejected": -793.0988159179688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.921322822570801, "rewards/margins": 51.63044357299805, "rewards/rejected": -56.55176544189453, "step": 5975 }, { "epoch": 1.32, "learning_rate": 7.829421065602448e-06, "logits/chosen": -1.317029595375061, "logits/rejected": -1.317029595375061, "logps/chosen": -164.3684539794922, "logps/rejected": -164.3684539794922, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -5.670024871826172, "rewards/margins": 0.0, "rewards/rejected": -5.670024871826172, "step": 5976 }, { "epoch": 1.32, "learning_rate": 7.827943136730214e-06, "logits/chosen": -1.1590784788131714, "logits/rejected": -1.1379278898239136, "logps/chosen": -163.28274536132812, "logps/rejected": -279.9855041503906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 5.8710784912109375, "rewards/margins": 13.8281831741333, "rewards/rejected": -7.957104682922363, "step": 5977 }, { "epoch": 1.32, "learning_rate": 7.826464844474086e-06, "logits/chosen": -1.266941785812378, "logits/rejected": -1.2494494915008545, "logps/chosen": -93.18665313720703, "logps/rejected": -178.88150024414062, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.2581321895122528, "rewards/margins": 4.852713108062744, "rewards/rejected": -5.11084508895874, "step": 5978 }, { "epoch": 1.32, "learning_rate": 7.82498618902402e-06, "logits/chosen": -0.9024856090545654, "logits/rejected": -0.8803282380104065, "logps/chosen": -209.2763671875, "logps/rejected": -324.62451171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7040328979492188, "rewards/margins": 6.0465593338012695, "rewards/rejected": -6.750592231750488, "step": 5979 }, { "epoch": 1.32, "learning_rate": 7.823507170570018e-06, "logits/chosen": -1.5271501541137695, "logits/rejected": -1.4465388059616089, "logps/chosen": -112.24099731445312, "logps/rejected": -214.66510009765625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.3564910888671875, "rewards/margins": 6.399363994598389, "rewards/rejected": -7.755855083465576, "step": 5980 }, { "epoch": 1.32, "learning_rate": 7.822027789302134e-06, "logits/chosen": -0.8910925984382629, "logits/rejected": -0.8910925984382629, "logps/chosen": -118.55134582519531, "logps/rejected": -118.55134582519531, "loss": 0.3513, "rewards/accuracies": 0.0, "rewards/chosen": -5.221032619476318, "rewards/margins": 0.0, "rewards/rejected": -5.221032619476318, "step": 5981 }, { "epoch": 1.32, "learning_rate": 7.820548045410462e-06, "logits/chosen": -1.332047462463379, "logits/rejected": -1.273883581161499, "logps/chosen": -81.17780303955078, "logps/rejected": -199.992431640625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9413337707519531, "rewards/margins": 6.094214916229248, "rewards/rejected": -7.035548686981201, "step": 5982 }, { "epoch": 1.32, "learning_rate": 7.819067939085145e-06, "logits/chosen": -1.2336477041244507, "logits/rejected": -1.2050546407699585, "logps/chosen": -209.8018798828125, "logps/rejected": -295.81817626953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5646332502365112, "rewards/margins": 8.809605598449707, "rewards/rejected": -10.374238967895508, "step": 5983 }, { "epoch": 1.32, "learning_rate": 7.817587470516378e-06, "logits/chosen": -1.0951969623565674, "logits/rejected": -0.8250383138656616, "logps/chosen": -255.49502563476562, "logps/rejected": -913.9910278320312, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": 0.3550659120082855, "rewards/margins": 75.0149917602539, "rewards/rejected": -74.65992736816406, "step": 5984 }, { "epoch": 1.32, "learning_rate": 7.816106639894392e-06, "logits/chosen": -1.366608738899231, "logits/rejected": -1.3224306106567383, "logps/chosen": -99.160888671875, "logps/rejected": -138.95779418945312, "loss": 0.1136, "rewards/accuracies": 1.0, "rewards/chosen": -1.3486312627792358, "rewards/margins": 1.3662599325180054, "rewards/rejected": -2.714891195297241, "step": 5985 }, { "epoch": 1.32, "learning_rate": 7.814625447409474e-06, "logits/chosen": -1.0489840507507324, "logits/rejected": -0.9717134833335876, "logps/chosen": -81.69313049316406, "logps/rejected": -205.11830139160156, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": -2.2513020038604736, "rewards/margins": 6.584196090698242, "rewards/rejected": -8.835497856140137, "step": 5986 }, { "epoch": 1.33, "learning_rate": 7.813143893251951e-06, "logits/chosen": -1.0737634897232056, "logits/rejected": -0.9958146810531616, "logps/chosen": -141.4678955078125, "logps/rejected": -272.04962158203125, "loss": 0.0584, "rewards/accuracies": 1.0, "rewards/chosen": -1.3729782104492188, "rewards/margins": 6.102337837219238, "rewards/rejected": -7.475316047668457, "step": 5987 }, { "epoch": 1.33, "learning_rate": 7.811661977612202e-06, "logits/chosen": -1.1990940570831299, "logits/rejected": -1.1415448188781738, "logps/chosen": -179.54486083984375, "logps/rejected": -200.0927276611328, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 1.9008620977401733, "rewards/margins": 5.3466596603393555, "rewards/rejected": -3.4457976818084717, "step": 5988 }, { "epoch": 1.33, "learning_rate": 7.810179700680646e-06, "logits/chosen": -1.0337234735488892, "logits/rejected": -1.0719636678695679, "logps/chosen": -203.15087890625, "logps/rejected": -223.67193603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.23501281440258026, "rewards/margins": 11.117416381835938, "rewards/rejected": -11.352429389953613, "step": 5989 }, { "epoch": 1.33, "learning_rate": 7.808697062647755e-06, "logits/chosen": -1.4357671737670898, "logits/rejected": -1.6318633556365967, "logps/chosen": -183.90655517578125, "logps/rejected": -156.2591094970703, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 3.762371778488159, "rewards/margins": 9.021600723266602, "rewards/rejected": -5.259228706359863, "step": 5990 }, { "epoch": 1.33, "learning_rate": 7.807214063704042e-06, "logits/chosen": -1.3993154764175415, "logits/rejected": -1.430220127105713, "logps/chosen": -125.3357925415039, "logps/rejected": -107.37484741210938, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": -0.8318451046943665, "rewards/margins": 2.098494052886963, "rewards/rejected": -2.9303390979766846, "step": 5991 }, { "epoch": 1.33, "learning_rate": 7.805730704040072e-06, "logits/chosen": -1.3112190961837769, "logits/rejected": -1.300689458847046, "logps/chosen": -129.4814453125, "logps/rejected": -185.42184448242188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.15151672065258026, "rewards/margins": 8.547614097595215, "rewards/rejected": -8.396097183227539, "step": 5992 }, { "epoch": 1.33, "learning_rate": 7.804246983846449e-06, "logits/chosen": -1.600079894065857, "logits/rejected": -1.633657455444336, "logps/chosen": -147.64219665527344, "logps/rejected": -165.47943115234375, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -1.2134567499160767, "rewards/margins": 2.7358040809631348, "rewards/rejected": -3.949260711669922, "step": 5993 }, { "epoch": 1.33, "learning_rate": 7.802762903313831e-06, "logits/chosen": -1.7209091186523438, "logits/rejected": -1.831787109375, "logps/chosen": -176.12515258789062, "logps/rejected": -264.61871337890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.12496032565832138, "rewards/margins": 11.213567733764648, "rewards/rejected": -11.088607788085938, "step": 5994 }, { "epoch": 1.33, "learning_rate": 7.80127846263292e-06, "logits/chosen": -1.5741808414459229, "logits/rejected": -1.6194208860397339, "logps/chosen": -121.08050537109375, "logps/rejected": -145.7358856201172, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.1899429559707642, "rewards/margins": 5.548615455627441, "rewards/rejected": -6.738558292388916, "step": 5995 }, { "epoch": 1.33, "learning_rate": 7.799793661994457e-06, "logits/chosen": -1.325989842414856, "logits/rejected": -1.2711832523345947, "logps/chosen": -98.79609680175781, "logps/rejected": -195.63943481445312, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.058795928955078, "rewards/margins": 5.53497314453125, "rewards/rejected": -7.593769073486328, "step": 5996 }, { "epoch": 1.33, "learning_rate": 7.79830850158924e-06, "logits/chosen": -1.345694899559021, "logits/rejected": -1.345694899559021, "logps/chosen": -76.95914459228516, "logps/rejected": -76.95914459228516, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.821272611618042, "rewards/margins": 0.0, "rewards/rejected": -2.821272611618042, "step": 5997 }, { "epoch": 1.33, "learning_rate": 7.796822981608109e-06, "logits/chosen": -1.171857237815857, "logits/rejected": -1.2155519723892212, "logps/chosen": -213.33815002441406, "logps/rejected": -166.90269470214844, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.3186477720737457, "rewards/margins": 4.8381853103637695, "rewards/rejected": -5.156833171844482, "step": 5998 }, { "epoch": 1.33, "learning_rate": 7.795337102241948e-06, "logits/chosen": -1.3103227615356445, "logits/rejected": -1.3974485397338867, "logps/chosen": -241.7919921875, "logps/rejected": -175.51480102539062, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": -3.849325656890869, "rewards/margins": 2.897261142730713, "rewards/rejected": -6.746586799621582, "step": 5999 }, { "epoch": 1.33, "learning_rate": 7.793850863681688e-06, "logits/chosen": -1.2745652198791504, "logits/rejected": -1.2358649969100952, "logps/chosen": -92.05415344238281, "logps/rejected": -144.2474822998047, "loss": 0.982, "rewards/accuracies": 1.0, "rewards/chosen": -1.3221313953399658, "rewards/margins": 3.035710096359253, "rewards/rejected": -4.357841491699219, "step": 6000 }, { "epoch": 1.33, "learning_rate": 3.6764705882352945e-08, "logits/chosen": -1.4086600542068481, "logits/rejected": -0.8874123096466064, "logps/chosen": -120.7567138671875, "logps/rejected": -1020.2146606445312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7576645016670227, "rewards/margins": 75.7879867553711, "rewards/rejected": -76.545654296875, "step": 6001 }, { "epoch": 1.33, "learning_rate": 7.352941176470589e-08, "logits/chosen": -1.2005218267440796, "logits/rejected": -1.2853593826293945, "logps/chosen": -180.49594116210938, "logps/rejected": -171.90182495117188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 3.4685425758361816, "rewards/margins": 8.118417739868164, "rewards/rejected": -4.649875164031982, "step": 6002 }, { "epoch": 1.33, "learning_rate": 1.1029411764705884e-07, "logits/chosen": -1.2842036485671997, "logits/rejected": -1.2501088380813599, "logps/chosen": -78.51339721679688, "logps/rejected": -155.9669189453125, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": -1.5727951526641846, "rewards/margins": 3.346503496170044, "rewards/rejected": -4.9192986488342285, "step": 6003 }, { "epoch": 1.33, "learning_rate": 1.4705882352941178e-07, "logits/chosen": -1.3930240869522095, "logits/rejected": -1.4819287061691284, "logps/chosen": -196.42340087890625, "logps/rejected": -156.23806762695312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 3.0099427700042725, "rewards/margins": 8.773158073425293, "rewards/rejected": -5.7632155418396, "step": 6004 }, { "epoch": 1.33, "learning_rate": 1.8382352941176472e-07, "logits/chosen": -1.1336536407470703, "logits/rejected": -0.9827350974082947, "logps/chosen": -337.6873779296875, "logps/rejected": -414.8115234375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 2.1771514415740967, "rewards/margins": 7.137585639953613, "rewards/rejected": -4.9604339599609375, "step": 6005 }, { "epoch": 1.33, "learning_rate": 2.2058823529411768e-07, "logits/chosen": -1.2275782823562622, "logits/rejected": -1.1559584140777588, "logps/chosen": -87.76309967041016, "logps/rejected": -218.32015991210938, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": 0.4060569703578949, "rewards/margins": 3.000978708267212, "rewards/rejected": -2.594921827316284, "step": 6006 }, { "epoch": 1.33, "learning_rate": 2.573529411764706e-07, "logits/chosen": -1.4216920137405396, "logits/rejected": -1.373978853225708, "logps/chosen": -146.79994201660156, "logps/rejected": -198.1862335205078, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -1.8243210315704346, "rewards/margins": 3.4497530460357666, "rewards/rejected": -5.274074077606201, "step": 6007 }, { "epoch": 1.33, "learning_rate": 2.9411764705882356e-07, "logits/chosen": -0.9326098561286926, "logits/rejected": -1.0108989477157593, "logps/chosen": -189.00628662109375, "logps/rejected": -152.61212158203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.9019333124160767, "rewards/margins": 12.31197738647461, "rewards/rejected": -10.410043716430664, "step": 6008 }, { "epoch": 1.33, "learning_rate": 3.308823529411765e-07, "logits/chosen": -1.329456090927124, "logits/rejected": -1.3291176557540894, "logps/chosen": -77.60387420654297, "logps/rejected": -88.22784423828125, "loss": 0.517, "rewards/accuracies": 1.0, "rewards/chosen": -0.5702049136161804, "rewards/margins": 0.9010719656944275, "rewards/rejected": -1.471276879310608, "step": 6009 }, { "epoch": 1.33, "learning_rate": 3.6764705882352943e-07, "logits/chosen": -1.289297342300415, "logits/rejected": -1.3153572082519531, "logps/chosen": -108.08187866210938, "logps/rejected": -136.70046997070312, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -3.237727403640747, "rewards/margins": 5.383983612060547, "rewards/rejected": -8.621710777282715, "step": 6010 }, { "epoch": 1.33, "learning_rate": 4.044117647058824e-07, "logits/chosen": -1.0834726095199585, "logits/rejected": -1.0978649854660034, "logps/chosen": -62.171695709228516, "logps/rejected": -72.17375183105469, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.4353150129318237, "rewards/margins": 4.737265110015869, "rewards/rejected": -6.172580242156982, "step": 6011 }, { "epoch": 1.33, "learning_rate": 4.4117647058823536e-07, "logits/chosen": -1.171224594116211, "logits/rejected": -1.1670606136322021, "logps/chosen": -121.2037353515625, "logps/rejected": -137.93643188476562, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -0.07750701904296875, "rewards/margins": 3.687535047531128, "rewards/rejected": -3.7650420665740967, "step": 6012 }, { "epoch": 1.33, "learning_rate": 4.779411764705882e-07, "logits/chosen": -1.2455493211746216, "logits/rejected": -1.140207052230835, "logps/chosen": -191.06301879882812, "logps/rejected": -352.9773254394531, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -0.04985656961798668, "rewards/margins": 3.1769073009490967, "rewards/rejected": -3.226763963699341, "step": 6013 }, { "epoch": 1.33, "learning_rate": 5.147058823529412e-07, "logits/chosen": -0.9626243710517883, "logits/rejected": -0.8793470859527588, "logps/chosen": -142.56443786621094, "logps/rejected": -322.9556884765625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.7067596912384033, "rewards/margins": 16.155597686767578, "rewards/rejected": -17.86235809326172, "step": 6014 }, { "epoch": 1.33, "learning_rate": 5.514705882352942e-07, "logits/chosen": -1.386385202407837, "logits/rejected": -1.439253568649292, "logps/chosen": -99.09059143066406, "logps/rejected": -91.13594818115234, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -1.7735198736190796, "rewards/margins": 3.5384392738342285, "rewards/rejected": -5.311959266662598, "step": 6015 }, { "epoch": 1.33, "learning_rate": 5.882352941176471e-07, "logits/chosen": -1.3239072561264038, "logits/rejected": -1.284907579421997, "logps/chosen": -177.61563110351562, "logps/rejected": -227.0596923828125, "loss": 0.0947, "rewards/accuracies": 1.0, "rewards/chosen": -4.896420478820801, "rewards/margins": 1.5683012008666992, "rewards/rejected": -6.4647216796875, "step": 6016 }, { "epoch": 1.33, "learning_rate": 6.25e-07, "logits/chosen": -1.3938243389129639, "logits/rejected": -1.409355640411377, "logps/chosen": -126.5093765258789, "logps/rejected": -97.00276947021484, "loss": 0.0657, "rewards/accuracies": 1.0, "rewards/chosen": -2.07236647605896, "rewards/margins": 1.963010549545288, "rewards/rejected": -4.035377025604248, "step": 6017 }, { "epoch": 1.33, "learning_rate": 6.61764705882353e-07, "logits/chosen": -1.1260381937026978, "logits/rejected": -1.1183758974075317, "logps/chosen": -125.28169250488281, "logps/rejected": -307.95513916015625, "loss": 0.4063, "rewards/accuracies": 1.0, "rewards/chosen": -4.059727668762207, "rewards/margins": 6.807154655456543, "rewards/rejected": -10.86688232421875, "step": 6018 }, { "epoch": 1.33, "learning_rate": 6.985294117647059e-07, "logits/chosen": -1.5267785787582397, "logits/rejected": -1.4783796072006226, "logps/chosen": -130.44821166992188, "logps/rejected": -259.6674499511719, "loss": 0.2911, "rewards/accuracies": 1.0, "rewards/chosen": -1.2341957092285156, "rewards/margins": 8.042450904846191, "rewards/rejected": -9.276646614074707, "step": 6019 }, { "epoch": 1.33, "learning_rate": 7.352941176470589e-07, "logits/chosen": -1.2535927295684814, "logits/rejected": -1.298227310180664, "logps/chosen": -138.48013305664062, "logps/rejected": -105.54460144042969, "loss": 0.2873, "rewards/accuracies": 1.0, "rewards/chosen": -6.479373455047607, "rewards/margins": 0.253232479095459, "rewards/rejected": -6.732605934143066, "step": 6020 }, { "epoch": 1.33, "learning_rate": 7.720588235294119e-07, "logits/chosen": -0.9998055100440979, "logits/rejected": -1.069769263267517, "logps/chosen": -192.49700927734375, "logps/rejected": -145.50332641601562, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.5231353640556335, "rewards/margins": 5.468921184539795, "rewards/rejected": -5.992056369781494, "step": 6021 }, { "epoch": 1.33, "learning_rate": 8.088235294117648e-07, "logits/chosen": -0.9210607409477234, "logits/rejected": -0.8425470590591431, "logps/chosen": -97.75303649902344, "logps/rejected": -164.20632934570312, "loss": 0.3631, "rewards/accuracies": 1.0, "rewards/chosen": -0.8411774039268494, "rewards/margins": 3.392888069152832, "rewards/rejected": -4.234065532684326, "step": 6022 }, { "epoch": 1.33, "learning_rate": 8.455882352941178e-07, "logits/chosen": -0.9340097904205322, "logits/rejected": -0.4924425780773163, "logps/chosen": -123.50301361083984, "logps/rejected": -920.039306640625, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": -2.1365089416503906, "rewards/margins": 70.94975280761719, "rewards/rejected": -73.08626556396484, "step": 6023 }, { "epoch": 1.33, "learning_rate": 8.823529411764707e-07, "logits/chosen": -1.3164646625518799, "logits/rejected": -1.0861690044403076, "logps/chosen": -190.56455993652344, "logps/rejected": -645.4600830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.974859595298767, "rewards/margins": 32.39268112182617, "rewards/rejected": -30.41781997680664, "step": 6024 }, { "epoch": 1.33, "learning_rate": 9.191176470588237e-07, "logits/chosen": -1.1668174266815186, "logits/rejected": -1.1789445877075195, "logps/chosen": -116.51901245117188, "logps/rejected": -114.66072082519531, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.774359107017517, "rewards/margins": 7.56077241897583, "rewards/rejected": -9.335131645202637, "step": 6025 }, { "epoch": 1.33, "learning_rate": 9.558823529411764e-07, "logits/chosen": -1.5683019161224365, "logits/rejected": -1.4874135255813599, "logps/chosen": -85.89920043945312, "logps/rejected": -217.41461181640625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.1031121015548706, "rewards/margins": 5.08849573135376, "rewards/rejected": -6.19160795211792, "step": 6026 }, { "epoch": 1.33, "learning_rate": 9.926470588235295e-07, "logits/chosen": -1.2940926551818848, "logits/rejected": -1.3170360326766968, "logps/chosen": -116.93978118896484, "logps/rejected": -104.34169006347656, "loss": 0.0584, "rewards/accuracies": 1.0, "rewards/chosen": -4.043318271636963, "rewards/margins": 2.159266948699951, "rewards/rejected": -6.202585220336914, "step": 6027 }, { "epoch": 1.33, "learning_rate": 1.0294117647058825e-06, "logits/chosen": -1.3025007247924805, "logits/rejected": -1.3767122030258179, "logps/chosen": -179.75704956054688, "logps/rejected": -203.35580444335938, "loss": 0.0626, "rewards/accuracies": 1.0, "rewards/chosen": -5.1185150146484375, "rewards/margins": 3.4749889373779297, "rewards/rejected": -8.593503952026367, "step": 6028 }, { "epoch": 1.33, "learning_rate": 1.0661764705882354e-06, "logits/chosen": -1.2497700452804565, "logits/rejected": -1.1910330057144165, "logps/chosen": -102.00517272949219, "logps/rejected": -156.06321716308594, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": -1.7615174055099487, "rewards/margins": 2.1302361488342285, "rewards/rejected": -3.8917534351348877, "step": 6029 }, { "epoch": 1.33, "learning_rate": 1.1029411764705884e-06, "logits/chosen": -1.2542684078216553, "logits/rejected": -1.3263829946517944, "logps/chosen": -125.29336547851562, "logps/rejected": -115.21830749511719, "loss": 1.2108, "rewards/accuracies": 0.0, "rewards/chosen": -4.723313808441162, "rewards/margins": -2.321347713470459, "rewards/rejected": -2.401966094970703, "step": 6030 }, { "epoch": 1.33, "learning_rate": 1.1397058823529413e-06, "logits/chosen": -1.220774531364441, "logits/rejected": -1.2301015853881836, "logps/chosen": -76.48858642578125, "logps/rejected": -88.48815155029297, "loss": 0.0785, "rewards/accuracies": 1.0, "rewards/chosen": -0.972125232219696, "rewards/margins": 1.8141834735870361, "rewards/rejected": -2.786308765411377, "step": 6031 }, { "epoch": 1.34, "learning_rate": 1.1764705882352942e-06, "logits/chosen": -1.0391442775726318, "logits/rejected": -0.9793391227722168, "logps/chosen": -114.86438751220703, "logps/rejected": -320.0069885253906, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -1.028350830078125, "rewards/margins": 6.828525066375732, "rewards/rejected": -7.856875896453857, "step": 6032 }, { "epoch": 1.34, "learning_rate": 1.2132352941176472e-06, "logits/chosen": -1.27194082736969, "logits/rejected": -1.1618486642837524, "logps/chosen": -108.05256652832031, "logps/rejected": -289.7827453613281, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.631079077720642, "rewards/margins": 4.3434739112854, "rewards/rejected": -5.974553108215332, "step": 6033 }, { "epoch": 1.34, "learning_rate": 1.25e-06, "logits/chosen": -0.9432051181793213, "logits/rejected": -0.9432051181793213, "logps/chosen": -121.18875122070312, "logps/rejected": -121.18875122070312, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -2.674922227859497, "rewards/margins": 0.0, "rewards/rejected": -2.674922227859497, "step": 6034 }, { "epoch": 1.34, "learning_rate": 1.2867647058823528e-06, "logits/chosen": -1.7716338634490967, "logits/rejected": -1.880932092666626, "logps/chosen": -130.77725219726562, "logps/rejected": -130.76914978027344, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -2.5754692554473877, "rewards/margins": 7.074219703674316, "rewards/rejected": -9.649688720703125, "step": 6035 }, { "epoch": 1.34, "learning_rate": 1.323529411764706e-06, "logits/chosen": -1.439592957496643, "logits/rejected": -1.4543249607086182, "logps/chosen": -195.034423828125, "logps/rejected": -170.33941650390625, "loss": 0.0397, "rewards/accuracies": 1.0, "rewards/chosen": -1.8489716053009033, "rewards/margins": 2.4924256801605225, "rewards/rejected": -4.341397285461426, "step": 6036 }, { "epoch": 1.34, "learning_rate": 1.360294117647059e-06, "logits/chosen": -1.543574333190918, "logits/rejected": -1.5067239999771118, "logps/chosen": -90.81513977050781, "logps/rejected": -260.9503173828125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3623062074184418, "rewards/margins": 8.192390441894531, "rewards/rejected": -8.554697036743164, "step": 6037 }, { "epoch": 1.34, "learning_rate": 1.3970588235294119e-06, "logits/chosen": -1.046072006225586, "logits/rejected": -0.9014422297477722, "logps/chosen": -126.10086822509766, "logps/rejected": -313.10321044921875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.3275680541992188, "rewards/margins": 6.887086868286133, "rewards/rejected": -9.214654922485352, "step": 6038 }, { "epoch": 1.34, "learning_rate": 1.4338235294117648e-06, "logits/chosen": -1.312408208847046, "logits/rejected": -1.3071129322052002, "logps/chosen": -102.45103454589844, "logps/rejected": -138.6360626220703, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5791732668876648, "rewards/margins": 7.1001200675964355, "rewards/rejected": -7.679293155670166, "step": 6039 }, { "epoch": 1.34, "learning_rate": 1.4705882352941177e-06, "logits/chosen": -1.214013934135437, "logits/rejected": -1.1569244861602783, "logps/chosen": -151.77294921875, "logps/rejected": -311.5639343261719, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -4.449594020843506, "rewards/margins": 5.961678981781006, "rewards/rejected": -10.411273002624512, "step": 6040 }, { "epoch": 1.34, "learning_rate": 1.5073529411764707e-06, "logits/chosen": -1.3409838676452637, "logits/rejected": -1.3467469215393066, "logps/chosen": -60.61321258544922, "logps/rejected": -176.63751220703125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.08754577487707138, "rewards/margins": 7.47910213470459, "rewards/rejected": -7.566648006439209, "step": 6041 }, { "epoch": 1.34, "learning_rate": 1.5441176470588238e-06, "logits/chosen": -1.2489293813705444, "logits/rejected": -1.218443751335144, "logps/chosen": -185.2024688720703, "logps/rejected": -279.818115234375, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 2.267604112625122, "rewards/margins": 4.3300676345825195, "rewards/rejected": -2.0624635219573975, "step": 6042 }, { "epoch": 1.34, "learning_rate": 1.5808823529411765e-06, "logits/chosen": -1.612446665763855, "logits/rejected": -1.4374499320983887, "logps/chosen": -93.50738525390625, "logps/rejected": -250.9524383544922, "loss": 0.9445, "rewards/accuracies": 0.0, "rewards/chosen": -2.6162948608398438, "rewards/margins": -1.7249267101287842, "rewards/rejected": -0.8913680911064148, "step": 6043 }, { "epoch": 1.34, "learning_rate": 1.6176470588235297e-06, "logits/chosen": -1.5871413946151733, "logits/rejected": -1.7562052011489868, "logps/chosen": -220.63177490234375, "logps/rejected": -157.8753662109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.1932098865509033, "rewards/margins": 9.575990676879883, "rewards/rejected": -11.769200325012207, "step": 6044 }, { "epoch": 1.34, "learning_rate": 1.6544117647058824e-06, "logits/chosen": -1.3017210960388184, "logits/rejected": -1.319741129875183, "logps/chosen": -157.50146484375, "logps/rejected": -108.69203186035156, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.0538727045059204, "rewards/margins": 5.224268436431885, "rewards/rejected": -6.278141021728516, "step": 6045 }, { "epoch": 1.34, "learning_rate": 1.6911764705882356e-06, "logits/chosen": -1.44841468334198, "logits/rejected": -1.474900722503662, "logps/chosen": -181.83572387695312, "logps/rejected": -239.36172485351562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.3828827142715454, "rewards/margins": 12.982327461242676, "rewards/rejected": -11.599444389343262, "step": 6046 }, { "epoch": 1.34, "learning_rate": 1.7279411764705883e-06, "logits/chosen": -1.3813226222991943, "logits/rejected": -1.2423955202102661, "logps/chosen": -172.9121551513672, "logps/rejected": -274.67779541015625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.532073974609375, "rewards/margins": 4.692620754241943, "rewards/rejected": -6.224694728851318, "step": 6047 }, { "epoch": 1.34, "learning_rate": 1.7647058823529414e-06, "logits/chosen": -1.0393785238265991, "logits/rejected": -0.6301937103271484, "logps/chosen": -233.4775390625, "logps/rejected": -567.1450805664062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.5177230834960938, "rewards/margins": 23.562761306762695, "rewards/rejected": -22.0450382232666, "step": 6048 }, { "epoch": 1.34, "learning_rate": 1.8014705882352942e-06, "logits/chosen": -1.6756267547607422, "logits/rejected": -1.7123061418533325, "logps/chosen": -107.99010467529297, "logps/rejected": -86.72918701171875, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -0.9289504885673523, "rewards/margins": 2.99279522895813, "rewards/rejected": -3.921745777130127, "step": 6049 }, { "epoch": 1.34, "learning_rate": 1.8382352941176473e-06, "logits/chosen": -1.392619013786316, "logits/rejected": -1.385954737663269, "logps/chosen": -53.00338363647461, "logps/rejected": -64.99610900878906, "loss": 0.5174, "rewards/accuracies": 0.0, "rewards/chosen": -1.078009843826294, "rewards/margins": -0.5353195667266846, "rewards/rejected": -0.5426902770996094, "step": 6050 }, { "epoch": 1.34, "learning_rate": 1.8750000000000003e-06, "logits/chosen": -1.5041455030441284, "logits/rejected": -1.4153486490249634, "logps/chosen": -90.31399536132812, "logps/rejected": -230.64089965820312, "loss": 0.0638, "rewards/accuracies": 1.0, "rewards/chosen": -1.4398056268692017, "rewards/margins": 1.9973281621932983, "rewards/rejected": -3.4371337890625, "step": 6051 }, { "epoch": 1.34, "learning_rate": 1.9117647058823528e-06, "logits/chosen": -1.4859315156936646, "logits/rejected": -1.4993185997009277, "logps/chosen": -139.57669067382812, "logps/rejected": -111.02536010742188, "loss": 0.3007, "rewards/accuracies": 1.0, "rewards/chosen": -3.8073151111602783, "rewards/margins": 0.1931321620941162, "rewards/rejected": -4.0004472732543945, "step": 6052 }, { "epoch": 1.34, "learning_rate": 1.948529411764706e-06, "logits/chosen": -1.3518741130828857, "logits/rejected": -1.39509117603302, "logps/chosen": -208.42559814453125, "logps/rejected": -197.78778076171875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6178956031799316, "rewards/margins": 6.337243556976318, "rewards/rejected": -9.95513916015625, "step": 6053 }, { "epoch": 1.34, "learning_rate": 1.985294117647059e-06, "logits/chosen": -1.5302613973617554, "logits/rejected": -1.5097852945327759, "logps/chosen": -130.46688842773438, "logps/rejected": -165.72059631347656, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 0.7153381705284119, "rewards/margins": 4.609646797180176, "rewards/rejected": -3.894308567047119, "step": 6054 }, { "epoch": 1.34, "learning_rate": 2.022058823529412e-06, "logits/chosen": -1.4018975496292114, "logits/rejected": -1.405591607093811, "logps/chosen": -87.0106430053711, "logps/rejected": -121.91454315185547, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5143524408340454, "rewards/margins": 6.44171142578125, "rewards/rejected": -7.956063747406006, "step": 6055 }, { "epoch": 1.34, "learning_rate": 2.058823529411765e-06, "logits/chosen": -1.2876945734024048, "logits/rejected": -1.3179491758346558, "logps/chosen": -212.03817749023438, "logps/rejected": -208.93101501464844, "loss": 0.8081, "rewards/accuracies": 0.0, "rewards/chosen": -4.5462493896484375, "rewards/margins": -0.41672801971435547, "rewards/rejected": -4.129521369934082, "step": 6056 }, { "epoch": 1.34, "learning_rate": 2.095588235294118e-06, "logits/chosen": -1.530749797821045, "logits/rejected": -1.5275323390960693, "logps/chosen": -169.09756469726562, "logps/rejected": -218.82662963867188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.103399634361267, "rewards/margins": 5.572038173675537, "rewards/rejected": -6.675437927246094, "step": 6057 }, { "epoch": 1.34, "learning_rate": 2.132352941176471e-06, "logits/chosen": -0.9798955917358398, "logits/rejected": -0.8868674635887146, "logps/chosen": -110.89118957519531, "logps/rejected": -284.4844970703125, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -2.4963624477386475, "rewards/margins": 3.7875092029571533, "rewards/rejected": -6.283871650695801, "step": 6058 }, { "epoch": 1.34, "learning_rate": 2.1691176470588238e-06, "logits/chosen": -1.2326611280441284, "logits/rejected": -1.2341853380203247, "logps/chosen": -87.96255493164062, "logps/rejected": -140.24427795410156, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.0219444036483765, "rewards/margins": 5.480847358703613, "rewards/rejected": -6.502791881561279, "step": 6059 }, { "epoch": 1.34, "learning_rate": 2.2058823529411767e-06, "logits/chosen": -0.8890336155891418, "logits/rejected": -0.8339029550552368, "logps/chosen": -187.04893493652344, "logps/rejected": -240.13619995117188, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.1641587018966675, "rewards/margins": 4.719430446624756, "rewards/rejected": -5.883589267730713, "step": 6060 }, { "epoch": 1.34, "learning_rate": 2.2426470588235296e-06, "logits/chosen": -1.574597716331482, "logits/rejected": -1.5992209911346436, "logps/chosen": -111.9297103881836, "logps/rejected": -172.3115234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8886985778808594, "rewards/margins": 9.223043441772461, "rewards/rejected": -10.11174201965332, "step": 6061 }, { "epoch": 1.34, "learning_rate": 2.2794117647058826e-06, "logits/chosen": -1.0951836109161377, "logits/rejected": -1.0737704038619995, "logps/chosen": -130.15530395507812, "logps/rejected": -154.7884063720703, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -1.1397933959960938, "rewards/margins": 3.0619874000549316, "rewards/rejected": -4.201780796051025, "step": 6062 }, { "epoch": 1.34, "learning_rate": 2.3161764705882355e-06, "logits/chosen": -1.319701910018921, "logits/rejected": -1.340213418006897, "logps/chosen": -80.30564880371094, "logps/rejected": -65.13722229003906, "loss": 0.0784, "rewards/accuracies": 1.0, "rewards/chosen": -0.5523406863212585, "rewards/margins": 1.990755558013916, "rewards/rejected": -2.5430963039398193, "step": 6063 }, { "epoch": 1.34, "learning_rate": 2.3529411764705885e-06, "logits/chosen": -1.562588095664978, "logits/rejected": -0.8200193643569946, "logps/chosen": -115.2290267944336, "logps/rejected": -1146.043212890625, "loss": 0.0498, "rewards/accuracies": 1.0, "rewards/chosen": -0.3885963559150696, "rewards/margins": 94.86715698242188, "rewards/rejected": -95.25575256347656, "step": 6064 }, { "epoch": 1.34, "learning_rate": 2.3897058823529414e-06, "logits/chosen": -1.1618729829788208, "logits/rejected": -1.1943403482437134, "logps/chosen": -157.54177856445312, "logps/rejected": -260.666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.8944916129112244, "rewards/margins": 13.050582885742188, "rewards/rejected": -12.156091690063477, "step": 6065 }, { "epoch": 1.34, "learning_rate": 2.4264705882352943e-06, "logits/chosen": -0.9526463747024536, "logits/rejected": -1.058140754699707, "logps/chosen": -250.697998046875, "logps/rejected": -406.4335632324219, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.283136010169983, "rewards/margins": 11.45068359375, "rewards/rejected": -12.733819961547852, "step": 6066 }, { "epoch": 1.34, "learning_rate": 2.4632352941176473e-06, "logits/chosen": -1.3419827222824097, "logits/rejected": -1.2960118055343628, "logps/chosen": -109.70721435546875, "logps/rejected": -203.3838653564453, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.3448410034179688, "rewards/margins": 5.3022356033325195, "rewards/rejected": -7.647076606750488, "step": 6067 }, { "epoch": 1.34, "learning_rate": 2.5e-06, "logits/chosen": -1.0659468173980713, "logits/rejected": -1.1745012998580933, "logps/chosen": -260.63812255859375, "logps/rejected": -237.6985626220703, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 2.239520311355591, "rewards/margins": 12.897672653198242, "rewards/rejected": -10.65815258026123, "step": 6068 }, { "epoch": 1.34, "learning_rate": 2.536764705882353e-06, "logits/chosen": -1.1920862197875977, "logits/rejected": -1.1920862197875977, "logps/chosen": -183.11329650878906, "logps/rejected": -183.11329650878906, "loss": 0.3473, "rewards/accuracies": 0.0, "rewards/chosen": -8.148272514343262, "rewards/margins": 0.0, "rewards/rejected": -8.148272514343262, "step": 6069 }, { "epoch": 1.34, "learning_rate": 2.5735294117647057e-06, "logits/chosen": -1.5737730264663696, "logits/rejected": -1.586444616317749, "logps/chosen": -83.82941436767578, "logps/rejected": -108.8449935913086, "loss": 0.444, "rewards/accuracies": 1.0, "rewards/chosen": -1.0229400396347046, "rewards/margins": 1.5364006757736206, "rewards/rejected": -2.559340715408325, "step": 6070 }, { "epoch": 1.34, "learning_rate": 2.610294117647059e-06, "logits/chosen": -1.301589846611023, "logits/rejected": -1.2258378267288208, "logps/chosen": -84.41105651855469, "logps/rejected": -302.7300109863281, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": 1.3447121381759644, "rewards/margins": 10.3880615234375, "rewards/rejected": -9.043349266052246, "step": 6071 }, { "epoch": 1.34, "learning_rate": 2.647058823529412e-06, "logits/chosen": -1.093393087387085, "logits/rejected": -1.0481886863708496, "logps/chosen": -285.88116455078125, "logps/rejected": -313.8882751464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.13220520317554474, "rewards/margins": 11.169682502746582, "rewards/rejected": -11.037477493286133, "step": 6072 }, { "epoch": 1.34, "learning_rate": 2.683823529411765e-06, "logits/chosen": -1.4052815437316895, "logits/rejected": -1.3044843673706055, "logps/chosen": -192.2694854736328, "logps/rejected": -344.80572509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.087568759918213, "rewards/margins": 14.834953308105469, "rewards/rejected": -12.747385025024414, "step": 6073 }, { "epoch": 1.34, "learning_rate": 2.720588235294118e-06, "logits/chosen": -1.3448957204818726, "logits/rejected": -1.3199083805084229, "logps/chosen": -54.602691650390625, "logps/rejected": -96.94631958007812, "loss": 0.2603, "rewards/accuracies": 1.0, "rewards/chosen": -2.8378424644470215, "rewards/margins": 0.38123631477355957, "rewards/rejected": -3.219078779220581, "step": 6074 }, { "epoch": 1.34, "learning_rate": 2.757352941176471e-06, "logits/chosen": -1.3492164611816406, "logits/rejected": -1.3660696744918823, "logps/chosen": -134.26849365234375, "logps/rejected": -195.80648803710938, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 1.325170874595642, "rewards/margins": 7.842864990234375, "rewards/rejected": -6.517693996429443, "step": 6075 }, { "epoch": 1.34, "learning_rate": 2.7941176470588237e-06, "logits/chosen": -1.0543711185455322, "logits/rejected": -1.075053334236145, "logps/chosen": -129.70372009277344, "logps/rejected": -130.94284057617188, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/chosen": -4.297597408294678, "rewards/margins": 4.4891180992126465, "rewards/rejected": -8.786715507507324, "step": 6076 }, { "epoch": 1.35, "learning_rate": 2.8308823529411766e-06, "logits/chosen": -1.2130769491195679, "logits/rejected": -1.2810102701187134, "logps/chosen": -211.44381713867188, "logps/rejected": -232.8112030029297, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8454697132110596, "rewards/margins": 13.424622535705566, "rewards/rejected": -17.270092010498047, "step": 6077 }, { "epoch": 1.35, "learning_rate": 2.8676470588235296e-06, "logits/chosen": -1.822637677192688, "logits/rejected": -1.7967398166656494, "logps/chosen": -117.2099838256836, "logps/rejected": -177.66384887695312, "loss": 0.3486, "rewards/accuracies": 1.0, "rewards/chosen": -1.2670692205429077, "rewards/margins": 5.525978088378906, "rewards/rejected": -6.7930474281311035, "step": 6078 }, { "epoch": 1.35, "learning_rate": 2.904411764705883e-06, "logits/chosen": -1.414595127105713, "logits/rejected": -1.5155733823776245, "logps/chosen": -143.93934631347656, "logps/rejected": -113.58319091796875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 2.679553270339966, "rewards/margins": 5.157637119293213, "rewards/rejected": -2.478083848953247, "step": 6079 }, { "epoch": 1.35, "learning_rate": 2.9411764705882355e-06, "logits/chosen": -1.0309001207351685, "logits/rejected": -1.1326560974121094, "logps/chosen": -215.00685119628906, "logps/rejected": -111.57952880859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.2530624866485596, "rewards/margins": 8.689432144165039, "rewards/rejected": -7.4363694190979, "step": 6080 }, { "epoch": 1.35, "learning_rate": 2.9779411764705884e-06, "logits/chosen": -1.5762757062911987, "logits/rejected": -1.6045913696289062, "logps/chosen": -117.71305847167969, "logps/rejected": -97.52365112304688, "loss": 0.059, "rewards/accuracies": 1.0, "rewards/chosen": -1.3020843267440796, "rewards/margins": 2.0856356620788574, "rewards/rejected": -3.3877201080322266, "step": 6081 }, { "epoch": 1.35, "learning_rate": 3.0147058823529413e-06, "logits/chosen": -1.4118577241897583, "logits/rejected": -1.3878331184387207, "logps/chosen": -126.58265686035156, "logps/rejected": -241.22239685058594, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.77532958984375, "rewards/margins": 7.820581436157227, "rewards/rejected": -8.595911026000977, "step": 6082 }, { "epoch": 1.35, "learning_rate": 3.0514705882352947e-06, "logits/chosen": -1.1487118005752563, "logits/rejected": -1.1055330038070679, "logps/chosen": -77.13994598388672, "logps/rejected": -125.25749206542969, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.3364067077636719, "rewards/margins": 7.116523742675781, "rewards/rejected": -8.452930450439453, "step": 6083 }, { "epoch": 1.35, "learning_rate": 3.0882352941176476e-06, "logits/chosen": -1.3705413341522217, "logits/rejected": -1.3498245477676392, "logps/chosen": -192.22100830078125, "logps/rejected": -264.7105407714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.083892822265625, "rewards/margins": 10.062106132507324, "rewards/rejected": -9.9782133102417, "step": 6084 }, { "epoch": 1.35, "learning_rate": 3.125e-06, "logits/chosen": -1.7977923154830933, "logits/rejected": -1.8120301961898804, "logps/chosen": -4.175388336181641, "logps/rejected": -44.429107666015625, "loss": 0.0572, "rewards/accuracies": 1.0, "rewards/chosen": -0.015559769235551357, "rewards/margins": 2.110504627227783, "rewards/rejected": -2.1260643005371094, "step": 6085 }, { "epoch": 1.35, "learning_rate": 3.161764705882353e-06, "logits/chosen": -1.646588921546936, "logits/rejected": -1.5986112356185913, "logps/chosen": -67.48735046386719, "logps/rejected": -166.1415252685547, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.1595138311386108, "rewards/margins": 5.519329071044922, "rewards/rejected": -6.678843021392822, "step": 6086 }, { "epoch": 1.35, "learning_rate": 3.198529411764706e-06, "logits/chosen": -1.0799022912979126, "logits/rejected": -1.0646618604660034, "logps/chosen": -178.18954467773438, "logps/rejected": -252.67974853515625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 1.3160873651504517, "rewards/margins": 16.732561111450195, "rewards/rejected": -15.416473388671875, "step": 6087 }, { "epoch": 1.35, "learning_rate": 3.2352941176470594e-06, "logits/chosen": -1.2964869737625122, "logits/rejected": -1.1651734113693237, "logps/chosen": -120.05686950683594, "logps/rejected": -318.1221618652344, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 1.496272325515747, "rewards/margins": 10.758642196655273, "rewards/rejected": -9.262370109558105, "step": 6088 }, { "epoch": 1.35, "learning_rate": 3.272058823529412e-06, "logits/chosen": -1.0850213766098022, "logits/rejected": -1.1014509201049805, "logps/chosen": -292.9949645996094, "logps/rejected": -145.85765075683594, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 1.2527436017990112, "rewards/margins": 4.755153179168701, "rewards/rejected": -3.5024094581604004, "step": 6089 }, { "epoch": 1.35, "learning_rate": 3.308823529411765e-06, "logits/chosen": -1.1428320407867432, "logits/rejected": -1.1428320407867432, "logps/chosen": -114.59178161621094, "logps/rejected": -114.59178161621094, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.130800724029541, "rewards/margins": 0.0, "rewards/rejected": -2.130800724029541, "step": 6090 }, { "epoch": 1.35, "learning_rate": 3.3455882352941178e-06, "logits/chosen": -1.6125993728637695, "logits/rejected": -1.6375292539596558, "logps/chosen": -97.05072021484375, "logps/rejected": -126.2820816040039, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.6555497646331787, "rewards/margins": 6.029053688049316, "rewards/rejected": -9.684603691101074, "step": 6091 }, { "epoch": 1.35, "learning_rate": 3.382352941176471e-06, "logits/chosen": -1.4658328294754028, "logits/rejected": -1.4626764059066772, "logps/chosen": -175.00289916992188, "logps/rejected": -264.76910400390625, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -6.331754207611084, "rewards/margins": 4.5208353996276855, "rewards/rejected": -10.85258960723877, "step": 6092 }, { "epoch": 1.35, "learning_rate": 3.419117647058824e-06, "logits/chosen": -1.6139689683914185, "logits/rejected": -1.6695091724395752, "logps/chosen": -116.3731460571289, "logps/rejected": -153.60890197753906, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.1858558654785156, "rewards/margins": 4.46515417098999, "rewards/rejected": -5.651010036468506, "step": 6093 }, { "epoch": 1.35, "learning_rate": 3.4558823529411766e-06, "logits/chosen": -1.48978853225708, "logits/rejected": -1.5034244060516357, "logps/chosen": -164.31016540527344, "logps/rejected": -217.12449645996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.41473695635795593, "rewards/margins": 12.181941032409668, "rewards/rejected": -12.596677780151367, "step": 6094 }, { "epoch": 1.35, "learning_rate": 3.4926470588235295e-06, "logits/chosen": -1.1798690557479858, "logits/rejected": -1.133700966835022, "logps/chosen": -127.17337799072266, "logps/rejected": -283.489501953125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.6503151655197144, "rewards/margins": 8.45614242553711, "rewards/rejected": -10.106457710266113, "step": 6095 }, { "epoch": 1.35, "learning_rate": 3.529411764705883e-06, "logits/chosen": -1.0996043682098389, "logits/rejected": -1.0049465894699097, "logps/chosen": -203.84585571289062, "logps/rejected": -398.24609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 6.745736598968506, "rewards/margins": 18.56683921813965, "rewards/rejected": -11.8211030960083, "step": 6096 }, { "epoch": 1.35, "learning_rate": 3.566176470588236e-06, "logits/chosen": -1.5813722610473633, "logits/rejected": -1.497002124786377, "logps/chosen": -139.65187072753906, "logps/rejected": -291.9940185546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.925746202468872, "rewards/margins": 6.067108154296875, "rewards/rejected": -7.992854595184326, "step": 6097 }, { "epoch": 1.35, "learning_rate": 3.6029411764705883e-06, "logits/chosen": -1.6708519458770752, "logits/rejected": -1.7219115495681763, "logps/chosen": -171.60496520996094, "logps/rejected": -158.8555145263672, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -1.7822479009628296, "rewards/margins": 4.092455863952637, "rewards/rejected": -5.874703884124756, "step": 6098 }, { "epoch": 1.35, "learning_rate": 3.6397058823529413e-06, "logits/chosen": -1.0956830978393555, "logits/rejected": -1.0667587518692017, "logps/chosen": -240.2838134765625, "logps/rejected": -282.3559875488281, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -2.5480988025665283, "rewards/margins": 3.981580972671509, "rewards/rejected": -6.529679775238037, "step": 6099 }, { "epoch": 1.35, "learning_rate": 3.6764705882352946e-06, "logits/chosen": -1.430889368057251, "logits/rejected": -1.4333996772766113, "logps/chosen": -79.09652709960938, "logps/rejected": -129.52630615234375, "loss": 0.0673, "rewards/accuracies": 1.0, "rewards/chosen": -2.2599034309387207, "rewards/margins": 1.9967312812805176, "rewards/rejected": -4.256634712219238, "step": 6100 }, { "epoch": 1.35, "learning_rate": 3.7132352941176476e-06, "logits/chosen": -1.3837330341339111, "logits/rejected": -0.7850023508071899, "logps/chosen": -112.39973449707031, "logps/rejected": -585.55517578125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -8.252140998840332, "rewards/margins": 31.75192642211914, "rewards/rejected": -40.004066467285156, "step": 6101 }, { "epoch": 1.35, "learning_rate": 3.7500000000000005e-06, "logits/chosen": -1.0814602375030518, "logits/rejected": -1.1082136631011963, "logps/chosen": -187.73692321777344, "logps/rejected": -246.54833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.1909377574920654, "rewards/margins": 14.312847137451172, "rewards/rejected": -12.121909141540527, "step": 6102 }, { "epoch": 1.35, "learning_rate": 3.786764705882353e-06, "logits/chosen": -1.1230077743530273, "logits/rejected": -0.9863462448120117, "logps/chosen": -280.94573974609375, "logps/rejected": -443.09954833984375, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": 0.31702882051467896, "rewards/margins": 8.234417915344238, "rewards/rejected": -7.917388916015625, "step": 6103 }, { "epoch": 1.35, "learning_rate": 3.8235294117647055e-06, "logits/chosen": -1.2664817571640015, "logits/rejected": -1.4097585678100586, "logps/chosen": -257.39117431640625, "logps/rejected": -131.04473876953125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.8191850185394287, "rewards/margins": 5.882804870605469, "rewards/rejected": -9.701990127563477, "step": 6104 }, { "epoch": 1.35, "learning_rate": 3.860294117647059e-06, "logits/chosen": -1.1512386798858643, "logits/rejected": -1.1512386798858643, "logps/chosen": -199.83648681640625, "logps/rejected": -199.83648681640625, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -7.616558074951172, "rewards/margins": 0.0, "rewards/rejected": -7.616558074951172, "step": 6105 }, { "epoch": 1.35, "learning_rate": 3.897058823529412e-06, "logits/chosen": -1.1063785552978516, "logits/rejected": -1.2906140089035034, "logps/chosen": -299.61669921875, "logps/rejected": -187.39126586914062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.656393527984619, "rewards/margins": 7.627866268157959, "rewards/rejected": -12.284259796142578, "step": 6106 }, { "epoch": 1.35, "learning_rate": 3.933823529411765e-06, "logits/chosen": -1.1790648698806763, "logits/rejected": -1.145622968673706, "logps/chosen": -193.94204711914062, "logps/rejected": -239.11451721191406, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.2245118618011475, "rewards/margins": 6.1960248947143555, "rewards/rejected": -8.420536994934082, "step": 6107 }, { "epoch": 1.35, "learning_rate": 3.970588235294118e-06, "logits/chosen": -1.2712287902832031, "logits/rejected": -1.2584809064865112, "logps/chosen": -165.16209411621094, "logps/rejected": -198.36575317382812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.6201828122138977, "rewards/margins": 9.039237976074219, "rewards/rejected": -8.419054985046387, "step": 6108 }, { "epoch": 1.35, "learning_rate": 4.007352941176471e-06, "logits/chosen": -1.4723598957061768, "logits/rejected": -1.4107543230056763, "logps/chosen": -93.38809204101562, "logps/rejected": -179.32766723632812, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.05050506815314293, "rewards/margins": 10.43624496459961, "rewards/rejected": -10.385740280151367, "step": 6109 }, { "epoch": 1.35, "learning_rate": 4.044117647058824e-06, "logits/chosen": -1.6666724681854248, "logits/rejected": -1.7211408615112305, "logps/chosen": -92.64691162109375, "logps/rejected": -106.40465545654297, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.2410179227590561, "rewards/margins": 7.566445350646973, "rewards/rejected": -7.8074631690979, "step": 6110 }, { "epoch": 1.35, "learning_rate": 4.080882352941177e-06, "logits/chosen": -1.1177284717559814, "logits/rejected": -1.0059089660644531, "logps/chosen": -185.85610961914062, "logps/rejected": -461.76800537109375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 0.4468033015727997, "rewards/margins": 10.561945915222168, "rewards/rejected": -10.115142822265625, "step": 6111 }, { "epoch": 1.35, "learning_rate": 4.11764705882353e-06, "logits/chosen": -1.619467854499817, "logits/rejected": -1.8450287580490112, "logps/chosen": -130.2947540283203, "logps/rejected": -91.71061706542969, "loss": 0.0718, "rewards/accuracies": 1.0, "rewards/chosen": -4.465508460998535, "rewards/margins": 1.9071869850158691, "rewards/rejected": -6.372695446014404, "step": 6112 }, { "epoch": 1.35, "learning_rate": 4.154411764705883e-06, "logits/chosen": -1.0189650058746338, "logits/rejected": -0.7080717086791992, "logps/chosen": -170.3680419921875, "logps/rejected": -567.5985107421875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.170640707015991, "rewards/margins": 19.03665542602539, "rewards/rejected": -21.20729637145996, "step": 6113 }, { "epoch": 1.35, "learning_rate": 4.191176470588236e-06, "logits/chosen": -1.062129020690918, "logits/rejected": -1.062129020690918, "logps/chosen": -127.31208038330078, "logps/rejected": -127.31208038330078, "loss": 0.3508, "rewards/accuracies": 0.0, "rewards/chosen": -2.7725656032562256, "rewards/margins": 0.0, "rewards/rejected": -2.7725656032562256, "step": 6114 }, { "epoch": 1.35, "learning_rate": 4.227941176470589e-06, "logits/chosen": -1.569872498512268, "logits/rejected": -1.6432545185089111, "logps/chosen": -145.1478729248047, "logps/rejected": -211.69834899902344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.8686020374298096, "rewards/margins": 7.539854049682617, "rewards/rejected": -9.408455848693848, "step": 6115 }, { "epoch": 1.35, "learning_rate": 4.264705882352942e-06, "logits/chosen": -1.313134789466858, "logits/rejected": -0.9190990924835205, "logps/chosen": -104.65975952148438, "logps/rejected": -462.25885009765625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -3.783625841140747, "rewards/margins": 34.08573913574219, "rewards/rejected": -37.86936569213867, "step": 6116 }, { "epoch": 1.35, "learning_rate": 4.301470588235295e-06, "logits/chosen": -1.1962183713912964, "logits/rejected": -1.2019257545471191, "logps/chosen": -134.54867553710938, "logps/rejected": -187.69906616210938, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -4.150928020477295, "rewards/margins": 3.526102066040039, "rewards/rejected": -7.677030086517334, "step": 6117 }, { "epoch": 1.35, "learning_rate": 4.3382352941176475e-06, "logits/chosen": -1.1107126474380493, "logits/rejected": -1.129683017730713, "logps/chosen": -108.37965393066406, "logps/rejected": -111.5090103149414, "loss": 0.0694, "rewards/accuracies": 1.0, "rewards/chosen": -2.3359787464141846, "rewards/margins": 1.905491590499878, "rewards/rejected": -4.2414703369140625, "step": 6118 }, { "epoch": 1.35, "learning_rate": 4.3750000000000005e-06, "logits/chosen": -1.6942743062973022, "logits/rejected": -1.7687137126922607, "logps/chosen": -134.23187255859375, "logps/rejected": -113.86516571044922, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.8173286318778992, "rewards/margins": 4.2893171310424805, "rewards/rejected": -5.106645584106445, "step": 6119 }, { "epoch": 1.35, "learning_rate": 4.411764705882353e-06, "logits/chosen": -1.4575814008712769, "logits/rejected": -1.4368423223495483, "logps/chosen": -86.31888580322266, "logps/rejected": -99.57848358154297, "loss": 0.1549, "rewards/accuracies": 1.0, "rewards/chosen": -0.6010581851005554, "rewards/margins": 1.0273559093475342, "rewards/rejected": -1.6284141540527344, "step": 6120 }, { "epoch": 1.35, "learning_rate": 4.448529411764706e-06, "logits/chosen": -1.100480318069458, "logits/rejected": -1.1386021375656128, "logps/chosen": -214.24307250976562, "logps/rejected": -172.18524169921875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 5.066037178039551, "rewards/margins": 10.80239200592041, "rewards/rejected": -5.736354827880859, "step": 6121 }, { "epoch": 1.36, "learning_rate": 4.485294117647059e-06, "logits/chosen": -1.373464584350586, "logits/rejected": -1.4301483631134033, "logps/chosen": -129.21620178222656, "logps/rejected": -210.78219604492188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.499550700187683, "rewards/margins": 9.510614395141602, "rewards/rejected": -11.010165214538574, "step": 6122 }, { "epoch": 1.36, "learning_rate": 4.522058823529412e-06, "logits/chosen": -1.0912480354309082, "logits/rejected": -1.0473970174789429, "logps/chosen": -102.21125793457031, "logps/rejected": -166.0399169921875, "loss": 0.1425, "rewards/accuracies": 1.0, "rewards/chosen": -3.5695016384124756, "rewards/margins": 1.1629316806793213, "rewards/rejected": -4.732433319091797, "step": 6123 }, { "epoch": 1.36, "learning_rate": 4.558823529411765e-06, "logits/chosen": -1.2361583709716797, "logits/rejected": -1.227639079093933, "logps/chosen": -123.75852966308594, "logps/rejected": -101.13819122314453, "loss": 0.0596, "rewards/accuracies": 1.0, "rewards/chosen": -2.6774659156799316, "rewards/margins": 2.1045260429382324, "rewards/rejected": -4.781991958618164, "step": 6124 }, { "epoch": 1.36, "learning_rate": 4.595588235294118e-06, "logits/chosen": -1.1910208463668823, "logits/rejected": -1.2051193714141846, "logps/chosen": -118.32485961914062, "logps/rejected": -140.86082458496094, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.773859441280365, "rewards/margins": 5.4644694328308105, "rewards/rejected": -4.690609931945801, "step": 6125 }, { "epoch": 1.36, "learning_rate": 4.632352941176471e-06, "logits/chosen": -1.6128848791122437, "logits/rejected": -1.5374170541763306, "logps/chosen": -148.10348510742188, "logps/rejected": -293.9376220703125, "loss": 0.6757, "rewards/accuracies": 1.0, "rewards/chosen": -2.1090774536132812, "rewards/margins": 4.227088928222656, "rewards/rejected": -6.3361663818359375, "step": 6126 }, { "epoch": 1.36, "learning_rate": 4.669117647058824e-06, "logits/chosen": -1.245660424232483, "logits/rejected": -1.2647788524627686, "logps/chosen": -133.75909423828125, "logps/rejected": -151.95697021484375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.7169532775878906, "rewards/margins": 6.000792980194092, "rewards/rejected": -7.717746257781982, "step": 6127 }, { "epoch": 1.36, "learning_rate": 4.705882352941177e-06, "logits/chosen": -1.2459625005722046, "logits/rejected": -1.2459625005722046, "logps/chosen": -77.74819946289062, "logps/rejected": -77.74819946289062, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.1727070808410645, "rewards/margins": 0.0, "rewards/rejected": -2.1727070808410645, "step": 6128 }, { "epoch": 1.36, "learning_rate": 4.74264705882353e-06, "logits/chosen": -1.2894352674484253, "logits/rejected": -1.2726315259933472, "logps/chosen": -114.88580322265625, "logps/rejected": -186.79405212402344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.41446763277053833, "rewards/margins": 7.799302101135254, "rewards/rejected": -8.213769912719727, "step": 6129 }, { "epoch": 1.36, "learning_rate": 4.779411764705883e-06, "logits/chosen": -1.5764540433883667, "logits/rejected": -1.5983126163482666, "logps/chosen": -196.1435546875, "logps/rejected": -208.27041625976562, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 3.3172576427459717, "rewards/margins": 7.106141567230225, "rewards/rejected": -3.788883924484253, "step": 6130 }, { "epoch": 1.36, "learning_rate": 4.816176470588236e-06, "logits/chosen": -1.2133930921554565, "logits/rejected": -1.081001877784729, "logps/chosen": -243.98568725585938, "logps/rejected": -359.002197265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.4626251459121704, "rewards/margins": 8.950400352478027, "rewards/rejected": -7.487774848937988, "step": 6131 }, { "epoch": 1.36, "learning_rate": 4.852941176470589e-06, "logits/chosen": -1.4053685665130615, "logits/rejected": -1.419761300086975, "logps/chosen": -92.36136627197266, "logps/rejected": -187.83087158203125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.9256432056427002, "rewards/margins": 6.703204154968262, "rewards/rejected": -8.628847122192383, "step": 6132 }, { "epoch": 1.36, "learning_rate": 4.889705882352942e-06, "logits/chosen": -1.3854026794433594, "logits/rejected": -1.3854026794433594, "logps/chosen": -134.84219360351562, "logps/rejected": -134.84219360351562, "loss": 0.4599, "rewards/accuracies": 0.0, "rewards/chosen": -5.108625888824463, "rewards/margins": 0.0, "rewards/rejected": -5.108625888824463, "step": 6133 }, { "epoch": 1.36, "learning_rate": 4.9264705882352945e-06, "logits/chosen": -1.3811469078063965, "logits/rejected": -1.4188491106033325, "logps/chosen": -128.65040588378906, "logps/rejected": -66.12505340576172, "loss": 0.1887, "rewards/accuracies": 1.0, "rewards/chosen": -3.371983289718628, "rewards/margins": 1.8221004009246826, "rewards/rejected": -5.1940836906433105, "step": 6134 }, { "epoch": 1.36, "learning_rate": 4.9632352941176475e-06, "logits/chosen": -1.2044633626937866, "logits/rejected": -1.2044633626937866, "logps/chosen": -127.71905517578125, "logps/rejected": -127.71905517578125, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.879429578781128, "rewards/margins": 0.0, "rewards/rejected": -3.879429578781128, "step": 6135 }, { "epoch": 1.36, "learning_rate": 5e-06, "logits/chosen": -0.8981449604034424, "logits/rejected": -0.2949395775794983, "logps/chosen": -118.17254638671875, "logps/rejected": -484.24371337890625, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -4.182011604309082, "rewards/margins": 36.92292022705078, "rewards/rejected": -41.10493087768555, "step": 6136 }, { "epoch": 1.36, "learning_rate": 5.036764705882353e-06, "logits/chosen": -1.527574062347412, "logits/rejected": -1.4691375494003296, "logps/chosen": -160.61207580566406, "logps/rejected": -316.00299072265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.132304668426514, "rewards/margins": 9.33517074584961, "rewards/rejected": -14.467475891113281, "step": 6137 }, { "epoch": 1.36, "learning_rate": 5.073529411764706e-06, "logits/chosen": -1.7112339735031128, "logits/rejected": -1.794172763824463, "logps/chosen": -143.81378173828125, "logps/rejected": -216.371337890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5769013166427612, "rewards/margins": 13.229734420776367, "rewards/rejected": -14.806635856628418, "step": 6138 }, { "epoch": 1.36, "learning_rate": 5.110294117647059e-06, "logits/chosen": -1.529636025428772, "logits/rejected": -1.455328345298767, "logps/chosen": -96.55919647216797, "logps/rejected": -210.50413513183594, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -2.625939130783081, "rewards/margins": 4.367396354675293, "rewards/rejected": -6.993335247039795, "step": 6139 }, { "epoch": 1.36, "learning_rate": 5.147058823529411e-06, "logits/chosen": -1.4539462327957153, "logits/rejected": -1.3872647285461426, "logps/chosen": -105.16111755371094, "logps/rejected": -247.2846221923828, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": 0.42837220430374146, "rewards/margins": 13.069297790527344, "rewards/rejected": -12.640925407409668, "step": 6140 }, { "epoch": 1.36, "learning_rate": 5.183823529411766e-06, "logits/chosen": -1.6277300119400024, "logits/rejected": -1.6631591320037842, "logps/chosen": -116.5291748046875, "logps/rejected": -245.5106964111328, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0013703107833862, "rewards/margins": 13.121981620788574, "rewards/rejected": -14.12335205078125, "step": 6141 }, { "epoch": 1.36, "learning_rate": 5.220588235294118e-06, "logits/chosen": -1.3934673070907593, "logits/rejected": -1.364888310432434, "logps/chosen": -118.12702178955078, "logps/rejected": -202.47535705566406, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -3.281224012374878, "rewards/margins": 4.188773155212402, "rewards/rejected": -7.469996929168701, "step": 6142 }, { "epoch": 1.36, "learning_rate": 5.257352941176471e-06, "logits/chosen": -1.4449909925460815, "logits/rejected": -1.488974928855896, "logps/chosen": -103.29634094238281, "logps/rejected": -150.231689453125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.5738540887832642, "rewards/margins": 5.586730480194092, "rewards/rejected": -6.160584449768066, "step": 6143 }, { "epoch": 1.36, "learning_rate": 5.294117647058824e-06, "logits/chosen": -1.4833848476409912, "logits/rejected": -1.4824351072311401, "logps/chosen": -94.42131042480469, "logps/rejected": -112.79501342773438, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.7995697259902954, "rewards/margins": 7.749194622039795, "rewards/rejected": -8.5487642288208, "step": 6144 }, { "epoch": 1.36, "learning_rate": 5.330882352941177e-06, "logits/chosen": -1.2345231771469116, "logits/rejected": -1.3348116874694824, "logps/chosen": -239.67616271972656, "logps/rejected": -143.05221557617188, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 0.4075027406215668, "rewards/margins": 10.130041122436523, "rewards/rejected": -9.722537994384766, "step": 6145 }, { "epoch": 1.36, "learning_rate": 5.36764705882353e-06, "logits/chosen": -1.0719503164291382, "logits/rejected": -0.9840334057807922, "logps/chosen": -164.91883850097656, "logps/rejected": -240.5723876953125, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": 0.1458694487810135, "rewards/margins": 4.1791791915893555, "rewards/rejected": -4.0333099365234375, "step": 6146 }, { "epoch": 1.36, "learning_rate": 5.404411764705883e-06, "logits/chosen": -1.2689200639724731, "logits/rejected": -1.289820909500122, "logps/chosen": -140.16622924804688, "logps/rejected": -165.56430053710938, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -2.327894687652588, "rewards/margins": 5.119729042053223, "rewards/rejected": -7.4476237297058105, "step": 6147 }, { "epoch": 1.36, "learning_rate": 5.441176470588236e-06, "logits/chosen": -1.198725700378418, "logits/rejected": -1.1903377771377563, "logps/chosen": -82.44493865966797, "logps/rejected": -99.53612518310547, "loss": 0.124, "rewards/accuracies": 1.0, "rewards/chosen": -6.020437717437744, "rewards/margins": 1.281477451324463, "rewards/rejected": -7.301915168762207, "step": 6148 }, { "epoch": 1.36, "learning_rate": 5.4779411764705894e-06, "logits/chosen": -1.7846909761428833, "logits/rejected": -1.8257848024368286, "logps/chosen": -162.20423889160156, "logps/rejected": -150.25677490234375, "loss": 0.0724, "rewards/accuracies": 1.0, "rewards/chosen": -4.0374579429626465, "rewards/margins": 1.8604226112365723, "rewards/rejected": -5.897880554199219, "step": 6149 }, { "epoch": 1.36, "learning_rate": 5.514705882352942e-06, "logits/chosen": -1.037326693534851, "logits/rejected": -0.9927835464477539, "logps/chosen": -151.675537109375, "logps/rejected": -254.74082946777344, "loss": 0.0372, "rewards/accuracies": 1.0, "rewards/chosen": -6.936143398284912, "rewards/margins": 3.254899501800537, "rewards/rejected": -10.19104290008545, "step": 6150 }, { "epoch": 1.36, "learning_rate": 5.5514705882352945e-06, "logits/chosen": -1.8635774850845337, "logits/rejected": -1.8419941663742065, "logps/chosen": -93.1793212890625, "logps/rejected": -146.57374572753906, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -1.3905471563339233, "rewards/margins": 3.0416259765625, "rewards/rejected": -4.432173252105713, "step": 6151 }, { "epoch": 1.36, "learning_rate": 5.588235294117647e-06, "logits/chosen": -1.4320778846740723, "logits/rejected": -1.5357310771942139, "logps/chosen": -302.4801025390625, "logps/rejected": -266.3266906738281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.2947998046875, "rewards/margins": 9.937726020812988, "rewards/rejected": -13.232525825500488, "step": 6152 }, { "epoch": 1.36, "learning_rate": 5.625e-06, "logits/chosen": -1.539375901222229, "logits/rejected": -1.395750880241394, "logps/chosen": -124.34791564941406, "logps/rejected": -252.80650329589844, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.6659523248672485, "rewards/margins": 5.092188358306885, "rewards/rejected": -5.758140563964844, "step": 6153 }, { "epoch": 1.36, "learning_rate": 5.661764705882353e-06, "logits/chosen": -1.223197340965271, "logits/rejected": -1.1878010034561157, "logps/chosen": -86.59599304199219, "logps/rejected": -139.79202270507812, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.6963623762130737, "rewards/margins": 3.7634382247924805, "rewards/rejected": -5.459800720214844, "step": 6154 }, { "epoch": 1.36, "learning_rate": 5.698529411764706e-06, "logits/chosen": -1.0756731033325195, "logits/rejected": -1.0678201913833618, "logps/chosen": -174.51763916015625, "logps/rejected": -167.71392822265625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.267160028219223, "rewards/margins": 7.06646203994751, "rewards/rejected": -6.799302101135254, "step": 6155 }, { "epoch": 1.36, "learning_rate": 5.735294117647059e-06, "logits/chosen": -1.2088569402694702, "logits/rejected": -1.2088569402694702, "logps/chosen": -258.57745361328125, "logps/rejected": -258.57745361328125, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -7.568719387054443, "rewards/margins": 0.0, "rewards/rejected": -7.568719387054443, "step": 6156 }, { "epoch": 1.36, "learning_rate": 5.772058823529412e-06, "logits/chosen": -1.420434594154358, "logits/rejected": -1.4350453615188599, "logps/chosen": -212.90415954589844, "logps/rejected": -198.363525390625, "loss": 0.0727, "rewards/accuracies": 1.0, "rewards/chosen": -3.5442612171173096, "rewards/margins": 1.8550093173980713, "rewards/rejected": -5.399270534515381, "step": 6157 }, { "epoch": 1.36, "learning_rate": 5.808823529411766e-06, "logits/chosen": -1.168582558631897, "logits/rejected": -1.2076787948608398, "logps/chosen": -214.2917022705078, "logps/rejected": -194.44911193847656, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 1.5318313837051392, "rewards/margins": 6.0067338943481445, "rewards/rejected": -4.474902629852295, "step": 6158 }, { "epoch": 1.36, "learning_rate": 5.845588235294119e-06, "logits/chosen": -1.3685362339019775, "logits/rejected": -1.2161600589752197, "logps/chosen": -154.75242614746094, "logps/rejected": -280.22833251953125, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -2.1972367763519287, "rewards/margins": 4.186039924621582, "rewards/rejected": -6.383276462554932, "step": 6159 }, { "epoch": 1.36, "learning_rate": 5.882352941176471e-06, "logits/chosen": -1.0297971963882446, "logits/rejected": -1.038474202156067, "logps/chosen": -182.1089324951172, "logps/rejected": -203.4596405029297, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.13280487060546875, "rewards/margins": 7.655131816864014, "rewards/rejected": -7.522326946258545, "step": 6160 }, { "epoch": 1.36, "learning_rate": 5.919117647058824e-06, "logits/chosen": -1.739212155342102, "logits/rejected": -1.6557724475860596, "logps/chosen": -121.95980072021484, "logps/rejected": -210.56442260742188, "loss": 0.0638, "rewards/accuracies": 1.0, "rewards/chosen": -3.0972297191619873, "rewards/margins": 4.96247673034668, "rewards/rejected": -8.059706687927246, "step": 6161 }, { "epoch": 1.36, "learning_rate": 5.955882352941177e-06, "logits/chosen": -1.496956467628479, "logits/rejected": -1.5525951385498047, "logps/chosen": -191.19912719726562, "logps/rejected": -183.61322021484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.9764251708984375, "rewards/margins": 8.22535514831543, "rewards/rejected": -6.248929500579834, "step": 6162 }, { "epoch": 1.36, "learning_rate": 5.99264705882353e-06, "logits/chosen": -1.1524080038070679, "logits/rejected": -0.8512036204338074, "logps/chosen": -182.06390380859375, "logps/rejected": -258.6122741699219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.108892798423767, "rewards/margins": 10.06363582611084, "rewards/rejected": -11.172528266906738, "step": 6163 }, { "epoch": 1.36, "learning_rate": 6.029411764705883e-06, "logits/chosen": -1.253883719444275, "logits/rejected": -1.2382303476333618, "logps/chosen": -113.59548950195312, "logps/rejected": -211.97671508789062, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.3926315307617188, "rewards/margins": 5.498816013336182, "rewards/rejected": -7.8914475440979, "step": 6164 }, { "epoch": 1.36, "learning_rate": 6.066176470588236e-06, "logits/chosen": -1.4433258771896362, "logits/rejected": -1.4235159158706665, "logps/chosen": -105.75265502929688, "logps/rejected": -106.76991271972656, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -2.820321798324585, "rewards/margins": 3.724835157394409, "rewards/rejected": -6.545156955718994, "step": 6165 }, { "epoch": 1.36, "learning_rate": 6.102941176470589e-06, "logits/chosen": -1.1509159803390503, "logits/rejected": -1.13888418674469, "logps/chosen": -154.71334838867188, "logps/rejected": -113.38188934326172, "loss": 0.049, "rewards/accuracies": 1.0, "rewards/chosen": -2.7342758178710938, "rewards/margins": 2.2745604515075684, "rewards/rejected": -5.008836269378662, "step": 6166 }, { "epoch": 1.36, "learning_rate": 6.139705882352942e-06, "logits/chosen": -1.460917353630066, "logits/rejected": -1.4656416177749634, "logps/chosen": -138.23513793945312, "logps/rejected": -153.4816131591797, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -3.2875938415527344, "rewards/margins": 2.6466012001037598, "rewards/rejected": -5.934195041656494, "step": 6167 }, { "epoch": 1.37, "learning_rate": 6.176470588235295e-06, "logits/chosen": -1.1036455631256104, "logits/rejected": -1.1036455631256104, "logps/chosen": -204.14927673339844, "logps/rejected": -204.14927673339844, "loss": 0.3569, "rewards/accuracies": 0.0, "rewards/chosen": -6.760647773742676, "rewards/margins": 0.0, "rewards/rejected": -6.760647773742676, "step": 6168 }, { "epoch": 1.37, "learning_rate": 6.213235294117647e-06, "logits/chosen": -1.38431978225708, "logits/rejected": -1.4107599258422852, "logps/chosen": -119.66095733642578, "logps/rejected": -124.71483612060547, "loss": 0.6364, "rewards/accuracies": 1.0, "rewards/chosen": -3.486313581466675, "rewards/margins": 5.516674041748047, "rewards/rejected": -9.0029878616333, "step": 6169 }, { "epoch": 1.37, "learning_rate": 6.25e-06, "logits/chosen": -1.2820093631744385, "logits/rejected": -1.4217729568481445, "logps/chosen": -263.2868347167969, "logps/rejected": -206.97781372070312, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.9948532581329346, "rewards/margins": 7.5570831298828125, "rewards/rejected": -10.551936149597168, "step": 6170 }, { "epoch": 1.37, "learning_rate": 6.286764705882353e-06, "logits/chosen": -1.3569406270980835, "logits/rejected": -1.463420033454895, "logps/chosen": -195.52615356445312, "logps/rejected": -189.73385620117188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.389190673828125, "rewards/margins": 5.582364082336426, "rewards/rejected": -5.193173408508301, "step": 6171 }, { "epoch": 1.37, "learning_rate": 6.323529411764706e-06, "logits/chosen": -1.359198808670044, "logits/rejected": -0.7940488457679749, "logps/chosen": -302.88720703125, "logps/rejected": -376.659423828125, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": 1.450353980064392, "rewards/margins": 16.404895782470703, "rewards/rejected": -14.954541206359863, "step": 6172 }, { "epoch": 1.37, "learning_rate": 6.360294117647059e-06, "logits/chosen": -1.2217369079589844, "logits/rejected": -1.1995958089828491, "logps/chosen": -85.51512145996094, "logps/rejected": -105.13320922851562, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.8980690240859985, "rewards/margins": 5.555476665496826, "rewards/rejected": -7.453545570373535, "step": 6173 }, { "epoch": 1.37, "learning_rate": 6.397058823529412e-06, "logits/chosen": -1.4183669090270996, "logits/rejected": -1.4183669090270996, "logps/chosen": -200.27035522460938, "logps/rejected": -200.27035522460938, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -7.5277605056762695, "rewards/margins": 0.0, "rewards/rejected": -7.5277605056762695, "step": 6174 }, { "epoch": 1.37, "learning_rate": 6.433823529411766e-06, "logits/chosen": -1.6180144548416138, "logits/rejected": -1.6335920095443726, "logps/chosen": -104.20521545410156, "logps/rejected": -147.16522216796875, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": -2.521069288253784, "rewards/margins": 3.1851871013641357, "rewards/rejected": -5.70625638961792, "step": 6175 }, { "epoch": 1.37, "learning_rate": 6.470588235294119e-06, "logits/chosen": -1.5644543170928955, "logits/rejected": -1.1856130361557007, "logps/chosen": -110.5052719116211, "logps/rejected": -650.7841796875, "loss": 0.4838, "rewards/accuracies": 1.0, "rewards/chosen": -4.70272970199585, "rewards/margins": 54.10736083984375, "rewards/rejected": -58.810089111328125, "step": 6176 }, { "epoch": 1.37, "learning_rate": 6.507352941176472e-06, "logits/chosen": -1.6768230199813843, "logits/rejected": -1.6346756219863892, "logps/chosen": -137.40106201171875, "logps/rejected": -145.5082244873047, "loss": 0.5925, "rewards/accuracies": 1.0, "rewards/chosen": -6.869452953338623, "rewards/margins": 0.9383487701416016, "rewards/rejected": -7.807801723480225, "step": 6177 }, { "epoch": 1.37, "learning_rate": 6.544117647058824e-06, "logits/chosen": -1.4968516826629639, "logits/rejected": -1.5292110443115234, "logps/chosen": -97.60205078125, "logps/rejected": -115.9705581665039, "loss": 0.0882, "rewards/accuracies": 1.0, "rewards/chosen": -1.2562156915664673, "rewards/margins": 1.6747573614120483, "rewards/rejected": -2.9309730529785156, "step": 6178 }, { "epoch": 1.37, "learning_rate": 6.580882352941177e-06, "logits/chosen": -1.221657395362854, "logits/rejected": -1.2548969984054565, "logps/chosen": -226.75645446777344, "logps/rejected": -167.13156127929688, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 1.1760040521621704, "rewards/margins": 7.3895158767700195, "rewards/rejected": -6.213511943817139, "step": 6179 }, { "epoch": 1.37, "learning_rate": 6.61764705882353e-06, "logits/chosen": -1.2509868144989014, "logits/rejected": -1.306868553161621, "logps/chosen": -241.72610473632812, "logps/rejected": -221.1345672607422, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.296838402748108, "rewards/margins": 5.314845561981201, "rewards/rejected": -6.6116838455200195, "step": 6180 }, { "epoch": 1.37, "learning_rate": 6.654411764705883e-06, "logits/chosen": -1.484474778175354, "logits/rejected": -1.399458408355713, "logps/chosen": -37.188472747802734, "logps/rejected": -233.00653076171875, "loss": 0.3177, "rewards/accuracies": 1.0, "rewards/chosen": -0.6311653256416321, "rewards/margins": 5.966587066650391, "rewards/rejected": -6.597752571105957, "step": 6181 }, { "epoch": 1.37, "learning_rate": 6.6911764705882356e-06, "logits/chosen": -1.6361932754516602, "logits/rejected": -1.4846683740615845, "logps/chosen": -136.284912109375, "logps/rejected": -174.90744018554688, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.2895805835723877, "rewards/margins": 7.894534111022949, "rewards/rejected": -10.184114456176758, "step": 6182 }, { "epoch": 1.37, "learning_rate": 6.727941176470589e-06, "logits/chosen": -1.5714783668518066, "logits/rejected": -1.6188576221466064, "logps/chosen": -210.14773559570312, "logps/rejected": -148.6389923095703, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 3.5811798572540283, "rewards/margins": 7.043269157409668, "rewards/rejected": -3.4620895385742188, "step": 6183 }, { "epoch": 1.37, "learning_rate": 6.764705882352942e-06, "logits/chosen": -1.5612231492996216, "logits/rejected": -1.3557878732681274, "logps/chosen": -226.87591552734375, "logps/rejected": -437.88360595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4304412603378296, "rewards/margins": 10.131302833557129, "rewards/rejected": -11.56174373626709, "step": 6184 }, { "epoch": 1.37, "learning_rate": 6.801470588235295e-06, "logits/chosen": -1.4901716709136963, "logits/rejected": -1.436330795288086, "logps/chosen": -114.62522888183594, "logps/rejected": -96.68675994873047, "loss": 0.2529, "rewards/accuracies": 1.0, "rewards/chosen": -3.6269302368164062, "rewards/margins": 0.4178800582885742, "rewards/rejected": -4.0448102951049805, "step": 6185 }, { "epoch": 1.37, "learning_rate": 6.838235294117648e-06, "logits/chosen": -1.0530813932418823, "logits/rejected": -1.0233988761901855, "logps/chosen": -196.33212280273438, "logps/rejected": -256.0066833496094, "loss": 0.1345, "rewards/accuracies": 1.0, "rewards/chosen": 0.02148590050637722, "rewards/margins": 1.3184937238693237, "rewards/rejected": -1.2970077991485596, "step": 6186 }, { "epoch": 1.37, "learning_rate": 6.875e-06, "logits/chosen": -1.2892171144485474, "logits/rejected": -1.3811099529266357, "logps/chosen": -183.277099609375, "logps/rejected": -165.8650665283203, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": -3.7653870582580566, "rewards/margins": 2.2894506454467773, "rewards/rejected": -6.054837703704834, "step": 6187 }, { "epoch": 1.37, "learning_rate": 6.911764705882353e-06, "logits/chosen": -1.522641897201538, "logits/rejected": -1.5001957416534424, "logps/chosen": -102.43356323242188, "logps/rejected": -149.44268798828125, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -3.580329179763794, "rewards/margins": 3.820815324783325, "rewards/rejected": -7.401144504547119, "step": 6188 }, { "epoch": 1.37, "learning_rate": 6.948529411764706e-06, "logits/chosen": -1.034204363822937, "logits/rejected": -1.0626354217529297, "logps/chosen": -171.0980987548828, "logps/rejected": -196.9209747314453, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.9616684317588806, "rewards/margins": 5.5487871170043945, "rewards/rejected": -6.51045560836792, "step": 6189 }, { "epoch": 1.37, "learning_rate": 6.985294117647059e-06, "logits/chosen": -1.3796182870864868, "logits/rejected": -1.4109547138214111, "logps/chosen": -192.17819213867188, "logps/rejected": -157.8119659423828, "loss": 0.2113, "rewards/accuracies": 1.0, "rewards/chosen": -3.8187806606292725, "rewards/margins": 0.6427633762359619, "rewards/rejected": -4.461544036865234, "step": 6190 }, { "epoch": 1.37, "learning_rate": 7.022058823529412e-06, "logits/chosen": -1.4265860319137573, "logits/rejected": -1.517103910446167, "logps/chosen": -276.15179443359375, "logps/rejected": -176.32229614257812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 2.3006227016448975, "rewards/margins": 5.704648017883301, "rewards/rejected": -3.4040253162384033, "step": 6191 }, { "epoch": 1.37, "learning_rate": 7.058823529411766e-06, "logits/chosen": -1.4282879829406738, "logits/rejected": -1.4016631841659546, "logps/chosen": -141.00125122070312, "logps/rejected": -136.9532928466797, "loss": 0.7882, "rewards/accuracies": 0.0, "rewards/chosen": -4.08074951171875, "rewards/margins": -0.6939818859100342, "rewards/rejected": -3.386767625808716, "step": 6192 }, { "epoch": 1.37, "learning_rate": 7.095588235294119e-06, "logits/chosen": -1.522206425666809, "logits/rejected": -1.3763645887374878, "logps/chosen": -141.5352783203125, "logps/rejected": -249.03823852539062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 2.6326050758361816, "rewards/margins": 7.317163467407227, "rewards/rejected": -4.684558391571045, "step": 6193 }, { "epoch": 1.37, "learning_rate": 7.132352941176472e-06, "logits/chosen": -1.40958571434021, "logits/rejected": -1.473636507987976, "logps/chosen": -252.84774780273438, "logps/rejected": -166.22296142578125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 1.7288910150527954, "rewards/margins": 5.949000358581543, "rewards/rejected": -4.220109462738037, "step": 6194 }, { "epoch": 1.37, "learning_rate": 7.169117647058825e-06, "logits/chosen": -1.4388691186904907, "logits/rejected": -1.4818589687347412, "logps/chosen": -48.11746597290039, "logps/rejected": -725.3880004882812, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.6525730490684509, "rewards/margins": 54.96232604980469, "rewards/rejected": -55.614898681640625, "step": 6195 }, { "epoch": 1.37, "learning_rate": 7.205882352941177e-06, "logits/chosen": -1.3683348894119263, "logits/rejected": -1.2538080215454102, "logps/chosen": -114.9107666015625, "logps/rejected": -195.17706298828125, "loss": 0.1815, "rewards/accuracies": 1.0, "rewards/chosen": -0.6917938590049744, "rewards/margins": 0.8260741829872131, "rewards/rejected": -1.5178680419921875, "step": 6196 }, { "epoch": 1.37, "learning_rate": 7.24264705882353e-06, "logits/chosen": -1.3336865901947021, "logits/rejected": -1.2634296417236328, "logps/chosen": -201.5142822265625, "logps/rejected": -254.84536743164062, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -1.6509277820587158, "rewards/margins": 3.8375399112701416, "rewards/rejected": -5.488467693328857, "step": 6197 }, { "epoch": 1.37, "learning_rate": 7.2794117647058826e-06, "logits/chosen": -1.6039278507232666, "logits/rejected": -1.645606279373169, "logps/chosen": -137.895263671875, "logps/rejected": -129.19839477539062, "loss": 0.065, "rewards/accuracies": 1.0, "rewards/chosen": -1.1306167840957642, "rewards/margins": 1.9747382402420044, "rewards/rejected": -3.1053550243377686, "step": 6198 }, { "epoch": 1.37, "learning_rate": 7.3161764705882355e-06, "logits/chosen": -1.517198085784912, "logits/rejected": -1.4779512882232666, "logps/chosen": -132.99819946289062, "logps/rejected": -214.28126525878906, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": 0.11145477741956711, "rewards/margins": 4.8157124519348145, "rewards/rejected": -4.704257488250732, "step": 6199 }, { "epoch": 1.37, "learning_rate": 7.352941176470589e-06, "logits/chosen": -1.1955270767211914, "logits/rejected": -1.2644526958465576, "logps/chosen": -146.34613037109375, "logps/rejected": -176.92593383789062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.1795929670333862, "rewards/margins": 8.518810272216797, "rewards/rejected": -7.339217662811279, "step": 6200 }, { "epoch": 1.37, "learning_rate": 7.389705882352942e-06, "logits/chosen": -1.6778364181518555, "logits/rejected": -1.6778364181518555, "logps/chosen": -195.055908203125, "logps/rejected": -195.055908203125, "loss": 0.3502, "rewards/accuracies": 0.0, "rewards/chosen": -9.798428535461426, "rewards/margins": 0.0, "rewards/rejected": -9.798428535461426, "step": 6201 }, { "epoch": 1.37, "learning_rate": 7.426470588235295e-06, "logits/chosen": -1.5441813468933105, "logits/rejected": -1.5211610794067383, "logps/chosen": -97.79594421386719, "logps/rejected": -219.4503173828125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.0205764770507812, "rewards/margins": 5.644738674163818, "rewards/rejected": -6.6653151512146, "step": 6202 }, { "epoch": 1.37, "learning_rate": 7.463235294117648e-06, "logits/chosen": -1.3040682077407837, "logits/rejected": -1.309617042541504, "logps/chosen": -112.16697692871094, "logps/rejected": -119.41033935546875, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.2697380781173706, "rewards/margins": 8.33273696899414, "rewards/rejected": -9.6024751663208, "step": 6203 }, { "epoch": 1.37, "learning_rate": 7.500000000000001e-06, "logits/chosen": -1.4251904487609863, "logits/rejected": -1.4023946523666382, "logps/chosen": -140.12393188476562, "logps/rejected": -216.38558959960938, "loss": 0.0513, "rewards/accuracies": 1.0, "rewards/chosen": -1.60216224193573, "rewards/margins": 2.2259902954101562, "rewards/rejected": -3.8281524181365967, "step": 6204 }, { "epoch": 1.37, "learning_rate": 7.536764705882353e-06, "logits/chosen": -1.5358798503875732, "logits/rejected": -1.693253517150879, "logps/chosen": -176.17535400390625, "logps/rejected": -216.17547607421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9557907581329346, "rewards/margins": 14.091734886169434, "rewards/rejected": -17.04752540588379, "step": 6205 }, { "epoch": 1.37, "learning_rate": 7.573529411764706e-06, "logits/chosen": -1.4464856386184692, "logits/rejected": -1.5175971984863281, "logps/chosen": -171.4169921875, "logps/rejected": -296.1857604980469, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 1.2646347284317017, "rewards/margins": 17.658178329467773, "rewards/rejected": -16.393543243408203, "step": 6206 }, { "epoch": 1.37, "learning_rate": 7.610294117647059e-06, "logits/chosen": -1.351279616355896, "logits/rejected": -1.3907169103622437, "logps/chosen": -103.73184967041016, "logps/rejected": -185.281494140625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.07030029594898224, "rewards/margins": 5.844873905181885, "rewards/rejected": -5.9151740074157715, "step": 6207 }, { "epoch": 1.37, "learning_rate": 7.647058823529411e-06, "logits/chosen": -1.5447815656661987, "logits/rejected": -1.5622484683990479, "logps/chosen": -125.34651947021484, "logps/rejected": -178.7863311767578, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.4109947681427002, "rewards/margins": 9.466439247131348, "rewards/rejected": -10.877433776855469, "step": 6208 }, { "epoch": 1.37, "learning_rate": 7.683823529411766e-06, "logits/chosen": -0.9912931323051453, "logits/rejected": -0.9912931323051453, "logps/chosen": -104.96829223632812, "logps/rejected": -104.96829223632812, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.581413269042969, "rewards/margins": 0.0, "rewards/rejected": -6.581413269042969, "step": 6209 }, { "epoch": 1.37, "learning_rate": 7.720588235294119e-06, "logits/chosen": -1.379005789756775, "logits/rejected": -1.4232356548309326, "logps/chosen": -126.35369110107422, "logps/rejected": -189.8944854736328, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.4510353803634644, "rewards/margins": 5.4642510414123535, "rewards/rejected": -6.915286540985107, "step": 6210 }, { "epoch": 1.37, "learning_rate": 7.757352941176472e-06, "logits/chosen": -1.2484533786773682, "logits/rejected": -1.361602783203125, "logps/chosen": -258.2779846191406, "logps/rejected": -178.18426513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9032883644104004, "rewards/margins": 10.279596328735352, "rewards/rejected": -14.182884216308594, "step": 6211 }, { "epoch": 1.37, "learning_rate": 7.794117647058825e-06, "logits/chosen": -1.1784857511520386, "logits/rejected": -1.2215968370437622, "logps/chosen": -133.68408203125, "logps/rejected": -122.97523498535156, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.731793224811554, "rewards/margins": 7.990771293640137, "rewards/rejected": -8.722564697265625, "step": 6212 }, { "epoch": 1.38, "learning_rate": 7.830882352941177e-06, "logits/chosen": -1.0429387092590332, "logits/rejected": -1.1249282360076904, "logps/chosen": -260.5132751464844, "logps/rejected": -147.46958923339844, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": 0.5622040033340454, "rewards/margins": 7.663748264312744, "rewards/rejected": -7.101544380187988, "step": 6213 }, { "epoch": 1.38, "learning_rate": 7.86764705882353e-06, "logits/chosen": -1.323651909828186, "logits/rejected": -1.3566161394119263, "logps/chosen": -105.14398193359375, "logps/rejected": -114.4462890625, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/chosen": -0.49843522906303406, "rewards/margins": 2.5272819995880127, "rewards/rejected": -3.025717258453369, "step": 6214 }, { "epoch": 1.38, "learning_rate": 7.904411764705883e-06, "logits/chosen": -1.2043769359588623, "logits/rejected": -1.219028353691101, "logps/chosen": -128.3404998779297, "logps/rejected": -89.11238861083984, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.129705786705017, "rewards/margins": 5.256890296936035, "rewards/rejected": -6.386596202850342, "step": 6215 }, { "epoch": 1.38, "learning_rate": 7.941176470588236e-06, "logits/chosen": -1.4216768741607666, "logits/rejected": -1.4316151142120361, "logps/chosen": -78.54342651367188, "logps/rejected": -79.04195404052734, "loss": 0.266, "rewards/accuracies": 1.0, "rewards/chosen": -6.497151851654053, "rewards/margins": 0.3534250259399414, "rewards/rejected": -6.850576877593994, "step": 6216 }, { "epoch": 1.38, "learning_rate": 7.97794117647059e-06, "logits/chosen": -1.2807234525680542, "logits/rejected": -1.2807234525680542, "logps/chosen": -67.33699035644531, "logps/rejected": -67.33699035644531, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -0.8733699917793274, "rewards/margins": 0.0, "rewards/rejected": -0.8733699917793274, "step": 6217 }, { "epoch": 1.38, "learning_rate": 8.014705882352942e-06, "logits/chosen": -1.4913564920425415, "logits/rejected": -1.508640170097351, "logps/chosen": -136.91162109375, "logps/rejected": -180.27093505859375, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": 0.04310150071978569, "rewards/margins": 3.604710578918457, "rewards/rejected": -3.5616090297698975, "step": 6218 }, { "epoch": 1.38, "learning_rate": 8.051470588235295e-06, "logits/chosen": -1.4056583642959595, "logits/rejected": -1.3655496835708618, "logps/chosen": -92.76184844970703, "logps/rejected": -250.95535278320312, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.5709983706474304, "rewards/margins": 8.955177307128906, "rewards/rejected": -9.526175498962402, "step": 6219 }, { "epoch": 1.38, "learning_rate": 8.088235294117648e-06, "logits/chosen": -1.3718444108963013, "logits/rejected": -1.30718994140625, "logps/chosen": -189.8081512451172, "logps/rejected": -252.75140380859375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 1.1413697004318237, "rewards/margins": 6.277449131011963, "rewards/rejected": -5.13607931137085, "step": 6220 }, { "epoch": 1.38, "learning_rate": 8.125000000000001e-06, "logits/chosen": -1.4727450609207153, "logits/rejected": -1.261810064315796, "logps/chosen": -234.28594970703125, "logps/rejected": -477.689697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.9128021001815796, "rewards/margins": 12.308822631835938, "rewards/rejected": -10.396020889282227, "step": 6221 }, { "epoch": 1.38, "learning_rate": 8.161764705882354e-06, "logits/chosen": -1.4026201963424683, "logits/rejected": -1.5041053295135498, "logps/chosen": -171.73951721191406, "logps/rejected": -83.94711303710938, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": 0.5418838858604431, "rewards/margins": 2.80619215965271, "rewards/rejected": -2.264308214187622, "step": 6222 }, { "epoch": 1.38, "learning_rate": 8.198529411764707e-06, "logits/chosen": -0.8443350195884705, "logits/rejected": -0.8763246536254883, "logps/chosen": -139.60470581054688, "logps/rejected": -111.10307312011719, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 3.6216065883636475, "rewards/margins": 7.455270767211914, "rewards/rejected": -3.8336639404296875, "step": 6223 }, { "epoch": 1.38, "learning_rate": 8.23529411764706e-06, "logits/chosen": -1.3266282081604004, "logits/rejected": -1.3266282081604004, "logps/chosen": -65.37273406982422, "logps/rejected": -65.37273406982422, "loss": 0.3489, "rewards/accuracies": 0.0, "rewards/chosen": -4.806372165679932, "rewards/margins": 0.0, "rewards/rejected": -4.806372165679932, "step": 6224 }, { "epoch": 1.38, "learning_rate": 8.272058823529413e-06, "logits/chosen": -1.2593828439712524, "logits/rejected": -1.2654517889022827, "logps/chosen": -82.15312957763672, "logps/rejected": -74.90330505371094, "loss": 1.1385, "rewards/accuracies": 0.0, "rewards/chosen": -3.454540252685547, "rewards/margins": -1.3542377948760986, "rewards/rejected": -2.1003024578094482, "step": 6225 }, { "epoch": 1.38, "learning_rate": 8.308823529411766e-06, "logits/chosen": -1.404860019683838, "logits/rejected": -1.410046935081482, "logps/chosen": -221.87210083007812, "logps/rejected": -194.34661865234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 1.0062897205352783, "rewards/margins": 8.361858367919922, "rewards/rejected": -7.355568885803223, "step": 6226 }, { "epoch": 1.38, "learning_rate": 8.345588235294119e-06, "logits/chosen": -1.2934054136276245, "logits/rejected": -1.263636589050293, "logps/chosen": -67.09786224365234, "logps/rejected": -161.62533569335938, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.9206836819648743, "rewards/margins": 3.7812435626983643, "rewards/rejected": -4.701927185058594, "step": 6227 }, { "epoch": 1.38, "learning_rate": 8.382352941176472e-06, "logits/chosen": -1.4307323694229126, "logits/rejected": -1.4041786193847656, "logps/chosen": -121.0491714477539, "logps/rejected": -151.45956420898438, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": -0.9555725455284119, "rewards/margins": 2.760546922683716, "rewards/rejected": -3.7161195278167725, "step": 6228 }, { "epoch": 1.38, "learning_rate": 8.419117647058824e-06, "logits/chosen": -1.1652415990829468, "logits/rejected": -1.1283085346221924, "logps/chosen": -193.57659912109375, "logps/rejected": -219.75173950195312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.28778383135795593, "rewards/margins": 10.40276050567627, "rewards/rejected": -10.690544128417969, "step": 6229 }, { "epoch": 1.38, "learning_rate": 8.455882352941177e-06, "logits/chosen": -1.1675746440887451, "logits/rejected": -1.1675746440887451, "logps/chosen": -134.58551025390625, "logps/rejected": -134.58551025390625, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.089923143386841, "rewards/margins": 0.0, "rewards/rejected": -3.089923143386841, "step": 6230 }, { "epoch": 1.38, "learning_rate": 8.49264705882353e-06, "logits/chosen": -1.2952998876571655, "logits/rejected": -1.3541995286941528, "logps/chosen": -107.146728515625, "logps/rejected": -165.27249145507812, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -2.0980803966522217, "rewards/margins": 10.897735595703125, "rewards/rejected": -12.995816230773926, "step": 6231 }, { "epoch": 1.38, "learning_rate": 8.529411764705883e-06, "logits/chosen": -1.1384934186935425, "logits/rejected": -1.1384934186935425, "logps/chosen": -246.45465087890625, "logps/rejected": -246.45465087890625, "loss": 0.3493, "rewards/accuracies": 0.0, "rewards/chosen": -8.625041007995605, "rewards/margins": 0.0, "rewards/rejected": -8.625041007995605, "step": 6232 }, { "epoch": 1.38, "learning_rate": 8.566176470588236e-06, "logits/chosen": -1.2117431163787842, "logits/rejected": -1.2726984024047852, "logps/chosen": -118.31837463378906, "logps/rejected": -134.117919921875, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -2.1158225536346436, "rewards/margins": 8.392327308654785, "rewards/rejected": -10.508150100708008, "step": 6233 }, { "epoch": 1.38, "learning_rate": 8.60294117647059e-06, "logits/chosen": -1.4170163869857788, "logits/rejected": -1.343065619468689, "logps/chosen": -127.028564453125, "logps/rejected": -205.5989990234375, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -0.4268127381801605, "rewards/margins": 3.312100410461426, "rewards/rejected": -3.738913059234619, "step": 6234 }, { "epoch": 1.38, "learning_rate": 8.639705882352942e-06, "logits/chosen": -1.3835171461105347, "logits/rejected": -1.491934061050415, "logps/chosen": -250.95196533203125, "logps/rejected": -142.29290771484375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8485779166221619, "rewards/margins": 6.2851881980896, "rewards/rejected": -7.133766174316406, "step": 6235 }, { "epoch": 1.38, "learning_rate": 8.676470588235295e-06, "logits/chosen": -1.3207553625106812, "logits/rejected": -1.338357925415039, "logps/chosen": -134.58523559570312, "logps/rejected": -123.20608520507812, "loss": 0.122, "rewards/accuracies": 1.0, "rewards/chosen": -1.7546204328536987, "rewards/margins": 1.2893203496932983, "rewards/rejected": -3.043940782546997, "step": 6236 }, { "epoch": 1.38, "learning_rate": 8.713235294117648e-06, "logits/chosen": -1.453309178352356, "logits/rejected": -1.597166657447815, "logps/chosen": -115.931884765625, "logps/rejected": -97.9167709350586, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.104528784751892, "rewards/margins": 5.15678596496582, "rewards/rejected": -6.261314868927002, "step": 6237 }, { "epoch": 1.38, "learning_rate": 8.750000000000001e-06, "logits/chosen": -0.9373690485954285, "logits/rejected": -0.7483705878257751, "logps/chosen": -138.0535888671875, "logps/rejected": -906.8809814453125, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -2.567892551422119, "rewards/margins": 78.47622680664062, "rewards/rejected": -81.04412078857422, "step": 6238 }, { "epoch": 1.38, "learning_rate": 8.786764705882354e-06, "logits/chosen": -1.6156609058380127, "logits/rejected": -1.8205993175506592, "logps/chosen": -235.17274475097656, "logps/rejected": -101.69467163085938, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.1526505947113037, "rewards/margins": 5.607355117797852, "rewards/rejected": -7.760005950927734, "step": 6239 }, { "epoch": 1.38, "learning_rate": 8.823529411764707e-06, "logits/chosen": -1.1756528615951538, "logits/rejected": -1.0774446725845337, "logps/chosen": -102.27291107177734, "logps/rejected": -231.22552490234375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 1.962044596672058, "rewards/margins": 5.056232452392578, "rewards/rejected": -3.0941879749298096, "step": 6240 }, { "epoch": 1.38, "learning_rate": 8.86029411764706e-06, "logits/chosen": -1.4864481687545776, "logits/rejected": -1.4976279735565186, "logps/chosen": -121.2278823852539, "logps/rejected": -134.2003936767578, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -1.590399980545044, "rewards/margins": 3.651026964187622, "rewards/rejected": -5.241426944732666, "step": 6241 }, { "epoch": 1.38, "learning_rate": 8.897058823529413e-06, "logits/chosen": -1.1395050287246704, "logits/rejected": -1.136535406112671, "logps/chosen": -106.92245483398438, "logps/rejected": -112.98478698730469, "loss": 0.0816, "rewards/accuracies": 1.0, "rewards/chosen": -0.5754295587539673, "rewards/margins": 2.4636435508728027, "rewards/rejected": -3.0390732288360596, "step": 6242 }, { "epoch": 1.38, "learning_rate": 8.933823529411766e-06, "logits/chosen": -1.3592603206634521, "logits/rejected": -1.3539941310882568, "logps/chosen": -181.17616271972656, "logps/rejected": -165.42628479003906, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.5664260387420654, "rewards/margins": 5.291328430175781, "rewards/rejected": -7.857754707336426, "step": 6243 }, { "epoch": 1.38, "learning_rate": 8.970588235294119e-06, "logits/chosen": -1.1356251239776611, "logits/rejected": -1.1413257122039795, "logps/chosen": -287.69891357421875, "logps/rejected": -265.83868408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.899449110031128, "rewards/margins": 11.631452560424805, "rewards/rejected": -15.530901908874512, "step": 6244 }, { "epoch": 1.38, "learning_rate": 9.007352941176471e-06, "logits/chosen": -1.1272388696670532, "logits/rejected": -1.1856062412261963, "logps/chosen": -166.0653533935547, "logps/rejected": -153.42306518554688, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.0341262817382812, "rewards/margins": 7.761371612548828, "rewards/rejected": -9.79549789428711, "step": 6245 }, { "epoch": 1.38, "learning_rate": 9.044117647058824e-06, "logits/chosen": -1.693773627281189, "logits/rejected": -1.693773627281189, "logps/chosen": -96.35879516601562, "logps/rejected": -96.35879516601562, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.261271953582764, "rewards/margins": 0.0, "rewards/rejected": -4.261271953582764, "step": 6246 }, { "epoch": 1.38, "learning_rate": 9.080882352941177e-06, "logits/chosen": -1.2784017324447632, "logits/rejected": -1.2784017324447632, "logps/chosen": -150.89385986328125, "logps/rejected": -150.89385986328125, "loss": 0.3515, "rewards/accuracies": 0.0, "rewards/chosen": -5.602086067199707, "rewards/margins": 0.0, "rewards/rejected": -5.602086067199707, "step": 6247 }, { "epoch": 1.38, "learning_rate": 9.11764705882353e-06, "logits/chosen": -0.9551590085029602, "logits/rejected": -0.919381856918335, "logps/chosen": -224.87387084960938, "logps/rejected": -254.78192138671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 3.237936496734619, "rewards/margins": 7.107853889465332, "rewards/rejected": -3.869917392730713, "step": 6248 }, { "epoch": 1.38, "learning_rate": 9.154411764705883e-06, "logits/chosen": -1.3581639528274536, "logits/rejected": -1.2745835781097412, "logps/chosen": -111.17664337158203, "logps/rejected": -277.4830017089844, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.072089433670044, "rewards/margins": 7.046194076538086, "rewards/rejected": -8.11828327178955, "step": 6249 }, { "epoch": 1.38, "learning_rate": 9.191176470588236e-06, "logits/chosen": -1.282913327217102, "logits/rejected": -1.2712031602859497, "logps/chosen": -122.96700286865234, "logps/rejected": -134.1591796875, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": -1.2312614917755127, "rewards/margins": 3.4897825717926025, "rewards/rejected": -4.721044063568115, "step": 6250 }, { "epoch": 1.38, "learning_rate": 9.227941176470589e-06, "logits/chosen": -1.2887675762176514, "logits/rejected": -1.2665518522262573, "logps/chosen": -175.62261962890625, "logps/rejected": -359.04327392578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 3.755110263824463, "rewards/margins": 28.581769943237305, "rewards/rejected": -24.82666015625, "step": 6251 }, { "epoch": 1.38, "learning_rate": 9.264705882352942e-06, "logits/chosen": -1.276463508605957, "logits/rejected": -1.1975295543670654, "logps/chosen": -156.5166778564453, "logps/rejected": -336.3935546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.779832601547241, "rewards/margins": 7.467679977416992, "rewards/rejected": -10.247512817382812, "step": 6252 }, { "epoch": 1.38, "learning_rate": 9.301470588235295e-06, "logits/chosen": -1.2286144495010376, "logits/rejected": -1.2450997829437256, "logps/chosen": -196.39134216308594, "logps/rejected": -129.79937744140625, "loss": 0.0989, "rewards/accuracies": 1.0, "rewards/chosen": -2.265150547027588, "rewards/margins": 1.5204620361328125, "rewards/rejected": -3.7856125831604004, "step": 6253 }, { "epoch": 1.38, "learning_rate": 9.338235294117648e-06, "logits/chosen": -1.3434232473373413, "logits/rejected": -1.3434232473373413, "logps/chosen": -87.11282348632812, "logps/rejected": -87.11282348632812, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -0.4847969114780426, "rewards/margins": 0.0, "rewards/rejected": -0.4847969114780426, "step": 6254 }, { "epoch": 1.38, "learning_rate": 9.375000000000001e-06, "logits/chosen": -1.6544370651245117, "logits/rejected": -1.7306495904922485, "logps/chosen": -114.55664825439453, "logps/rejected": -94.33375549316406, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.9188088178634644, "rewards/margins": 5.486797332763672, "rewards/rejected": -7.405606269836426, "step": 6255 }, { "epoch": 1.38, "learning_rate": 9.411764705882354e-06, "logits/chosen": -1.5011711120605469, "logits/rejected": -1.4306986331939697, "logps/chosen": -57.35563659667969, "logps/rejected": -165.18228149414062, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.6607246398925781, "rewards/margins": 4.515880584716797, "rewards/rejected": -6.176605224609375, "step": 6256 }, { "epoch": 1.38, "learning_rate": 9.448529411764707e-06, "logits/chosen": -1.140282154083252, "logits/rejected": -1.140282154083252, "logps/chosen": -155.69534301757812, "logps/rejected": -155.69534301757812, "loss": 0.5023, "rewards/accuracies": 0.0, "rewards/chosen": -7.755219459533691, "rewards/margins": 0.0, "rewards/rejected": -7.755219459533691, "step": 6257 }, { "epoch": 1.39, "learning_rate": 9.48529411764706e-06, "logits/chosen": -1.6276674270629883, "logits/rejected": -1.4991167783737183, "logps/chosen": -140.4168701171875, "logps/rejected": -266.82415771484375, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -4.761984348297119, "rewards/margins": 5.049478054046631, "rewards/rejected": -9.81146240234375, "step": 6258 }, { "epoch": 1.39, "learning_rate": 9.522058823529413e-06, "logits/chosen": -1.103291630744934, "logits/rejected": -1.103291630744934, "logps/chosen": -96.4798812866211, "logps/rejected": -96.4798812866211, "loss": 0.3601, "rewards/accuracies": 0.0, "rewards/chosen": -5.104986667633057, "rewards/margins": 0.0, "rewards/rejected": -5.104986667633057, "step": 6259 }, { "epoch": 1.39, "learning_rate": 9.558823529411766e-06, "logits/chosen": -1.6000062227249146, "logits/rejected": -1.6150816679000854, "logps/chosen": -96.76923370361328, "logps/rejected": -105.25831604003906, "loss": 0.1276, "rewards/accuracies": 1.0, "rewards/chosen": -2.323842763900757, "rewards/margins": 2.1699321269989014, "rewards/rejected": -4.493774890899658, "step": 6260 }, { "epoch": 1.39, "learning_rate": 9.595588235294119e-06, "logits/chosen": -1.6378395557403564, "logits/rejected": -1.6540690660476685, "logps/chosen": -166.25863647460938, "logps/rejected": -120.15589904785156, "loss": 0.0697, "rewards/accuracies": 1.0, "rewards/chosen": -1.5082015991210938, "rewards/margins": 1.8999192714691162, "rewards/rejected": -3.40812087059021, "step": 6261 }, { "epoch": 1.39, "learning_rate": 9.632352941176471e-06, "logits/chosen": -1.2609827518463135, "logits/rejected": -1.30930757522583, "logps/chosen": -193.39596557617188, "logps/rejected": -186.844482421875, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -3.542105197906494, "rewards/margins": 3.097198486328125, "rewards/rejected": -6.639303684234619, "step": 6262 }, { "epoch": 1.39, "learning_rate": 9.669117647058824e-06, "logits/chosen": -1.294100284576416, "logits/rejected": -1.3049843311309814, "logps/chosen": -103.59310913085938, "logps/rejected": -144.67803955078125, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -0.41406556963920593, "rewards/margins": 4.627545356750488, "rewards/rejected": -5.0416107177734375, "step": 6263 }, { "epoch": 1.39, "learning_rate": 9.705882352941177e-06, "logits/chosen": -1.3161884546279907, "logits/rejected": -1.3221793174743652, "logps/chosen": -58.73513412475586, "logps/rejected": -87.0810546875, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": 0.42641565203666687, "rewards/margins": 2.175760269165039, "rewards/rejected": -1.7493447065353394, "step": 6264 }, { "epoch": 1.39, "learning_rate": 9.74264705882353e-06, "logits/chosen": -1.358567237854004, "logits/rejected": -1.365726351737976, "logps/chosen": -70.59184265136719, "logps/rejected": -91.40525817871094, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": -0.19125671684741974, "rewards/margins": 2.8784499168395996, "rewards/rejected": -3.069706678390503, "step": 6265 }, { "epoch": 1.39, "learning_rate": 9.779411764705883e-06, "logits/chosen": -1.3386836051940918, "logits/rejected": -1.312626838684082, "logps/chosen": -65.54373168945312, "logps/rejected": -80.92735290527344, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.3089134693145752, "rewards/margins": 4.536322593688965, "rewards/rejected": -5.845235824584961, "step": 6266 }, { "epoch": 1.39, "learning_rate": 9.816176470588236e-06, "logits/chosen": -1.3824403285980225, "logits/rejected": -1.324906587600708, "logps/chosen": -118.41505432128906, "logps/rejected": -277.62286376953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.00421142578125, "rewards/margins": 9.836554527282715, "rewards/rejected": -9.840765953063965, "step": 6267 }, { "epoch": 1.39, "learning_rate": 9.852941176470589e-06, "logits/chosen": -1.490660309791565, "logits/rejected": -1.46480393409729, "logps/chosen": -99.83516693115234, "logps/rejected": -147.59927368164062, "loss": 0.5603, "rewards/accuracies": 0.0, "rewards/chosen": -3.0656821727752686, "rewards/margins": -0.6840276718139648, "rewards/rejected": -2.3816545009613037, "step": 6268 }, { "epoch": 1.39, "learning_rate": 9.889705882352942e-06, "logits/chosen": -1.3301918506622314, "logits/rejected": -1.4101728200912476, "logps/chosen": -117.28395080566406, "logps/rejected": -190.13575744628906, "loss": 0.1086, "rewards/accuracies": 1.0, "rewards/chosen": -1.3415313959121704, "rewards/margins": 5.541018962860107, "rewards/rejected": -6.882550239562988, "step": 6269 }, { "epoch": 1.39, "learning_rate": 9.926470588235295e-06, "logits/chosen": -1.3618940114974976, "logits/rejected": -1.3790308237075806, "logps/chosen": -88.2335205078125, "logps/rejected": -106.34635162353516, "loss": 0.2397, "rewards/accuracies": 1.0, "rewards/chosen": -0.5576645135879517, "rewards/margins": 5.9861297607421875, "rewards/rejected": -6.54379415512085, "step": 6270 }, { "epoch": 1.39, "learning_rate": 9.963235294117648e-06, "logits/chosen": -1.3696645498275757, "logits/rejected": -1.4647018909454346, "logps/chosen": -264.745849609375, "logps/rejected": -322.1372985839844, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": -0.806506335735321, "rewards/margins": 6.720900058746338, "rewards/rejected": -7.527406215667725, "step": 6271 }, { "epoch": 1.39, "learning_rate": 1e-05, "logits/chosen": -1.356919288635254, "logits/rejected": -1.356919288635254, "logps/chosen": -114.45425415039062, "logps/rejected": -114.45425415039062, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.8589141368865967, "rewards/margins": 0.0, "rewards/rejected": -3.8589141368865967, "step": 6272 }, { "epoch": 1.39, "learning_rate": 9.99999967875601e-06, "logits/chosen": -1.7923493385314941, "logits/rejected": -1.8880491256713867, "logps/chosen": -183.0513153076172, "logps/rejected": -140.85797119140625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.333796739578247, "rewards/margins": 5.383420944213867, "rewards/rejected": -8.717217445373535, "step": 6273 }, { "epoch": 1.39, "learning_rate": 9.999998715024082e-06, "logits/chosen": -1.010838508605957, "logits/rejected": -0.6333467364311218, "logps/chosen": -184.97232055664062, "logps/rejected": -689.8052978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6719406843185425, "rewards/margins": 45.03532409667969, "rewards/rejected": -46.7072639465332, "step": 6274 }, { "epoch": 1.39, "learning_rate": 9.999997108804337e-06, "logits/chosen": -1.5416747331619263, "logits/rejected": -1.4813392162322998, "logps/chosen": -235.69183349609375, "logps/rejected": -332.65093994140625, "loss": 0.0487, "rewards/accuracies": 1.0, "rewards/chosen": -3.0570647716522217, "rewards/margins": 9.14538860321045, "rewards/rejected": -12.20245361328125, "step": 6275 }, { "epoch": 1.39, "learning_rate": 9.999994860096985e-06, "logits/chosen": -1.3449069261550903, "logits/rejected": -1.3247610330581665, "logps/chosen": -76.57759857177734, "logps/rejected": -122.48698425292969, "loss": 0.0645, "rewards/accuracies": 1.0, "rewards/chosen": -2.8594398498535156, "rewards/margins": 1.9828615188598633, "rewards/rejected": -4.842301368713379, "step": 6276 }, { "epoch": 1.39, "learning_rate": 9.99999196890231e-06, "logits/chosen": -1.272675633430481, "logits/rejected": -1.272675633430481, "logps/chosen": -88.26526641845703, "logps/rejected": -88.26526641845703, "loss": 0.365, "rewards/accuracies": 0.0, "rewards/chosen": -2.817793369293213, "rewards/margins": 0.0, "rewards/rejected": -2.817793369293213, "step": 6277 }, { "epoch": 1.39, "learning_rate": 9.999988435220688e-06, "logits/chosen": -1.228410005569458, "logits/rejected": -1.228410005569458, "logps/chosen": -191.34397888183594, "logps/rejected": -191.34397888183594, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.541273593902588, "rewards/margins": 0.0, "rewards/rejected": -5.541273593902588, "step": 6278 }, { "epoch": 1.39, "learning_rate": 9.999984259052573e-06, "logits/chosen": -1.2121680974960327, "logits/rejected": -1.2121680974960327, "logps/chosen": -110.2408218383789, "logps/rejected": -110.2408218383789, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.664714813232422, "rewards/margins": 0.0, "rewards/rejected": -6.664714813232422, "step": 6279 }, { "epoch": 1.39, "learning_rate": 9.9999794403985e-06, "logits/chosen": -1.6100648641586304, "logits/rejected": -1.6100648641586304, "logps/chosen": -236.04159545898438, "logps/rejected": -236.04159545898438, "loss": 0.4971, "rewards/accuracies": 0.0, "rewards/chosen": -12.329297065734863, "rewards/margins": 0.0, "rewards/rejected": -12.329297065734863, "step": 6280 }, { "epoch": 1.39, "learning_rate": 9.999973979259088e-06, "logits/chosen": -1.1879160404205322, "logits/rejected": -0.8816013336181641, "logps/chosen": -93.64534759521484, "logps/rejected": -1084.322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0817139148712158, "rewards/margins": 100.27859497070312, "rewards/rejected": -101.36030578613281, "step": 6281 }, { "epoch": 1.39, "learning_rate": 9.99996787563504e-06, "logits/chosen": -1.7438989877700806, "logits/rejected": -1.8481217622756958, "logps/chosen": -77.94073486328125, "logps/rejected": -85.15421295166016, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.23830337822437286, "rewards/margins": 6.5899128913879395, "rewards/rejected": -6.828216075897217, "step": 6282 }, { "epoch": 1.39, "learning_rate": 9.999961129527139e-06, "logits/chosen": -1.6074965000152588, "logits/rejected": -1.5732934474945068, "logps/chosen": -90.84439086914062, "logps/rejected": -111.43209838867188, "loss": 0.2135, "rewards/accuracies": 1.0, "rewards/chosen": -1.1013336181640625, "rewards/margins": 0.6583389043807983, "rewards/rejected": -1.7596725225448608, "step": 6283 }, { "epoch": 1.39, "learning_rate": 9.999953740936252e-06, "logits/chosen": -1.4602293968200684, "logits/rejected": -1.2506040334701538, "logps/chosen": -87.990966796875, "logps/rejected": -298.6470947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.35971909761428833, "rewards/margins": 9.543145179748535, "rewards/rejected": -9.183425903320312, "step": 6284 }, { "epoch": 1.39, "learning_rate": 9.99994570986333e-06, "logits/chosen": -1.211192011833191, "logits/rejected": -1.1193110942840576, "logps/chosen": -173.85079956054688, "logps/rejected": -226.62774658203125, "loss": 1.0596, "rewards/accuracies": 0.0, "rewards/chosen": -2.584460496902466, "rewards/margins": -1.9833221435546875, "rewards/rejected": -0.6011382937431335, "step": 6285 }, { "epoch": 1.39, "learning_rate": 9.999937036309402e-06, "logits/chosen": -1.5778037309646606, "logits/rejected": -1.5778037309646606, "logps/chosen": -95.39576721191406, "logps/rejected": -95.39576721191406, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.8001441955566406, "rewards/margins": 0.0, "rewards/rejected": -2.8001441955566406, "step": 6286 }, { "epoch": 1.39, "learning_rate": 9.999927720275586e-06, "logits/chosen": -1.6265522241592407, "logits/rejected": -1.6265522241592407, "logps/chosen": -129.52606201171875, "logps/rejected": -129.52606201171875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -5.391275882720947, "rewards/margins": 0.0, "rewards/rejected": -5.391275882720947, "step": 6287 }, { "epoch": 1.39, "learning_rate": 9.999917761763076e-06, "logits/chosen": -1.3618353605270386, "logits/rejected": -1.3742122650146484, "logps/chosen": -215.6142578125, "logps/rejected": -202.90155029296875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.7370590567588806, "rewards/margins": 6.593614101409912, "rewards/rejected": -7.3306732177734375, "step": 6288 }, { "epoch": 1.39, "learning_rate": 9.999907160773155e-06, "logits/chosen": -1.143070936203003, "logits/rejected": -1.1433099508285522, "logps/chosen": -134.96713256835938, "logps/rejected": -274.30487060546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.09514313191175461, "rewards/margins": 8.514108657836914, "rewards/rejected": -8.609251976013184, "step": 6289 }, { "epoch": 1.39, "learning_rate": 9.99989591730718e-06, "logits/chosen": -1.7010942697525024, "logits/rejected": -1.7435839176177979, "logps/chosen": -196.71461486816406, "logps/rejected": -158.0003662109375, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -2.674546957015991, "rewards/margins": 2.775244951248169, "rewards/rejected": -5.44979190826416, "step": 6290 }, { "epoch": 1.39, "learning_rate": 9.999884031366603e-06, "logits/chosen": -1.1603301763534546, "logits/rejected": -1.1532224416732788, "logps/chosen": -108.67819213867188, "logps/rejected": -139.71995544433594, "loss": 0.17, "rewards/accuracies": 1.0, "rewards/chosen": -1.5469757318496704, "rewards/margins": 0.9108794927597046, "rewards/rejected": -2.457855224609375, "step": 6291 }, { "epoch": 1.39, "learning_rate": 9.999871502952944e-06, "logits/chosen": -1.2146395444869995, "logits/rejected": -1.2377216815948486, "logps/chosen": -106.56845092773438, "logps/rejected": -131.85443115234375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.5699447393417358, "rewards/margins": 7.158481121063232, "rewards/rejected": -8.728425979614258, "step": 6292 }, { "epoch": 1.39, "learning_rate": 9.99985833206782e-06, "logits/chosen": -1.2431766986846924, "logits/rejected": -1.2350913286209106, "logps/chosen": -103.11448669433594, "logps/rejected": -222.05923461914062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.733544945716858, "rewards/margins": 8.972382545471191, "rewards/rejected": -10.705927848815918, "step": 6293 }, { "epoch": 1.39, "learning_rate": 9.999844518712917e-06, "logits/chosen": -1.802202820777893, "logits/rejected": -2.005054235458374, "logps/chosen": -161.39598083496094, "logps/rejected": -140.83834838867188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.1201248168945312, "rewards/margins": 7.72382926940918, "rewards/rejected": -9.843954086303711, "step": 6294 }, { "epoch": 1.39, "learning_rate": 9.999830062890012e-06, "logits/chosen": -1.6785054206848145, "logits/rejected": -1.548451542854309, "logps/chosen": -129.54202270507812, "logps/rejected": -225.120361328125, "loss": 0.315, "rewards/accuracies": 1.0, "rewards/chosen": -3.3633484840393066, "rewards/margins": 0.13690495491027832, "rewards/rejected": -3.500253438949585, "step": 6295 }, { "epoch": 1.39, "learning_rate": 9.999814964600965e-06, "logits/chosen": -1.2059913873672485, "logits/rejected": -1.1547850370407104, "logps/chosen": -183.99668884277344, "logps/rejected": -260.6838073730469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5876815915107727, "rewards/margins": 13.172393798828125, "rewards/rejected": -13.760075569152832, "step": 6296 }, { "epoch": 1.39, "learning_rate": 9.999799223847714e-06, "logits/chosen": -1.5948981046676636, "logits/rejected": -1.614109992980957, "logps/chosen": -108.1221923828125, "logps/rejected": -134.85113525390625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.8656761050224304, "rewards/margins": 4.799154281616211, "rewards/rejected": -5.664830207824707, "step": 6297 }, { "epoch": 1.39, "learning_rate": 9.999782840632281e-06, "logits/chosen": -1.5574562549591064, "logits/rejected": -1.5381700992584229, "logps/chosen": -147.81004333496094, "logps/rejected": -189.39608764648438, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.8121955394744873, "rewards/margins": 7.602521896362305, "rewards/rejected": -10.414717674255371, "step": 6298 }, { "epoch": 1.39, "learning_rate": 9.999765814956771e-06, "logits/chosen": -1.2080774307250977, "logits/rejected": -1.1700934171676636, "logps/chosen": -103.76404571533203, "logps/rejected": -122.95835876464844, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.5896560549736023, "rewards/margins": 4.715307712554932, "rewards/rejected": -5.3049635887146, "step": 6299 }, { "epoch": 1.39, "learning_rate": 9.999748146823376e-06, "logits/chosen": -1.3603073358535767, "logits/rejected": -1.3049155473709106, "logps/chosen": -164.39736938476562, "logps/rejected": -300.2394104003906, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -3.0308380126953125, "rewards/margins": 8.194811820983887, "rewards/rejected": -11.2256498336792, "step": 6300 }, { "epoch": 1.39, "learning_rate": 9.999729836234363e-06, "logits/chosen": -1.3876287937164307, "logits/rejected": -1.306318998336792, "logps/chosen": -157.96815490722656, "logps/rejected": -290.332763671875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.6930832266807556, "rewards/margins": 5.03012228012085, "rewards/rejected": -5.72320556640625, "step": 6301 }, { "epoch": 1.39, "learning_rate": 9.999710883192082e-06, "logits/chosen": -1.5822480916976929, "logits/rejected": -0.9056723117828369, "logps/chosen": -96.10366821289062, "logps/rejected": -954.8148193359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.8354287147521973, "rewards/margins": 77.02394104003906, "rewards/rejected": -80.85936737060547, "step": 6302 }, { "epoch": 1.4, "learning_rate": 9.999691287698975e-06, "logits/chosen": -1.1177033185958862, "logits/rejected": -1.1208033561706543, "logps/chosen": -185.52862548828125, "logps/rejected": -213.14002990722656, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -5.257989406585693, "rewards/margins": 3.8011908531188965, "rewards/rejected": -9.05918025970459, "step": 6303 }, { "epoch": 1.4, "learning_rate": 9.999671049757554e-06, "logits/chosen": -1.2814241647720337, "logits/rejected": -1.2664929628372192, "logps/chosen": -202.96719360351562, "logps/rejected": -226.37403869628906, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -2.9667999744415283, "rewards/margins": 7.552972793579102, "rewards/rejected": -10.51977252960205, "step": 6304 }, { "epoch": 1.4, "learning_rate": 9.999650169370423e-06, "logits/chosen": -1.5443341732025146, "logits/rejected": -1.4865388870239258, "logps/chosen": -136.07791137695312, "logps/rejected": -145.8479461669922, "loss": 0.7486, "rewards/accuracies": 0.0, "rewards/chosen": -2.1005241870880127, "rewards/margins": -1.2439522743225098, "rewards/rejected": -0.8565719723701477, "step": 6305 }, { "epoch": 1.4, "learning_rate": 9.999628646540262e-06, "logits/chosen": -1.4686167240142822, "logits/rejected": -1.4149836301803589, "logps/chosen": -92.43330383300781, "logps/rejected": -238.0218505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8633743524551392, "rewards/margins": 10.284798622131348, "rewards/rejected": -11.148173332214355, "step": 6306 }, { "epoch": 1.4, "learning_rate": 9.999606481269841e-06, "logits/chosen": -1.504740834236145, "logits/rejected": -1.5419268608093262, "logps/chosen": -125.49888610839844, "logps/rejected": -100.41708374023438, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.616164445877075, "rewards/margins": 4.744394302368164, "rewards/rejected": -7.36055850982666, "step": 6307 }, { "epoch": 1.4, "learning_rate": 9.999583673562006e-06, "logits/chosen": -1.3841661214828491, "logits/rejected": -1.3590409755706787, "logps/chosen": -115.5448989868164, "logps/rejected": -222.42428588867188, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -2.3656060695648193, "rewards/margins": 4.351286888122559, "rewards/rejected": -6.716893196105957, "step": 6308 }, { "epoch": 1.4, "learning_rate": 9.999560223419687e-06, "logits/chosen": -1.501621961593628, "logits/rejected": -0.8732498288154602, "logps/chosen": -151.14114379882812, "logps/rejected": -745.87158203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.987313747406006, "rewards/margins": 60.638755798339844, "rewards/rejected": -65.62606811523438, "step": 6309 }, { "epoch": 1.4, "learning_rate": 9.999536130845897e-06, "logits/chosen": -1.6268342733383179, "logits/rejected": -1.5239343643188477, "logps/chosen": -210.54147338867188, "logps/rejected": -377.18597412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.1056580543518066, "rewards/margins": 14.48371696472168, "rewards/rejected": -11.378058433532715, "step": 6310 }, { "epoch": 1.4, "learning_rate": 9.999511395843734e-06, "logits/chosen": -1.389499306678772, "logits/rejected": -1.4110081195831299, "logps/chosen": -97.11813354492188, "logps/rejected": -109.71842193603516, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.9657676815986633, "rewards/margins": 7.563192367553711, "rewards/rejected": -8.528960227966309, "step": 6311 }, { "epoch": 1.4, "learning_rate": 9.999486018416375e-06, "logits/chosen": -1.5373655557632446, "logits/rejected": -1.6613447666168213, "logps/chosen": -212.87359619140625, "logps/rejected": -136.95606994628906, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -4.016000270843506, "rewards/margins": 4.345080852508545, "rewards/rejected": -8.36108112335205, "step": 6312 }, { "epoch": 1.4, "learning_rate": 9.99945999856708e-06, "logits/chosen": -1.4100884199142456, "logits/rejected": -1.3572123050689697, "logps/chosen": -87.54399108886719, "logps/rejected": -119.89950561523438, "loss": 0.2311, "rewards/accuracies": 1.0, "rewards/chosen": -1.0253006219863892, "rewards/margins": 0.5315574407577515, "rewards/rejected": -1.5568580627441406, "step": 6313 }, { "epoch": 1.4, "learning_rate": 9.999433336299195e-06, "logits/chosen": -1.0974596738815308, "logits/rejected": -1.1453099250793457, "logps/chosen": -161.250732421875, "logps/rejected": -92.53146362304688, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.5865296125411987, "rewards/margins": 6.88424015045166, "rewards/rejected": -5.297710418701172, "step": 6314 }, { "epoch": 1.4, "learning_rate": 9.999406031616143e-06, "logits/chosen": -1.1047850847244263, "logits/rejected": -1.1185967922210693, "logps/chosen": -198.47146606445312, "logps/rejected": -409.0460205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.4878250062465668, "rewards/margins": 18.673574447631836, "rewards/rejected": -18.185749053955078, "step": 6315 }, { "epoch": 1.4, "learning_rate": 9.999378084521436e-06, "logits/chosen": -1.4858671426773071, "logits/rejected": -1.5743361711502075, "logps/chosen": -195.40087890625, "logps/rejected": -273.4714050292969, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 2.5324037075042725, "rewards/margins": 17.22249412536621, "rewards/rejected": -14.690091133117676, "step": 6316 }, { "epoch": 1.4, "learning_rate": 9.999349495018662e-06, "logits/chosen": -1.5731855630874634, "logits/rejected": -1.5342625379562378, "logps/chosen": -95.84403991699219, "logps/rejected": -152.16854858398438, "loss": 0.0348, "rewards/accuracies": 1.0, "rewards/chosen": -2.059354543685913, "rewards/margins": 2.630452871322632, "rewards/rejected": -4.689807415008545, "step": 6317 }, { "epoch": 1.4, "learning_rate": 9.999320263111495e-06, "logits/chosen": -0.7414993047714233, "logits/rejected": -0.8396175503730774, "logps/chosen": -221.489501953125, "logps/rejected": -118.28091430664062, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 3.8210999965667725, "rewards/margins": 6.49877405166626, "rewards/rejected": -2.6776740550994873, "step": 6318 }, { "epoch": 1.4, "learning_rate": 9.999290388803695e-06, "logits/chosen": -1.036683201789856, "logits/rejected": -1.0938682556152344, "logps/chosen": -255.17205810546875, "logps/rejected": -178.03860473632812, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.4202209413051605, "rewards/margins": 6.750709533691406, "rewards/rejected": -7.1709303855896, "step": 6319 }, { "epoch": 1.4, "learning_rate": 9.999259872099095e-06, "logits/chosen": -1.3485065698623657, "logits/rejected": -1.368805170059204, "logps/chosen": -93.55296325683594, "logps/rejected": -117.14404296875, "loss": 0.0664, "rewards/accuracies": 1.0, "rewards/chosen": -0.9420081973075867, "rewards/margins": 1.9515266418457031, "rewards/rejected": -2.8935348987579346, "step": 6320 }, { "epoch": 1.4, "learning_rate": 9.999228713001622e-06, "logits/chosen": -1.1898391246795654, "logits/rejected": -1.1918833255767822, "logps/chosen": -204.9499969482422, "logps/rejected": -257.1020202636719, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 3.028468370437622, "rewards/margins": 9.42487621307373, "rewards/rejected": -6.3964080810546875, "step": 6321 }, { "epoch": 1.4, "learning_rate": 9.999196911515277e-06, "logits/chosen": -1.0538195371627808, "logits/rejected": -0.8774319291114807, "logps/chosen": -266.3022155761719, "logps/rejected": -385.08184814453125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.8789001703262329, "rewards/margins": 10.8953218460083, "rewards/rejected": -10.0164213180542, "step": 6322 }, { "epoch": 1.4, "learning_rate": 9.999164467644146e-06, "logits/chosen": -1.4572334289550781, "logits/rejected": -1.5015729665756226, "logps/chosen": -123.13096618652344, "logps/rejected": -255.0242919921875, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 0.2183387726545334, "rewards/margins": 11.003350257873535, "rewards/rejected": -10.785011291503906, "step": 6323 }, { "epoch": 1.4, "learning_rate": 9.999131381392397e-06, "logits/chosen": -1.592339277267456, "logits/rejected": -1.489661693572998, "logps/chosen": -187.09197998046875, "logps/rejected": -219.84823608398438, "loss": 0.1047, "rewards/accuracies": 1.0, "rewards/chosen": -0.7936187982559204, "rewards/margins": 3.1018190383911133, "rewards/rejected": -3.895437717437744, "step": 6324 }, { "epoch": 1.4, "learning_rate": 9.999097652764285e-06, "logits/chosen": -1.6000545024871826, "logits/rejected": -1.585006594657898, "logps/chosen": -88.66058349609375, "logps/rejected": -104.48690795898438, "loss": 0.2471, "rewards/accuracies": 1.0, "rewards/chosen": -6.912274360656738, "rewards/margins": 0.4550590515136719, "rewards/rejected": -7.36733341217041, "step": 6325 }, { "epoch": 1.4, "learning_rate": 9.999063281764142e-06, "logits/chosen": -1.3698216676712036, "logits/rejected": -1.333211064338684, "logps/chosen": -164.75132751464844, "logps/rejected": -387.6606750488281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.773097276687622, "rewards/margins": 16.765724182128906, "rewards/rejected": -13.992627143859863, "step": 6326 }, { "epoch": 1.4, "learning_rate": 9.999028268396384e-06, "logits/chosen": -1.2142597436904907, "logits/rejected": -1.3025189638137817, "logps/chosen": -243.93246459960938, "logps/rejected": -128.33299255371094, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.186596632003784, "rewards/margins": 6.196556091308594, "rewards/rejected": -9.383152961730957, "step": 6327 }, { "epoch": 1.4, "learning_rate": 9.99899261266551e-06, "logits/chosen": -0.8801696300506592, "logits/rejected": -0.8680029511451721, "logps/chosen": -88.74402618408203, "logps/rejected": -103.55032348632812, "loss": 0.2067, "rewards/accuracies": 1.0, "rewards/chosen": -2.677570343017578, "rewards/margins": 0.7053565979003906, "rewards/rejected": -3.3829269409179688, "step": 6328 }, { "epoch": 1.4, "learning_rate": 9.998956314576105e-06, "logits/chosen": -1.2179008722305298, "logits/rejected": -1.2014237642288208, "logps/chosen": -242.91506958007812, "logps/rejected": -265.68402099609375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 4.1776885986328125, "rewards/margins": 5.522528171539307, "rewards/rejected": -1.3448394536972046, "step": 6329 }, { "epoch": 1.4, "learning_rate": 9.998919374132829e-06, "logits/chosen": -1.1929473876953125, "logits/rejected": -1.1828960180282593, "logps/chosen": -97.76683807373047, "logps/rejected": -140.82276916503906, "loss": 0.0685, "rewards/accuracies": 1.0, "rewards/chosen": -5.064995288848877, "rewards/margins": 2.01603364944458, "rewards/rejected": -7.081028938293457, "step": 6330 }, { "epoch": 1.4, "learning_rate": 9.99888179134043e-06, "logits/chosen": -1.4998779296875, "logits/rejected": -1.4365447759628296, "logps/chosen": -126.15293884277344, "logps/rejected": -165.1470947265625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.4284820556640625, "rewards/margins": 4.8858642578125, "rewards/rejected": -5.3143463134765625, "step": 6331 }, { "epoch": 1.4, "learning_rate": 9.99884356620374e-06, "logits/chosen": -1.1685184240341187, "logits/rejected": -1.1472800970077515, "logps/chosen": -173.72732543945312, "logps/rejected": -191.84146118164062, "loss": 0.1082, "rewards/accuracies": 1.0, "rewards/chosen": -1.3837356567382812, "rewards/margins": 1.4701554775238037, "rewards/rejected": -2.853891134262085, "step": 6332 }, { "epoch": 1.4, "learning_rate": 9.998804698727667e-06, "logits/chosen": -1.3196613788604736, "logits/rejected": -1.1538864374160767, "logps/chosen": -133.3649444580078, "logps/rejected": -141.62351989746094, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.9284515380859375, "rewards/margins": 5.865732192993164, "rewards/rejected": -8.794183731079102, "step": 6333 }, { "epoch": 1.4, "learning_rate": 9.998765188917206e-06, "logits/chosen": -1.390023112297058, "logits/rejected": -1.390023112297058, "logps/chosen": -198.48460388183594, "logps/rejected": -198.48460388183594, "loss": 0.3498, "rewards/accuracies": 0.0, "rewards/chosen": -7.867079257965088, "rewards/margins": 0.0, "rewards/rejected": -7.867079257965088, "step": 6334 }, { "epoch": 1.4, "learning_rate": 9.998725036777437e-06, "logits/chosen": -1.346912145614624, "logits/rejected": -1.3244798183441162, "logps/chosen": -120.53810119628906, "logps/rejected": -129.68679809570312, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": 0.11130142211914062, "rewards/margins": 3.810405731201172, "rewards/rejected": -3.6991043090820312, "step": 6335 }, { "epoch": 1.4, "learning_rate": 9.998684242313516e-06, "logits/chosen": -1.2632564306259155, "logits/rejected": -1.2428666353225708, "logps/chosen": -55.68057632446289, "logps/rejected": -77.1063232421875, "loss": 0.0732, "rewards/accuracies": 1.0, "rewards/chosen": -0.7339527010917664, "rewards/margins": 1.850978136062622, "rewards/rejected": -2.584930896759033, "step": 6336 }, { "epoch": 1.4, "learning_rate": 9.998642805530687e-06, "logits/chosen": -1.1619679927825928, "logits/rejected": -1.1619679927825928, "logps/chosen": -193.13023376464844, "logps/rejected": -193.13023376464844, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": 1.6492661237716675, "rewards/margins": 0.0, "rewards/rejected": 1.6492661237716675, "step": 6337 }, { "epoch": 1.4, "learning_rate": 9.998600726434274e-06, "logits/chosen": -1.435468316078186, "logits/rejected": -0.7132580280303955, "logps/chosen": -99.11665344238281, "logps/rejected": -797.5812377929688, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -1.4798249006271362, "rewards/margins": 49.778587341308594, "rewards/rejected": -51.2584114074707, "step": 6338 }, { "epoch": 1.4, "learning_rate": 9.998558005029685e-06, "logits/chosen": -1.2350618839263916, "logits/rejected": -1.1176551580429077, "logps/chosen": -135.51612854003906, "logps/rejected": -311.3927001953125, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -2.92974853515625, "rewards/margins": 3.6844820976257324, "rewards/rejected": -6.614230632781982, "step": 6339 }, { "epoch": 1.4, "learning_rate": 9.998514641322406e-06, "logits/chosen": -1.3542494773864746, "logits/rejected": -1.251247525215149, "logps/chosen": -240.17254638671875, "logps/rejected": -532.1084594726562, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -4.243438720703125, "rewards/margins": 13.089071273803711, "rewards/rejected": -17.332509994506836, "step": 6340 }, { "epoch": 1.4, "learning_rate": 9.998470635318015e-06, "logits/chosen": -1.2506403923034668, "logits/rejected": -1.283625841140747, "logps/chosen": -76.56788635253906, "logps/rejected": -95.4680404663086, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": -1.562947154045105, "rewards/margins": 2.1609878540039062, "rewards/rejected": -3.7239348888397217, "step": 6341 }, { "epoch": 1.4, "learning_rate": 9.99842598702216e-06, "logits/chosen": -1.3467603921890259, "logits/rejected": -1.3261535167694092, "logps/chosen": -98.09784698486328, "logps/rejected": -230.67852783203125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 1.253156304359436, "rewards/margins": 9.707356452941895, "rewards/rejected": -8.45419979095459, "step": 6342 }, { "epoch": 1.4, "learning_rate": 9.998380696440582e-06, "logits/chosen": -1.0702356100082397, "logits/rejected": -1.0768179893493652, "logps/chosen": -179.890869140625, "logps/rejected": -243.149169921875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 4.119311809539795, "rewards/margins": 12.711002349853516, "rewards/rejected": -8.591690063476562, "step": 6343 }, { "epoch": 1.4, "learning_rate": 9.998334763579103e-06, "logits/chosen": -1.2131701707839966, "logits/rejected": -1.4538562297821045, "logps/chosen": -223.18106079101562, "logps/rejected": -99.23030853271484, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.7595489025115967, "rewards/margins": 11.093852043151855, "rewards/rejected": -7.334303379058838, "step": 6344 }, { "epoch": 1.4, "learning_rate": 9.998288188443619e-06, "logits/chosen": -1.2838420867919922, "logits/rejected": -1.1177314519882202, "logps/chosen": -233.15576171875, "logps/rejected": -404.9422607421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.0124969482421875, "rewards/margins": 11.795724868774414, "rewards/rejected": -11.808221817016602, "step": 6345 }, { "epoch": 1.4, "learning_rate": 9.99824097104012e-06, "logits/chosen": -1.264148235321045, "logits/rejected": -1.340847373008728, "logps/chosen": -313.59613037109375, "logps/rejected": -156.34869384765625, "loss": 0.1047, "rewards/accuracies": 1.0, "rewards/chosen": -2.8518218994140625, "rewards/margins": 1.4773879051208496, "rewards/rejected": -4.329209804534912, "step": 6346 }, { "epoch": 1.4, "learning_rate": 9.998193111374673e-06, "logits/chosen": -1.2720115184783936, "logits/rejected": -1.3305835723876953, "logps/chosen": -162.6137237548828, "logps/rejected": -155.62437438964844, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -2.186270236968994, "rewards/margins": 4.009091377258301, "rewards/rejected": -6.195361614227295, "step": 6347 }, { "epoch": 1.41, "learning_rate": 9.998144609453425e-06, "logits/chosen": -1.1585525274276733, "logits/rejected": -1.1609864234924316, "logps/chosen": -123.51773834228516, "logps/rejected": -80.86539459228516, "loss": 1.533, "rewards/accuracies": 0.0, "rewards/chosen": -2.9553229808807373, "rewards/margins": -3.0143821239471436, "rewards/rejected": 0.05905914306640625, "step": 6348 }, { "epoch": 1.41, "learning_rate": 9.99809546528261e-06, "logits/chosen": -1.2284327745437622, "logits/rejected": -1.2541415691375732, "logps/chosen": -104.73120880126953, "logps/rejected": -151.5367431640625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 0.6368667483329773, "rewards/margins": 6.4513092041015625, "rewards/rejected": -5.8144426345825195, "step": 6349 }, { "epoch": 1.41, "learning_rate": 9.998045678868541e-06, "logits/chosen": -1.237985610961914, "logits/rejected": -1.2947945594787598, "logps/chosen": -125.64220428466797, "logps/rejected": -194.73388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1596016883850098, "rewards/margins": 13.537734985351562, "rewards/rejected": -15.697336196899414, "step": 6350 }, { "epoch": 1.41, "learning_rate": 9.99799525021762e-06, "logits/chosen": -1.501452088356018, "logits/rejected": -1.454426884651184, "logps/chosen": -166.28634643554688, "logps/rejected": -211.523681640625, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -0.7070556879043579, "rewards/margins": 2.6462159156799316, "rewards/rejected": -3.353271484375, "step": 6351 }, { "epoch": 1.41, "learning_rate": 9.997944179336323e-06, "logits/chosen": -1.1414605379104614, "logits/rejected": -1.1167535781860352, "logps/chosen": -195.65762329101562, "logps/rejected": -179.16439819335938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2467803955078125, "rewards/margins": 8.868385314941406, "rewards/rejected": -8.621604919433594, "step": 6352 }, { "epoch": 1.41, "learning_rate": 9.997892466231215e-06, "logits/chosen": -1.0981305837631226, "logits/rejected": -1.175951600074768, "logps/chosen": -302.716064453125, "logps/rejected": -223.47213745117188, "loss": 0.0993, "rewards/accuracies": 1.0, "rewards/chosen": -5.921731472015381, "rewards/margins": 1.630228042602539, "rewards/rejected": -7.55195951461792, "step": 6353 }, { "epoch": 1.41, "learning_rate": 9.997840110908938e-06, "logits/chosen": -1.2400263547897339, "logits/rejected": -1.1683745384216309, "logps/chosen": -77.89917755126953, "logps/rejected": -192.62326049804688, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.2915581464767456, "rewards/margins": 4.877309322357178, "rewards/rejected": -6.168867588043213, "step": 6354 }, { "epoch": 1.41, "learning_rate": 9.997787113376223e-06, "logits/chosen": -1.3808555603027344, "logits/rejected": -1.4106279611587524, "logps/chosen": -117.8514404296875, "logps/rejected": -118.9093017578125, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.215186357498169, "rewards/margins": 4.450656890869141, "rewards/rejected": -5.665843486785889, "step": 6355 }, { "epoch": 1.41, "learning_rate": 9.997733473639876e-06, "logits/chosen": -1.1925145387649536, "logits/rejected": -1.1615489721298218, "logps/chosen": -99.72083282470703, "logps/rejected": -171.85723876953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.0595115423202515, "rewards/margins": 7.7490410804748535, "rewards/rejected": -8.808552742004395, "step": 6356 }, { "epoch": 1.41, "learning_rate": 9.997679191706794e-06, "logits/chosen": -1.2744619846343994, "logits/rejected": -1.3268896341323853, "logps/chosen": -165.5867919921875, "logps/rejected": -190.87631225585938, "loss": 0.1829, "rewards/accuracies": 1.0, "rewards/chosen": -5.09466552734375, "rewards/margins": 3.5547399520874023, "rewards/rejected": -8.649405479431152, "step": 6357 }, { "epoch": 1.41, "learning_rate": 9.99762426758395e-06, "logits/chosen": -1.2034565210342407, "logits/rejected": -0.6639071106910706, "logps/chosen": -101.45882415771484, "logps/rejected": -487.7181396484375, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -7.306268215179443, "rewards/margins": 30.86619758605957, "rewards/rejected": -38.17246627807617, "step": 6358 }, { "epoch": 1.41, "learning_rate": 9.997568701278399e-06, "logits/chosen": -1.2851884365081787, "logits/rejected": -1.319311261177063, "logps/chosen": -98.47189331054688, "logps/rejected": -162.67677307128906, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.9039856195449829, "rewards/margins": 8.775123596191406, "rewards/rejected": -7.871138095855713, "step": 6359 }, { "epoch": 1.41, "learning_rate": 9.997512492797285e-06, "logits/chosen": -0.9114115238189697, "logits/rejected": -0.8741133809089661, "logps/chosen": -135.4292755126953, "logps/rejected": -249.28915405273438, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.3126907348632812, "rewards/margins": 11.192238807678223, "rewards/rejected": -12.504929542541504, "step": 6360 }, { "epoch": 1.41, "learning_rate": 9.997455642147831e-06, "logits/chosen": -1.2260737419128418, "logits/rejected": -1.2169297933578491, "logps/chosen": -144.14596557617188, "logps/rejected": -194.54998779296875, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -2.6017587184906006, "rewards/margins": 3.7973806858062744, "rewards/rejected": -6.399139404296875, "step": 6361 }, { "epoch": 1.41, "learning_rate": 9.997398149337338e-06, "logits/chosen": -1.5357364416122437, "logits/rejected": -1.5123182535171509, "logps/chosen": -211.7695770263672, "logps/rejected": -313.3870544433594, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.7898972034454346, "rewards/margins": 3.843126058578491, "rewards/rejected": -5.633023262023926, "step": 6362 }, { "epoch": 1.41, "learning_rate": 9.997340014373198e-06, "logits/chosen": -1.0362602472305298, "logits/rejected": -1.0879950523376465, "logps/chosen": -187.45033264160156, "logps/rejected": -242.3359832763672, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": 1.8060592412948608, "rewards/margins": 16.610572814941406, "rewards/rejected": -14.804513931274414, "step": 6363 }, { "epoch": 1.41, "learning_rate": 9.99728123726288e-06, "logits/chosen": -1.3040285110473633, "logits/rejected": -1.2998133897781372, "logps/chosen": -148.94691467285156, "logps/rejected": -162.4339141845703, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -2.341780185699463, "rewards/margins": 3.3741774559020996, "rewards/rejected": -5.7159576416015625, "step": 6364 }, { "epoch": 1.41, "learning_rate": 9.997221818013933e-06, "logits/chosen": -1.0729855298995972, "logits/rejected": -1.0809992551803589, "logps/chosen": -245.14552307128906, "logps/rejected": -402.06463623046875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 8.36451244354248, "rewards/margins": 24.84579086303711, "rewards/rejected": -16.481277465820312, "step": 6365 }, { "epoch": 1.41, "learning_rate": 9.997161756633998e-06, "logits/chosen": -1.3372505903244019, "logits/rejected": -1.305284857749939, "logps/chosen": -164.95648193359375, "logps/rejected": -201.70480346679688, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": 0.2177734375, "rewards/margins": 5.358590602874756, "rewards/rejected": -5.140817165374756, "step": 6366 }, { "epoch": 1.41, "learning_rate": 9.99710105313079e-06, "logits/chosen": -1.2849807739257812, "logits/rejected": -1.2928293943405151, "logps/chosen": -143.42027282714844, "logps/rejected": -162.3649139404297, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -4.373091220855713, "rewards/margins": 3.6904797554016113, "rewards/rejected": -8.063570976257324, "step": 6367 }, { "epoch": 1.41, "learning_rate": 9.997039707512109e-06, "logits/chosen": -1.3279812335968018, "logits/rejected": -1.2557950019836426, "logps/chosen": -98.67823791503906, "logps/rejected": -231.39129638671875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.784809112548828, "rewards/margins": 4.925243377685547, "rewards/rejected": -7.710052490234375, "step": 6368 }, { "epoch": 1.41, "learning_rate": 9.996977719785837e-06, "logits/chosen": -0.9877650141716003, "logits/rejected": -1.0824209451675415, "logps/chosen": -237.99427795410156, "logps/rejected": -156.7866668701172, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.12134247273206711, "rewards/margins": 7.5354766845703125, "rewards/rejected": -7.6568193435668945, "step": 6369 }, { "epoch": 1.41, "learning_rate": 9.996915089959942e-06, "logits/chosen": -1.3007001876831055, "logits/rejected": -1.3001704216003418, "logps/chosen": -87.66387939453125, "logps/rejected": -165.15267944335938, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": -1.1356308460235596, "rewards/margins": 4.586268424987793, "rewards/rejected": -5.721899509429932, "step": 6370 }, { "epoch": 1.41, "learning_rate": 9.99685181804247e-06, "logits/chosen": -1.2792248725891113, "logits/rejected": -0.8163487315177917, "logps/chosen": -158.7787628173828, "logps/rejected": -370.8885498046875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.428760528564453, "rewards/margins": 19.515077590942383, "rewards/rejected": -22.943838119506836, "step": 6371 }, { "epoch": 1.41, "learning_rate": 9.996787904041551e-06, "logits/chosen": -1.268842339515686, "logits/rejected": -1.3565630912780762, "logps/chosen": -126.71134185791016, "logps/rejected": -110.3249282836914, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -2.4830994606018066, "rewards/margins": 6.4564595222473145, "rewards/rejected": -8.939558982849121, "step": 6372 }, { "epoch": 1.41, "learning_rate": 9.996723347965399e-06, "logits/chosen": -1.171863317489624, "logits/rejected": -1.1479787826538086, "logps/chosen": -124.99669647216797, "logps/rejected": -181.53033447265625, "loss": 0.6189, "rewards/accuracies": 1.0, "rewards/chosen": -0.052999116480350494, "rewards/margins": 9.780977249145508, "rewards/rejected": -9.833976745605469, "step": 6373 }, { "epoch": 1.41, "learning_rate": 9.996658149822307e-06, "logits/chosen": -1.3382833003997803, "logits/rejected": -1.346463680267334, "logps/chosen": -132.72698974609375, "logps/rejected": -181.490966796875, "loss": 0.3481, "rewards/accuracies": 1.0, "rewards/chosen": -1.845190405845642, "rewards/margins": 5.7776594161987305, "rewards/rejected": -7.622849941253662, "step": 6374 }, { "epoch": 1.41, "learning_rate": 9.996592309620656e-06, "logits/chosen": -1.313152551651001, "logits/rejected": -1.4238163232803345, "logps/chosen": -212.86428833007812, "logps/rejected": -119.11454010009766, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -0.339987188577652, "rewards/margins": 3.452786922454834, "rewards/rejected": -3.792774200439453, "step": 6375 }, { "epoch": 1.41, "learning_rate": 9.996525827368903e-06, "logits/chosen": -1.1601250171661377, "logits/rejected": -1.054465413093567, "logps/chosen": -159.3861083984375, "logps/rejected": -274.7820739746094, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 1.8983337879180908, "rewards/margins": 5.1854753494262695, "rewards/rejected": -3.2871415615081787, "step": 6376 }, { "epoch": 1.41, "learning_rate": 9.996458703075593e-06, "logits/chosen": -1.4722355604171753, "logits/rejected": -1.4379974603652954, "logps/chosen": -89.84977722167969, "logps/rejected": -158.69444274902344, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -4.052190780639648, "rewards/margins": 2.93424654006958, "rewards/rejected": -6.9864373207092285, "step": 6377 }, { "epoch": 1.41, "learning_rate": 9.996390936749351e-06, "logits/chosen": -1.3777951002120972, "logits/rejected": -1.5164839029312134, "logps/chosen": -160.607177734375, "logps/rejected": -200.56979370117188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.578259289264679, "rewards/margins": 10.790599822998047, "rewards/rejected": -10.212340354919434, "step": 6378 }, { "epoch": 1.41, "learning_rate": 9.996322528398886e-06, "logits/chosen": -1.1543383598327637, "logits/rejected": -1.1169581413269043, "logps/chosen": -117.2876968383789, "logps/rejected": -175.19158935546875, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -0.5621040463447571, "rewards/margins": 6.776975154876709, "rewards/rejected": -7.3390793800354, "step": 6379 }, { "epoch": 1.41, "learning_rate": 9.996253478032987e-06, "logits/chosen": -1.2065271139144897, "logits/rejected": -1.2268955707550049, "logps/chosen": -162.80078125, "logps/rejected": -170.77505493164062, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -2.2558090686798096, "rewards/margins": 4.375885963439941, "rewards/rejected": -6.631694793701172, "step": 6380 }, { "epoch": 1.41, "learning_rate": 9.996183785660526e-06, "logits/chosen": -0.9920913577079773, "logits/rejected": -1.021671175956726, "logps/chosen": -126.62539672851562, "logps/rejected": -112.432861328125, "loss": 0.2304, "rewards/accuracies": 1.0, "rewards/chosen": -4.911505222320557, "rewards/margins": 0.5929198265075684, "rewards/rejected": -5.504425048828125, "step": 6381 }, { "epoch": 1.41, "learning_rate": 9.996113451290457e-06, "logits/chosen": -0.8437130451202393, "logits/rejected": -0.8950300812721252, "logps/chosen": -252.166015625, "logps/rejected": -174.13430786132812, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": 0.46865540742874146, "rewards/margins": 3.713055372238159, "rewards/rejected": -3.2444000244140625, "step": 6382 }, { "epoch": 1.41, "learning_rate": 9.996042474931821e-06, "logits/chosen": -1.1310231685638428, "logits/rejected": -1.206744909286499, "logps/chosen": -165.53634643554688, "logps/rejected": -291.8576965332031, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": 1.7521682977676392, "rewards/margins": 12.70995807647705, "rewards/rejected": -10.957789421081543, "step": 6383 }, { "epoch": 1.41, "learning_rate": 9.995970856593739e-06, "logits/chosen": -1.5736384391784668, "logits/rejected": -1.6134337186813354, "logps/chosen": -208.29837036132812, "logps/rejected": -129.40953063964844, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -4.690985202789307, "rewards/margins": 3.2152442932128906, "rewards/rejected": -7.906229496002197, "step": 6384 }, { "epoch": 1.41, "learning_rate": 9.99589859628541e-06, "logits/chosen": -1.3951241970062256, "logits/rejected": -1.3584553003311157, "logps/chosen": -115.36241149902344, "logps/rejected": -137.79751586914062, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.8866539001464844, "rewards/margins": 4.826702117919922, "rewards/rejected": -6.713356018066406, "step": 6385 }, { "epoch": 1.41, "learning_rate": 9.995825694016122e-06, "logits/chosen": -1.290074110031128, "logits/rejected": -1.39291250705719, "logps/chosen": -87.95988464355469, "logps/rejected": -99.64844512939453, "loss": 0.0584, "rewards/accuracies": 1.0, "rewards/chosen": 0.6570579409599304, "rewards/margins": 6.802695274353027, "rewards/rejected": -6.145637512207031, "step": 6386 }, { "epoch": 1.41, "learning_rate": 9.995752149795241e-06, "logits/chosen": -1.106693148612976, "logits/rejected": -1.1810632944107056, "logps/chosen": -144.9019317626953, "logps/rejected": -154.1199188232422, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.299066185951233, "rewards/margins": 4.343616008758545, "rewards/rejected": -5.642682075500488, "step": 6387 }, { "epoch": 1.41, "learning_rate": 9.99567796363222e-06, "logits/chosen": -1.2152613401412964, "logits/rejected": -1.2152613401412964, "logps/chosen": -146.6533203125, "logps/rejected": -146.6533203125, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.173271894454956, "rewards/margins": 0.0, "rewards/rejected": -3.173271894454956, "step": 6388 }, { "epoch": 1.41, "learning_rate": 9.995603135536587e-06, "logits/chosen": -1.687731385231018, "logits/rejected": -1.6332179307937622, "logps/chosen": -87.8201675415039, "logps/rejected": -155.29690551757812, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.9860870242118835, "rewards/margins": 3.4113733768463135, "rewards/rejected": -4.397460460662842, "step": 6389 }, { "epoch": 1.41, "learning_rate": 9.995527665517964e-06, "logits/chosen": -1.1791757345199585, "logits/rejected": -1.158552885055542, "logps/chosen": -114.50692749023438, "logps/rejected": -152.378662109375, "loss": 0.2594, "rewards/accuracies": 1.0, "rewards/chosen": 0.12817764282226562, "rewards/margins": 0.3857612609863281, "rewards/rejected": -0.2575836181640625, "step": 6390 }, { "epoch": 1.41, "learning_rate": 9.995451553586042e-06, "logits/chosen": -1.3404752016067505, "logits/rejected": -1.4073113203048706, "logps/chosen": -203.70361328125, "logps/rejected": -176.8092498779297, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.15557555854320526, "rewards/margins": 6.84191370010376, "rewards/rejected": -6.9974894523620605, "step": 6391 }, { "epoch": 1.41, "learning_rate": 9.995374799750606e-06, "logits/chosen": -1.038564920425415, "logits/rejected": -1.0073716640472412, "logps/chosen": -80.23710632324219, "logps/rejected": -186.4803924560547, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.031816840171814, "rewards/margins": 4.194939613342285, "rewards/rejected": -5.226756572723389, "step": 6392 }, { "epoch": 1.42, "learning_rate": 9.995297404021515e-06, "logits/chosen": -1.6139556169509888, "logits/rejected": -1.3329511880874634, "logps/chosen": -131.32061767578125, "logps/rejected": -849.4891357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5353118777275085, "rewards/margins": 74.0564193725586, "rewards/rejected": -74.59172821044922, "step": 6393 }, { "epoch": 1.42, "learning_rate": 9.995219366408717e-06, "logits/chosen": -1.3196218013763428, "logits/rejected": -1.0494818687438965, "logps/chosen": -145.6465301513672, "logps/rejected": -366.17034912109375, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.5122482180595398, "rewards/margins": 9.461455345153809, "rewards/rejected": -9.973703384399414, "step": 6394 }, { "epoch": 1.42, "learning_rate": 9.995140686922237e-06, "logits/chosen": -1.270903468132019, "logits/rejected": -1.243642807006836, "logps/chosen": -89.1497802734375, "logps/rejected": -171.43533325195312, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5766868591308594, "rewards/margins": 6.919003486633301, "rewards/rejected": -7.49569034576416, "step": 6395 }, { "epoch": 1.42, "learning_rate": 9.995061365572188e-06, "logits/chosen": -1.194105863571167, "logits/rejected": -1.191207766532898, "logps/chosen": -125.69129943847656, "logps/rejected": -193.7418212890625, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -0.5537247061729431, "rewards/margins": 11.139472007751465, "rewards/rejected": -11.693196296691895, "step": 6396 }, { "epoch": 1.42, "learning_rate": 9.994981402368763e-06, "logits/chosen": -1.0374250411987305, "logits/rejected": -0.926291286945343, "logps/chosen": -157.09710693359375, "logps/rejected": -325.37548828125, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": -0.591265857219696, "rewards/margins": 7.50836181640625, "rewards/rejected": -8.099627494812012, "step": 6397 }, { "epoch": 1.42, "learning_rate": 9.994900797322233e-06, "logits/chosen": -0.8821818828582764, "logits/rejected": -0.8768059611320496, "logps/chosen": -310.3316345214844, "logps/rejected": -314.93609619140625, "loss": 0.369, "rewards/accuracies": 0.0, "rewards/chosen": -6.225775241851807, "rewards/margins": -0.08795166015625, "rewards/rejected": -6.137823581695557, "step": 6398 }, { "epoch": 1.42, "learning_rate": 9.994819550442958e-06, "logits/chosen": -1.449445128440857, "logits/rejected": -1.432729959487915, "logps/chosen": -161.643798828125, "logps/rejected": -176.411376953125, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -3.1616897583007812, "rewards/margins": 3.2560153007507324, "rewards/rejected": -6.417705059051514, "step": 6399 }, { "epoch": 1.42, "learning_rate": 9.994737661741379e-06, "logits/chosen": -1.049689769744873, "logits/rejected": -1.0493886470794678, "logps/chosen": -166.17181396484375, "logps/rejected": -192.88693237304688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.4676452577114105, "rewards/margins": 9.664615631103516, "rewards/rejected": -9.196969985961914, "step": 6400 }, { "epoch": 1.42, "learning_rate": 9.994655131228017e-06, "logits/chosen": -1.3535842895507812, "logits/rejected": -1.384804368019104, "logps/chosen": -235.54653930664062, "logps/rejected": -163.97064208984375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.6754593253135681, "rewards/margins": 4.491140842437744, "rewards/rejected": -5.166600227355957, "step": 6401 }, { "epoch": 1.42, "learning_rate": 9.994571958913477e-06, "logits/chosen": -1.3969837427139282, "logits/rejected": -1.4752907752990723, "logps/chosen": -225.61868286132812, "logps/rejected": -185.26731872558594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.9206207990646362, "rewards/margins": 8.038095474243164, "rewards/rejected": -6.1174750328063965, "step": 6402 }, { "epoch": 1.42, "learning_rate": 9.994488144808449e-06, "logits/chosen": -1.1241064071655273, "logits/rejected": -1.2109018564224243, "logps/chosen": -232.4925537109375, "logps/rejected": -174.89938354492188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5318328738212585, "rewards/margins": 7.951733589172363, "rewards/rejected": -8.483566284179688, "step": 6403 }, { "epoch": 1.42, "learning_rate": 9.994403688923699e-06, "logits/chosen": -1.4580638408660889, "logits/rejected": -1.4432127475738525, "logps/chosen": -93.12899017333984, "logps/rejected": -131.98291015625, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -1.7900856733322144, "rewards/margins": 4.471782684326172, "rewards/rejected": -6.261868476867676, "step": 6404 }, { "epoch": 1.42, "learning_rate": 9.994318591270081e-06, "logits/chosen": -1.2766495943069458, "logits/rejected": -1.1410999298095703, "logps/chosen": -87.12890625, "logps/rejected": -284.7549133300781, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.7220298647880554, "rewards/margins": 6.6653618812561035, "rewards/rejected": -5.943332195281982, "step": 6405 }, { "epoch": 1.42, "learning_rate": 9.99423285185853e-06, "logits/chosen": -1.3111133575439453, "logits/rejected": -1.32627272605896, "logps/chosen": -187.11569213867188, "logps/rejected": -208.01556396484375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 1.049407958984375, "rewards/margins": 11.31639575958252, "rewards/rejected": -10.266987800598145, "step": 6406 }, { "epoch": 1.42, "learning_rate": 9.994146470700065e-06, "logits/chosen": -1.3488843441009521, "logits/rejected": -1.333357810974121, "logps/chosen": -90.8250732421875, "logps/rejected": -171.7982635498047, "loss": 0.1551, "rewards/accuracies": 1.0, "rewards/chosen": -2.500811815261841, "rewards/margins": 4.765499114990234, "rewards/rejected": -7.266311168670654, "step": 6407 }, { "epoch": 1.42, "learning_rate": 9.994059447805781e-06, "logits/chosen": -1.0020109415054321, "logits/rejected": -0.9801586270332336, "logps/chosen": -86.955810546875, "logps/rejected": -97.38775634765625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -2.1024835109710693, "rewards/margins": 4.717023849487305, "rewards/rejected": -6.819507122039795, "step": 6408 }, { "epoch": 1.42, "learning_rate": 9.993971783186867e-06, "logits/chosen": -1.1669799089431763, "logits/rejected": -1.1362817287445068, "logps/chosen": -186.31021118164062, "logps/rejected": -283.1173095703125, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -0.27504274249076843, "rewards/margins": 5.286825656890869, "rewards/rejected": -5.561868190765381, "step": 6409 }, { "epoch": 1.42, "learning_rate": 9.993883476854582e-06, "logits/chosen": -1.4438146352767944, "logits/rejected": -1.419291615486145, "logps/chosen": -127.41749572753906, "logps/rejected": -148.39297485351562, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -3.227553606033325, "rewards/margins": 3.6475822925567627, "rewards/rejected": -6.875135898590088, "step": 6410 }, { "epoch": 1.42, "learning_rate": 9.993794528820275e-06, "logits/chosen": -1.2050822973251343, "logits/rejected": -1.1124646663665771, "logps/chosen": -124.97700500488281, "logps/rejected": -167.2924346923828, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 0.29340362548828125, "rewards/margins": 4.888366222381592, "rewards/rejected": -4.5949625968933105, "step": 6411 }, { "epoch": 1.42, "learning_rate": 9.993704939095376e-06, "logits/chosen": -1.2055513858795166, "logits/rejected": -1.17661714553833, "logps/chosen": -213.41494750976562, "logps/rejected": -122.27322387695312, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.39599609375, "rewards/margins": 5.533195495605469, "rewards/rejected": -5.929191589355469, "step": 6412 }, { "epoch": 1.42, "learning_rate": 9.9936147076914e-06, "logits/chosen": -1.2064142227172852, "logits/rejected": -1.0119529962539673, "logps/chosen": -228.80343627929688, "logps/rejected": -336.7180480957031, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 2.334301710128784, "rewards/margins": 5.5108795166015625, "rewards/rejected": -3.1765778064727783, "step": 6413 }, { "epoch": 1.42, "learning_rate": 9.993523834619933e-06, "logits/chosen": -1.0627143383026123, "logits/rejected": -1.0627143383026123, "logps/chosen": -185.08795166015625, "logps/rejected": -185.08795166015625, "loss": 0.3756, "rewards/accuracies": 0.0, "rewards/chosen": -5.813251495361328, "rewards/margins": 0.0, "rewards/rejected": -5.813251495361328, "step": 6414 }, { "epoch": 1.42, "learning_rate": 9.99343231989266e-06, "logits/chosen": -1.541618824005127, "logits/rejected": -1.5875577926635742, "logps/chosen": -127.67832946777344, "logps/rejected": -156.40283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7296539545059204, "rewards/margins": 9.494221687316895, "rewards/rejected": -11.223875999450684, "step": 6415 }, { "epoch": 1.42, "learning_rate": 9.99334016352134e-06, "logits/chosen": -1.2189794778823853, "logits/rejected": -1.1635292768478394, "logps/chosen": -78.1213607788086, "logps/rejected": -146.61912536621094, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": -1.2595055103302002, "rewards/margins": 5.499907493591309, "rewards/rejected": -6.759413242340088, "step": 6416 }, { "epoch": 1.42, "learning_rate": 9.993247365517808e-06, "logits/chosen": -1.406924843788147, "logits/rejected": -1.3915132284164429, "logps/chosen": -212.39590454101562, "logps/rejected": -208.950439453125, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -0.8358474969863892, "rewards/margins": 3.369171142578125, "rewards/rejected": -4.205018520355225, "step": 6417 }, { "epoch": 1.42, "learning_rate": 9.993153925893997e-06, "logits/chosen": -1.541160225868225, "logits/rejected": -1.5516893863677979, "logps/chosen": -94.669189453125, "logps/rejected": -190.28518676757812, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 0.5818344354629517, "rewards/margins": 6.247593879699707, "rewards/rejected": -5.665759563446045, "step": 6418 }, { "epoch": 1.42, "learning_rate": 9.993059844661908e-06, "logits/chosen": -1.2952991724014282, "logits/rejected": -0.8101401925086975, "logps/chosen": -182.45733642578125, "logps/rejected": -920.4156494140625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -2.211068868637085, "rewards/margins": 66.73442840576172, "rewards/rejected": -68.94549560546875, "step": 6419 }, { "epoch": 1.42, "learning_rate": 9.992965121833631e-06, "logits/chosen": -1.4465476274490356, "logits/rejected": -1.4282058477401733, "logps/chosen": -124.71778869628906, "logps/rejected": -162.27755737304688, "loss": 0.0796, "rewards/accuracies": 1.0, "rewards/chosen": -0.7955352663993835, "rewards/margins": 2.0861709117889404, "rewards/rejected": -2.8817062377929688, "step": 6420 }, { "epoch": 1.42, "learning_rate": 9.99286975742134e-06, "logits/chosen": -1.5480878353118896, "logits/rejected": -2.2660231590270996, "logps/chosen": -90.01156616210938, "logps/rejected": -214.42318725585938, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -6.6772685050964355, "rewards/margins": 6.464940547943115, "rewards/rejected": -13.14220905303955, "step": 6421 }, { "epoch": 1.42, "learning_rate": 9.992773751437288e-06, "logits/chosen": -1.1969003677368164, "logits/rejected": -1.181251883506775, "logps/chosen": -153.29734802246094, "logps/rejected": -125.391845703125, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 0.5038223266601562, "rewards/margins": 3.930098056793213, "rewards/rejected": -3.4262757301330566, "step": 6422 }, { "epoch": 1.42, "learning_rate": 9.99267710389381e-06, "logits/chosen": -1.3955590724945068, "logits/rejected": -1.3768818378448486, "logps/chosen": -108.82020568847656, "logps/rejected": -82.00511932373047, "loss": 0.0342, "rewards/accuracies": 1.0, "rewards/chosen": -2.989187002182007, "rewards/margins": 2.763129949569702, "rewards/rejected": -5.752316951751709, "step": 6423 }, { "epoch": 1.42, "learning_rate": 9.992579814803327e-06, "logits/chosen": -1.1333112716674805, "logits/rejected": -1.1667128801345825, "logps/chosen": -220.61244201660156, "logps/rejected": -223.95590209960938, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.16878509521484375, "rewards/margins": 6.428248882293701, "rewards/rejected": -6.259463787078857, "step": 6424 }, { "epoch": 1.42, "learning_rate": 9.992481884178338e-06, "logits/chosen": -1.1306369304656982, "logits/rejected": -1.0677282810211182, "logps/chosen": -125.25929260253906, "logps/rejected": -167.15931701660156, "loss": 0.0588, "rewards/accuracies": 1.0, "rewards/chosen": -1.1038169860839844, "rewards/margins": 3.4230637550354004, "rewards/rejected": -4.526880741119385, "step": 6425 }, { "epoch": 1.42, "learning_rate": 9.99238331203143e-06, "logits/chosen": -1.1011518239974976, "logits/rejected": -1.1011518239974976, "logps/chosen": -47.83167266845703, "logps/rejected": -47.83167266845703, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -0.6778320670127869, "rewards/margins": 0.0, "rewards/rejected": -0.6778320670127869, "step": 6426 }, { "epoch": 1.42, "learning_rate": 9.99228409837527e-06, "logits/chosen": -1.3675116300582886, "logits/rejected": -1.3345905542373657, "logps/chosen": -74.28488159179688, "logps/rejected": -110.31486511230469, "loss": 0.0654, "rewards/accuracies": 1.0, "rewards/chosen": -2.0543694496154785, "rewards/margins": 1.9810166358947754, "rewards/rejected": -4.035386085510254, "step": 6427 }, { "epoch": 1.42, "learning_rate": 9.9921842432226e-06, "logits/chosen": -1.3571546077728271, "logits/rejected": -1.3571546077728271, "logps/chosen": -274.5899658203125, "logps/rejected": -274.5899658203125, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": 1.0960174798965454, "rewards/margins": 0.0, "rewards/rejected": 1.0960174798965454, "step": 6428 }, { "epoch": 1.42, "learning_rate": 9.992083746586258e-06, "logits/chosen": -1.2775822877883911, "logits/rejected": -1.288070797920227, "logps/chosen": -204.79071044921875, "logps/rejected": -205.92837524414062, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -4.238757610321045, "rewards/margins": 4.7071213722229, "rewards/rejected": -8.945878982543945, "step": 6429 }, { "epoch": 1.42, "learning_rate": 9.991982608479156e-06, "logits/chosen": -1.6328198909759521, "logits/rejected": -1.4686203002929688, "logps/chosen": -143.82373046875, "logps/rejected": -200.03213500976562, "loss": 0.3416, "rewards/accuracies": 1.0, "rewards/chosen": -7.473343849182129, "rewards/margins": 0.020641326904296875, "rewards/rejected": -7.493985176086426, "step": 6430 }, { "epoch": 1.42, "learning_rate": 9.991880828914288e-06, "logits/chosen": -1.2638617753982544, "logits/rejected": -1.2373238801956177, "logps/chosen": -102.70875549316406, "logps/rejected": -172.84603881835938, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.111619710922241, "rewards/margins": 6.466463088989258, "rewards/rejected": -8.578083038330078, "step": 6431 }, { "epoch": 1.42, "learning_rate": 9.991778407904733e-06, "logits/chosen": -1.3642895221710205, "logits/rejected": -1.3595118522644043, "logps/chosen": -96.33458709716797, "logps/rejected": -128.84320068359375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.4863839149475098, "rewards/margins": 5.278963565826416, "rewards/rejected": -7.765347480773926, "step": 6432 }, { "epoch": 1.42, "learning_rate": 9.991675345463654e-06, "logits/chosen": -1.3571903705596924, "logits/rejected": -1.3035856485366821, "logps/chosen": -123.75801086425781, "logps/rejected": -246.55294799804688, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.2586502134799957, "rewards/margins": 6.451118469238281, "rewards/rejected": -6.709768772125244, "step": 6433 }, { "epoch": 1.42, "learning_rate": 9.991571641604291e-06, "logits/chosen": -1.6317970752716064, "logits/rejected": -1.6444649696350098, "logps/chosen": -107.13185119628906, "logps/rejected": -124.64533996582031, "loss": 0.0636, "rewards/accuracies": 1.0, "rewards/chosen": -2.0559937953948975, "rewards/margins": 2.011646032333374, "rewards/rejected": -4.0676398277282715, "step": 6434 }, { "epoch": 1.42, "learning_rate": 9.991467296339973e-06, "logits/chosen": -1.5995256900787354, "logits/rejected": -1.694276213645935, "logps/chosen": -170.44546508789062, "logps/rejected": -217.9882354736328, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.5632599592208862, "rewards/margins": 7.126185417175293, "rewards/rejected": -8.689445495605469, "step": 6435 }, { "epoch": 1.42, "learning_rate": 9.991362309684105e-06, "logits/chosen": -1.255875587463379, "logits/rejected": -1.1304374933242798, "logps/chosen": -174.30068969726562, "logps/rejected": -406.7584228515625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.1208527088165283, "rewards/margins": 8.990848541259766, "rewards/rejected": -11.111701011657715, "step": 6436 }, { "epoch": 1.42, "learning_rate": 9.991256681650181e-06, "logits/chosen": -1.350361943244934, "logits/rejected": -1.3055285215377808, "logps/chosen": -95.96322631835938, "logps/rejected": -108.60842895507812, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -0.31256791949272156, "rewards/margins": 2.524094343185425, "rewards/rejected": -2.8366622924804688, "step": 6437 }, { "epoch": 1.42, "learning_rate": 9.99115041225177e-06, "logits/chosen": -1.1105983257293701, "logits/rejected": -1.1136549711227417, "logps/chosen": -97.61737823486328, "logps/rejected": -122.03181457519531, "loss": 0.7342, "rewards/accuracies": 1.0, "rewards/chosen": -0.6874839663505554, "rewards/margins": 1.598395586013794, "rewards/rejected": -2.285879611968994, "step": 6438 }, { "epoch": 1.43, "learning_rate": 9.991043501502532e-06, "logits/chosen": -0.9784319400787354, "logits/rejected": -0.9784319400787354, "logps/chosen": -65.47816467285156, "logps/rejected": -65.47816467285156, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -5.131660461425781, "rewards/margins": 0.0, "rewards/rejected": -5.131660461425781, "step": 6439 }, { "epoch": 1.43, "learning_rate": 9.9909359494162e-06, "logits/chosen": -0.9525759220123291, "logits/rejected": -0.9542748928070068, "logps/chosen": -104.95375061035156, "logps/rejected": -335.5673522949219, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": 3.016932725906372, "rewards/margins": 13.92393684387207, "rewards/rejected": -10.907004356384277, "step": 6440 }, { "epoch": 1.43, "learning_rate": 9.990827756006599e-06, "logits/chosen": -1.4483554363250732, "logits/rejected": -1.4589519500732422, "logps/chosen": -114.23565673828125, "logps/rejected": -123.55422973632812, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.5496071577072144, "rewards/margins": 4.520946979522705, "rewards/rejected": -6.070554256439209, "step": 6441 }, { "epoch": 1.43, "learning_rate": 9.990718921287625e-06, "logits/chosen": -1.3970377445220947, "logits/rejected": -1.3970377445220947, "logps/chosen": -235.1985626220703, "logps/rejected": -235.1985626220703, "loss": 1.6498, "rewards/accuracies": 0.0, "rewards/chosen": -12.81103515625, "rewards/margins": 0.0, "rewards/rejected": -12.81103515625, "step": 6442 }, { "epoch": 1.43, "learning_rate": 9.99060944527327e-06, "logits/chosen": -1.32135808467865, "logits/rejected": -1.1944423913955688, "logps/chosen": -154.38987731933594, "logps/rejected": -287.4066162109375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -4.186000823974609, "rewards/margins": 5.516604423522949, "rewards/rejected": -9.702605247497559, "step": 6443 }, { "epoch": 1.43, "learning_rate": 9.990499327977599e-06, "logits/chosen": -1.479770302772522, "logits/rejected": -1.4571068286895752, "logps/chosen": -129.8012237548828, "logps/rejected": -114.64110565185547, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": 0.9249435663223267, "rewards/margins": 4.018024444580078, "rewards/rejected": -3.093080997467041, "step": 6444 }, { "epoch": 1.43, "learning_rate": 9.990388569414759e-06, "logits/chosen": -1.4767547845840454, "logits/rejected": -1.5095243453979492, "logps/chosen": -80.288330078125, "logps/rejected": -99.54629516601562, "loss": 0.2242, "rewards/accuracies": 1.0, "rewards/chosen": -0.4829147458076477, "rewards/margins": 0.6132530570030212, "rewards/rejected": -1.096167802810669, "step": 6445 }, { "epoch": 1.43, "learning_rate": 9.990277169598985e-06, "logits/chosen": -1.1460261344909668, "logits/rejected": -1.1404931545257568, "logps/chosen": -81.97337341308594, "logps/rejected": -120.85383605957031, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": 0.6698822379112244, "rewards/margins": 6.6692399978637695, "rewards/rejected": -5.9993577003479, "step": 6446 }, { "epoch": 1.43, "learning_rate": 9.99016512854459e-06, "logits/chosen": -1.1024154424667358, "logits/rejected": -1.1541508436203003, "logps/chosen": -210.89820861816406, "logps/rejected": -227.74757385253906, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 1.3092330694198608, "rewards/margins": 13.784941673278809, "rewards/rejected": -12.475708961486816, "step": 6447 }, { "epoch": 1.43, "learning_rate": 9.990052446265974e-06, "logits/chosen": -1.3622277975082397, "logits/rejected": -1.3731324672698975, "logps/chosen": -108.29985046386719, "logps/rejected": -82.72431945800781, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -3.290956974029541, "rewards/margins": 3.2442755699157715, "rewards/rejected": -6.5352325439453125, "step": 6448 }, { "epoch": 1.43, "learning_rate": 9.989939122777614e-06, "logits/chosen": -1.5562744140625, "logits/rejected": -1.5587024688720703, "logps/chosen": -79.3572006225586, "logps/rejected": -101.30905151367188, "loss": 0.2912, "rewards/accuracies": 1.0, "rewards/chosen": -1.9383114576339722, "rewards/margins": 0.2364727258682251, "rewards/rejected": -2.1747841835021973, "step": 6449 }, { "epoch": 1.43, "learning_rate": 9.98982515809407e-06, "logits/chosen": -1.1940484046936035, "logits/rejected": -1.0659985542297363, "logps/chosen": -118.28469848632812, "logps/rejected": -194.46958923339844, "loss": 1.1005, "rewards/accuracies": 0.0, "rewards/chosen": -3.487408399581909, "rewards/margins": -2.0749404430389404, "rewards/rejected": -1.4124679565429688, "step": 6450 }, { "epoch": 1.43, "learning_rate": 9.989710552229992e-06, "logits/chosen": -1.5573065280914307, "logits/rejected": -0.897048830986023, "logps/chosen": -116.7439193725586, "logps/rejected": -1213.470947265625, "loss": 0.4575, "rewards/accuracies": 1.0, "rewards/chosen": -2.5039734840393066, "rewards/margins": 110.74702453613281, "rewards/rejected": -113.2509994506836, "step": 6451 }, { "epoch": 1.43, "learning_rate": 9.9895953052001e-06, "logits/chosen": -1.4836390018463135, "logits/rejected": -1.4555706977844238, "logps/chosen": -73.80577850341797, "logps/rejected": -125.78260040283203, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": -0.29582902789115906, "rewards/margins": 6.763704776763916, "rewards/rejected": -7.059533596038818, "step": 6452 }, { "epoch": 1.43, "learning_rate": 9.989479417019208e-06, "logits/chosen": -0.9920833110809326, "logits/rejected": -0.9401967525482178, "logps/chosen": -198.21824645996094, "logps/rejected": -270.34173583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.8451766967773438, "rewards/margins": 14.538103103637695, "rewards/rejected": -10.692926406860352, "step": 6453 }, { "epoch": 1.43, "learning_rate": 9.989362887702203e-06, "logits/chosen": -1.380910873413086, "logits/rejected": -1.5547871589660645, "logps/chosen": -227.68463134765625, "logps/rejected": -211.62005615234375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.9033294916152954, "rewards/margins": 9.60673999786377, "rewards/rejected": -10.510069847106934, "step": 6454 }, { "epoch": 1.43, "learning_rate": 9.989245717264063e-06, "logits/chosen": -1.0880763530731201, "logits/rejected": -1.1749372482299805, "logps/chosen": -212.6510009765625, "logps/rejected": -246.93496704101562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.0251877307891846, "rewards/margins": 13.307147026062012, "rewards/rejected": -10.281959533691406, "step": 6455 }, { "epoch": 1.43, "learning_rate": 9.989127905719841e-06, "logits/chosen": -1.1478748321533203, "logits/rejected": -1.1721301078796387, "logps/chosen": -208.4805908203125, "logps/rejected": -163.02557373046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8334075808525085, "rewards/margins": 7.823932647705078, "rewards/rejected": -8.657340049743652, "step": 6456 }, { "epoch": 1.43, "learning_rate": 9.989009453084678e-06, "logits/chosen": -1.049523949623108, "logits/rejected": -1.0216479301452637, "logps/chosen": -94.56372833251953, "logps/rejected": -175.91070556640625, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.506751298904419, "rewards/margins": 3.568300485610962, "rewards/rejected": -5.075051784515381, "step": 6457 }, { "epoch": 1.43, "learning_rate": 9.988890359373794e-06, "logits/chosen": -1.5173900127410889, "logits/rejected": -1.364689826965332, "logps/chosen": -88.65011596679688, "logps/rejected": -591.9942626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.5504783987998962, "rewards/margins": 47.54920959472656, "rewards/rejected": -46.99872970581055, "step": 6458 }, { "epoch": 1.43, "learning_rate": 9.988770624602488e-06, "logits/chosen": -1.5705872774124146, "logits/rejected": -1.6264251470565796, "logps/chosen": -80.77671813964844, "logps/rejected": -114.1951904296875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.9985527396202087, "rewards/margins": 6.8046770095825195, "rewards/rejected": -7.803229808807373, "step": 6459 }, { "epoch": 1.43, "learning_rate": 9.988650248786153e-06, "logits/chosen": -1.0043095350265503, "logits/rejected": -1.0655031204223633, "logps/chosen": -191.68304443359375, "logps/rejected": -150.6213836669922, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.9932464957237244, "rewards/margins": 9.648362159729004, "rewards/rejected": -8.655116081237793, "step": 6460 }, { "epoch": 1.43, "learning_rate": 9.988529231940252e-06, "logits/chosen": -1.5873456001281738, "logits/rejected": -1.6075173616409302, "logps/chosen": -102.6187744140625, "logps/rejected": -137.0733642578125, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -1.719384789466858, "rewards/margins": 8.300777435302734, "rewards/rejected": -10.020162582397461, "step": 6461 }, { "epoch": 1.43, "learning_rate": 9.988407574080337e-06, "logits/chosen": -1.573708176612854, "logits/rejected": -1.6112719774246216, "logps/chosen": -103.69969177246094, "logps/rejected": -142.5651397705078, "loss": 0.0465, "rewards/accuracies": 1.0, "rewards/chosen": -1.5966004133224487, "rewards/margins": 2.3294501304626465, "rewards/rejected": -3.9260506629943848, "step": 6462 }, { "epoch": 1.43, "learning_rate": 9.988285275222041e-06, "logits/chosen": -1.466192603111267, "logits/rejected": -1.455245018005371, "logps/chosen": -112.07457733154297, "logps/rejected": -168.6806640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.9618110656738281, "rewards/margins": 7.351528167724609, "rewards/rejected": -9.313339233398438, "step": 6463 }, { "epoch": 1.43, "learning_rate": 9.988162335381077e-06, "logits/chosen": -1.211634635925293, "logits/rejected": -1.4317314624786377, "logps/chosen": -351.9323425292969, "logps/rejected": -207.62979125976562, "loss": 0.1999, "rewards/accuracies": 1.0, "rewards/chosen": -2.563006639480591, "rewards/margins": 0.7104172706604004, "rewards/rejected": -3.273423910140991, "step": 6464 }, { "epoch": 1.43, "learning_rate": 9.988038754573245e-06, "logits/chosen": -1.1409811973571777, "logits/rejected": -1.1602144241333008, "logps/chosen": -81.36954498291016, "logps/rejected": -160.2148895263672, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.3269248902797699, "rewards/margins": 6.155509948730469, "rewards/rejected": -6.4824347496032715, "step": 6465 }, { "epoch": 1.43, "learning_rate": 9.987914532814425e-06, "logits/chosen": -1.0809344053268433, "logits/rejected": -1.0809344053268433, "logps/chosen": -162.41592407226562, "logps/rejected": -162.41592407226562, "loss": 0.3469, "rewards/accuracies": 0.0, "rewards/chosen": -3.8327882289886475, "rewards/margins": 0.0, "rewards/rejected": -3.8327882289886475, "step": 6466 }, { "epoch": 1.43, "learning_rate": 9.987789670120578e-06, "logits/chosen": -1.4531993865966797, "logits/rejected": -1.3889353275299072, "logps/chosen": -80.68456268310547, "logps/rejected": -163.81561279296875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.26923295855522156, "rewards/margins": 4.5996623039245605, "rewards/rejected": -4.868895053863525, "step": 6467 }, { "epoch": 1.43, "learning_rate": 9.987664166507749e-06, "logits/chosen": -1.5492935180664062, "logits/rejected": -1.5367993116378784, "logps/chosen": -206.85069274902344, "logps/rejected": -265.2481384277344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.4377762079238892, "rewards/margins": 12.189824104309082, "rewards/rejected": -10.752047538757324, "step": 6468 }, { "epoch": 1.43, "learning_rate": 9.987538021992063e-06, "logits/chosen": -1.1636638641357422, "logits/rejected": -0.7200765609741211, "logps/chosen": -159.18344116210938, "logps/rejected": -465.6965026855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.194868564605713, "rewards/margins": 19.363025665283203, "rewards/rejected": -23.557893753051758, "step": 6469 }, { "epoch": 1.43, "learning_rate": 9.987411236589733e-06, "logits/chosen": -1.3867595195770264, "logits/rejected": -1.3867595195770264, "logps/chosen": -260.5660400390625, "logps/rejected": -260.5660400390625, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -7.056396484375, "rewards/margins": 0.0, "rewards/rejected": -7.056396484375, "step": 6470 }, { "epoch": 1.43, "learning_rate": 9.987283810317046e-06, "logits/chosen": -1.5632342100143433, "logits/rejected": -1.7375891208648682, "logps/chosen": -141.3883056640625, "logps/rejected": -123.0343017578125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -2.7286453247070312, "rewards/margins": 5.993049621582031, "rewards/rejected": -8.721694946289062, "step": 6471 }, { "epoch": 1.43, "learning_rate": 9.987155743190379e-06, "logits/chosen": -1.510114312171936, "logits/rejected": -1.5411250591278076, "logps/chosen": -98.38162231445312, "logps/rejected": -112.55668640136719, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 0.9122055172920227, "rewards/margins": 3.7799460887908936, "rewards/rejected": -2.8677406311035156, "step": 6472 }, { "epoch": 1.43, "learning_rate": 9.98702703522619e-06, "logits/chosen": -1.3581249713897705, "logits/rejected": -1.3601902723312378, "logps/chosen": -95.12266540527344, "logps/rejected": -119.18186950683594, "loss": 0.1416, "rewards/accuracies": 1.0, "rewards/chosen": -0.9231918454170227, "rewards/margins": 1.1194748878479004, "rewards/rejected": -2.0426666736602783, "step": 6473 }, { "epoch": 1.43, "learning_rate": 9.986897686441012e-06, "logits/chosen": -1.3190113306045532, "logits/rejected": -1.3037632703781128, "logps/chosen": -110.2186279296875, "logps/rejected": -195.88296508789062, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.4938865900039673, "rewards/margins": 3.9529762268066406, "rewards/rejected": -5.446862697601318, "step": 6474 }, { "epoch": 1.43, "learning_rate": 9.986767696851472e-06, "logits/chosen": -1.3236219882965088, "logits/rejected": -1.3503154516220093, "logps/chosen": -154.42050170898438, "logps/rejected": -113.97686767578125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.971624732017517, "rewards/margins": 5.9181952476501465, "rewards/rejected": -7.889820098876953, "step": 6475 }, { "epoch": 1.43, "learning_rate": 9.98663706647427e-06, "logits/chosen": -1.3676397800445557, "logits/rejected": -1.3334388732910156, "logps/chosen": -196.39797973632812, "logps/rejected": -386.5295104980469, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": 0.22832946479320526, "rewards/margins": 21.536163330078125, "rewards/rejected": -21.30783462524414, "step": 6476 }, { "epoch": 1.43, "learning_rate": 9.986505795326194e-06, "logits/chosen": -1.127578616142273, "logits/rejected": -1.1423920392990112, "logps/chosen": -112.3470458984375, "logps/rejected": -184.1502227783203, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": -3.558955430984497, "rewards/margins": 2.1135613918304443, "rewards/rejected": -5.672516822814941, "step": 6477 }, { "epoch": 1.43, "learning_rate": 9.986373883424108e-06, "logits/chosen": -1.1243822574615479, "logits/rejected": -1.1243822574615479, "logps/chosen": -192.22613525390625, "logps/rejected": -192.22613525390625, "loss": 0.3492, "rewards/accuracies": 0.0, "rewards/chosen": -8.226763725280762, "rewards/margins": 0.0, "rewards/rejected": -8.226763725280762, "step": 6478 }, { "epoch": 1.43, "learning_rate": 9.986241330784967e-06, "logits/chosen": -1.4957094192504883, "logits/rejected": -1.4729797840118408, "logps/chosen": -81.56239318847656, "logps/rejected": -159.15737915039062, "loss": 0.368, "rewards/accuracies": 1.0, "rewards/chosen": -0.9978912472724915, "rewards/margins": 3.1277377605438232, "rewards/rejected": -4.12562894821167, "step": 6479 }, { "epoch": 1.43, "learning_rate": 9.9861081374258e-06, "logits/chosen": -1.2738516330718994, "logits/rejected": -1.2835272550582886, "logps/chosen": -147.45556640625, "logps/rejected": -178.61106872558594, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 2.176495313644409, "rewards/margins": 13.3636474609375, "rewards/rejected": -11.187151908874512, "step": 6480 }, { "epoch": 1.43, "learning_rate": 9.985974303363723e-06, "logits/chosen": -1.4777294397354126, "logits/rejected": -1.3867155313491821, "logps/chosen": -110.77838134765625, "logps/rejected": -306.663330078125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.43063125014305115, "rewards/margins": 6.3339104652404785, "rewards/rejected": -6.7645416259765625, "step": 6481 }, { "epoch": 1.43, "learning_rate": 9.985839828615937e-06, "logits/chosen": -1.3708897829055786, "logits/rejected": -1.4292770624160767, "logps/chosen": -177.71107482910156, "logps/rejected": -228.02487182617188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.04609527811408043, "rewards/margins": 8.205202102661133, "rewards/rejected": -8.251296997070312, "step": 6482 }, { "epoch": 1.43, "learning_rate": 9.985704713199715e-06, "logits/chosen": -0.9773018956184387, "logits/rejected": -1.0265586376190186, "logps/chosen": -196.962890625, "logps/rejected": -102.18563079833984, "loss": 0.14, "rewards/accuracies": 1.0, "rewards/chosen": -2.095202684402466, "rewards/margins": 1.1676032543182373, "rewards/rejected": -3.262805938720703, "step": 6483 }, { "epoch": 1.44, "learning_rate": 9.985568957132425e-06, "logits/chosen": -1.6084344387054443, "logits/rejected": -1.6665631532669067, "logps/chosen": -88.87623596191406, "logps/rejected": -126.85011291503906, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.44851455092430115, "rewards/margins": 9.757637023925781, "rewards/rejected": -9.309122085571289, "step": 6484 }, { "epoch": 1.44, "learning_rate": 9.98543256043151e-06, "logits/chosen": -1.4815027713775635, "logits/rejected": -1.4769208431243896, "logps/chosen": -87.84159851074219, "logps/rejected": -216.00193786621094, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.3816086053848267, "rewards/margins": 8.605112075805664, "rewards/rejected": -9.98672103881836, "step": 6485 }, { "epoch": 1.44, "learning_rate": 9.985295523114492e-06, "logits/chosen": -1.1124078035354614, "logits/rejected": -1.1124078035354614, "logps/chosen": -159.4077606201172, "logps/rejected": -159.4077606201172, "loss": 0.3851, "rewards/accuracies": 0.0, "rewards/chosen": -3.0522401332855225, "rewards/margins": 0.0, "rewards/rejected": -3.0522401332855225, "step": 6486 }, { "epoch": 1.44, "learning_rate": 9.985157845198987e-06, "logits/chosen": -1.3348287343978882, "logits/rejected": -1.3718595504760742, "logps/chosen": -144.55516052246094, "logps/rejected": -132.3102569580078, "loss": 0.0474, "rewards/accuracies": 1.0, "rewards/chosen": -3.654181718826294, "rewards/margins": 2.3092939853668213, "rewards/rejected": -5.963475704193115, "step": 6487 }, { "epoch": 1.44, "learning_rate": 9.985019526702682e-06, "logits/chosen": -1.399277687072754, "logits/rejected": -1.4123854637145996, "logps/chosen": -154.43299865722656, "logps/rejected": -218.0780792236328, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.9099831581115723, "rewards/margins": 10.352821350097656, "rewards/rejected": -13.262804985046387, "step": 6488 }, { "epoch": 1.44, "learning_rate": 9.984880567643351e-06, "logits/chosen": -1.3026922941207886, "logits/rejected": -1.1332303285598755, "logps/chosen": -152.62631225585938, "logps/rejected": -670.1881713867188, "loss": 0.0361, "rewards/accuracies": 1.0, "rewards/chosen": -2.57132887840271, "rewards/margins": 56.40642166137695, "rewards/rejected": -58.97774887084961, "step": 6489 }, { "epoch": 1.44, "learning_rate": 9.984740968038852e-06, "logits/chosen": -1.0828726291656494, "logits/rejected": -0.9793913960456848, "logps/chosen": -139.97474670410156, "logps/rejected": -216.86264038085938, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -4.168618202209473, "rewards/margins": 3.1177754402160645, "rewards/rejected": -7.286393642425537, "step": 6490 }, { "epoch": 1.44, "learning_rate": 9.984600727907119e-06, "logits/chosen": -1.2075564861297607, "logits/rejected": -1.1242018938064575, "logps/chosen": -220.64791870117188, "logps/rejected": -290.6797790527344, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.8200165033340454, "rewards/margins": 6.191453456878662, "rewards/rejected": -8.011469841003418, "step": 6491 }, { "epoch": 1.44, "learning_rate": 9.984459847266176e-06, "logits/chosen": -1.3652455806732178, "logits/rejected": -1.340071439743042, "logps/chosen": -241.25015258789062, "logps/rejected": -278.98974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0364516973495483, "rewards/margins": 11.9899263381958, "rewards/rejected": -13.02637767791748, "step": 6492 }, { "epoch": 1.44, "learning_rate": 9.984318326134125e-06, "logits/chosen": -1.550485610961914, "logits/rejected": -1.495036244392395, "logps/chosen": -81.86776733398438, "logps/rejected": -188.55679321289062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.5652183890342712, "rewards/margins": 7.361082553863525, "rewards/rejected": -6.795864105224609, "step": 6493 }, { "epoch": 1.44, "learning_rate": 9.984176164529151e-06, "logits/chosen": -1.7606545686721802, "logits/rejected": -1.7128721475601196, "logps/chosen": -92.07273864746094, "logps/rejected": -174.64642333984375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.8653137683868408, "rewards/margins": 7.802878379821777, "rewards/rejected": -9.668191909790039, "step": 6494 }, { "epoch": 1.44, "learning_rate": 9.984033362469522e-06, "logits/chosen": -1.4085148572921753, "logits/rejected": -1.3091650009155273, "logps/chosen": -133.6569366455078, "logps/rejected": -202.29318237304688, "loss": 1.089, "rewards/accuracies": 1.0, "rewards/chosen": -4.753231048583984, "rewards/margins": 1.8619012832641602, "rewards/rejected": -6.6151323318481445, "step": 6495 }, { "epoch": 1.44, "learning_rate": 9.983889919973586e-06, "logits/chosen": -1.6154725551605225, "logits/rejected": -1.6739959716796875, "logps/chosen": -125.52921295166016, "logps/rejected": -191.51976013183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.23414002358913422, "rewards/margins": 11.714637756347656, "rewards/rejected": -11.94877815246582, "step": 6496 }, { "epoch": 1.44, "learning_rate": 9.983745837059777e-06, "logits/chosen": -1.3493847846984863, "logits/rejected": -1.3435276746749878, "logps/chosen": -120.88609313964844, "logps/rejected": -210.07681274414062, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -1.2875884771347046, "rewards/margins": 3.931208610534668, "rewards/rejected": -5.218797206878662, "step": 6497 }, { "epoch": 1.44, "learning_rate": 9.98360111374661e-06, "logits/chosen": -1.3557153940200806, "logits/rejected": -1.287233591079712, "logps/chosen": -183.22933959960938, "logps/rejected": -296.9176025390625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.5943832397460938, "rewards/margins": 6.304191589355469, "rewards/rejected": -5.709808349609375, "step": 6498 }, { "epoch": 1.44, "learning_rate": 9.983455750052678e-06, "logits/chosen": -1.192550539970398, "logits/rejected": -1.1414600610733032, "logps/chosen": -163.52423095703125, "logps/rejected": -277.091552734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.23853150010108948, "rewards/margins": 11.995368957519531, "rewards/rejected": -11.756837844848633, "step": 6499 }, { "epoch": 1.44, "learning_rate": 9.983309745996663e-06, "logits/chosen": -1.1281911134719849, "logits/rejected": -1.0804423093795776, "logps/chosen": -173.91622924804688, "logps/rejected": -250.0653076171875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.5773255825042725, "rewards/margins": 7.4560441970825195, "rewards/rejected": -10.033370018005371, "step": 6500 }, { "epoch": 1.44, "learning_rate": 9.983163101597325e-06, "logits/chosen": -1.446544885635376, "logits/rejected": -1.5112426280975342, "logps/chosen": -183.63214111328125, "logps/rejected": -175.09927368164062, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.3795959949493408, "rewards/margins": 3.7550699710845947, "rewards/rejected": -5.1346659660339355, "step": 6501 }, { "epoch": 1.44, "learning_rate": 9.983015816873508e-06, "logits/chosen": -1.2311816215515137, "logits/rejected": -1.3103690147399902, "logps/chosen": -263.1396484375, "logps/rejected": -85.36788177490234, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.7954315543174744, "rewards/margins": 6.748787879943848, "rewards/rejected": -5.9533562660217285, "step": 6502 }, { "epoch": 1.44, "learning_rate": 9.982867891844136e-06, "logits/chosen": -1.2781054973602295, "logits/rejected": -1.2781054973602295, "logps/chosen": -88.05123901367188, "logps/rejected": -88.05123901367188, "loss": 0.3471, "rewards/accuracies": 0.0, "rewards/chosen": -5.272524356842041, "rewards/margins": 0.0, "rewards/rejected": -5.272524356842041, "step": 6503 }, { "epoch": 1.44, "learning_rate": 9.98271932652822e-06, "logits/chosen": -1.1219843626022339, "logits/rejected": -1.1219843626022339, "logps/chosen": -53.800689697265625, "logps/rejected": -53.800689697265625, "loss": 0.3473, "rewards/accuracies": 0.0, "rewards/chosen": -0.2909339964389801, "rewards/margins": 0.0, "rewards/rejected": -0.2909339964389801, "step": 6504 }, { "epoch": 1.44, "learning_rate": 9.982570120944847e-06, "logits/chosen": -1.370953917503357, "logits/rejected": -1.370953917503357, "logps/chosen": -246.03411865234375, "logps/rejected": -246.03411865234375, "loss": 0.362, "rewards/accuracies": 0.0, "rewards/chosen": -10.726448059082031, "rewards/margins": 0.0, "rewards/rejected": -10.726448059082031, "step": 6505 }, { "epoch": 1.44, "learning_rate": 9.982420275113194e-06, "logits/chosen": -1.577398657798767, "logits/rejected": -1.6319329738616943, "logps/chosen": -88.4041976928711, "logps/rejected": -154.78330993652344, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": 0.21563033759593964, "rewards/margins": 6.181253910064697, "rewards/rejected": -5.965623378753662, "step": 6506 }, { "epoch": 1.44, "learning_rate": 9.98226978905251e-06, "logits/chosen": -1.326034665107727, "logits/rejected": -1.3084630966186523, "logps/chosen": -103.4014892578125, "logps/rejected": -121.21077728271484, "loss": 0.1796, "rewards/accuracies": 1.0, "rewards/chosen": -1.7797889709472656, "rewards/margins": 0.8385765552520752, "rewards/rejected": -2.618365526199341, "step": 6507 }, { "epoch": 1.44, "learning_rate": 9.982118662782136e-06, "logits/chosen": -1.087770938873291, "logits/rejected": -0.9040417075157166, "logps/chosen": -84.42605590820312, "logps/rejected": -249.19525146484375, "loss": 0.0878, "rewards/accuracies": 1.0, "rewards/chosen": -1.2681869268417358, "rewards/margins": 1.650933861732483, "rewards/rejected": -2.9191207885742188, "step": 6508 }, { "epoch": 1.44, "learning_rate": 9.981966896321492e-06, "logits/chosen": -1.2884776592254639, "logits/rejected": -1.2851412296295166, "logps/chosen": -106.84677124023438, "logps/rejected": -103.19054412841797, "loss": 0.417, "rewards/accuracies": 0.0, "rewards/chosen": -2.371328830718994, "rewards/margins": -0.25735926628112793, "rewards/rejected": -2.113969564437866, "step": 6509 }, { "epoch": 1.44, "learning_rate": 9.981814489690077e-06, "logits/chosen": -1.3319469690322876, "logits/rejected": -1.3319469690322876, "logps/chosen": -196.4741973876953, "logps/rejected": -196.4741973876953, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -9.308917999267578, "rewards/margins": 0.0, "rewards/rejected": -9.308917999267578, "step": 6510 }, { "epoch": 1.44, "learning_rate": 9.981661442907477e-06, "logits/chosen": -1.0761100053787231, "logits/rejected": -1.0643398761749268, "logps/chosen": -101.47392272949219, "logps/rejected": -129.07571411132812, "loss": 0.0817, "rewards/accuracies": 1.0, "rewards/chosen": -1.569427490234375, "rewards/margins": 1.7474792003631592, "rewards/rejected": -3.316906690597534, "step": 6511 }, { "epoch": 1.44, "learning_rate": 9.981507755993357e-06, "logits/chosen": -1.0066545009613037, "logits/rejected": -1.0451661348342896, "logps/chosen": -133.7593994140625, "logps/rejected": -133.22958374023438, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": 0.3885910212993622, "rewards/margins": 3.5490357875823975, "rewards/rejected": -3.160444736480713, "step": 6512 }, { "epoch": 1.44, "learning_rate": 9.981353428967465e-06, "logits/chosen": -0.9480933547019958, "logits/rejected": -1.0198758840560913, "logps/chosen": -190.38629150390625, "logps/rejected": -139.39129638671875, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": 1.395623803138733, "rewards/margins": 5.232840061187744, "rewards/rejected": -3.8372161388397217, "step": 6513 }, { "epoch": 1.44, "learning_rate": 9.98119846184963e-06, "logits/chosen": -1.2883893251419067, "logits/rejected": -1.2883893251419067, "logps/chosen": -168.07118225097656, "logps/rejected": -168.07118225097656, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -8.17884349822998, "rewards/margins": 0.0, "rewards/rejected": -8.17884349822998, "step": 6514 }, { "epoch": 1.44, "learning_rate": 9.98104285465977e-06, "logits/chosen": -1.435665249824524, "logits/rejected": -1.4073407649993896, "logps/chosen": -107.06441497802734, "logps/rejected": -188.71759033203125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.30518266558647156, "rewards/margins": 6.667745590209961, "rewards/rejected": -6.972928047180176, "step": 6515 }, { "epoch": 1.44, "learning_rate": 9.980886607417877e-06, "logits/chosen": -1.9493579864501953, "logits/rejected": -1.9742257595062256, "logps/chosen": -86.83840942382812, "logps/rejected": -95.51437377929688, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": -3.4467697143554688, "rewards/margins": 2.903914451599121, "rewards/rejected": -6.35068416595459, "step": 6516 }, { "epoch": 1.44, "learning_rate": 9.980729720144027e-06, "logits/chosen": -0.9808492064476013, "logits/rejected": -0.9210032224655151, "logps/chosen": -98.71409606933594, "logps/rejected": -172.6556396484375, "loss": 0.2315, "rewards/accuracies": 1.0, "rewards/chosen": -2.1615493297576904, "rewards/margins": 5.600550651550293, "rewards/rejected": -7.7621002197265625, "step": 6517 }, { "epoch": 1.44, "learning_rate": 9.980572192858383e-06, "logits/chosen": -1.4276275634765625, "logits/rejected": -1.4448963403701782, "logps/chosen": -154.43663024902344, "logps/rejected": -144.67518615722656, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -1.7900803089141846, "rewards/margins": 3.4239776134490967, "rewards/rejected": -5.214057922363281, "step": 6518 }, { "epoch": 1.44, "learning_rate": 9.980414025581185e-06, "logits/chosen": -1.0403554439544678, "logits/rejected": -1.1015938520431519, "logps/chosen": -188.56527709960938, "logps/rejected": -238.88636779785156, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -5.744955539703369, "rewards/margins": 5.82755708694458, "rewards/rejected": -11.57251262664795, "step": 6519 }, { "epoch": 1.44, "learning_rate": 9.980255218332758e-06, "logits/chosen": -1.5687470436096191, "logits/rejected": -1.551514744758606, "logps/chosen": -136.59805297851562, "logps/rejected": -242.45718383789062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6899459958076477, "rewards/margins": 8.835886001586914, "rewards/rejected": -9.525832176208496, "step": 6520 }, { "epoch": 1.44, "learning_rate": 9.980095771133504e-06, "logits/chosen": -1.4046093225479126, "logits/rejected": -1.2234199047088623, "logps/chosen": -166.37216186523438, "logps/rejected": -397.7188720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.4001801013946533, "rewards/margins": 18.240196228027344, "rewards/rejected": -16.840015411376953, "step": 6521 }, { "epoch": 1.44, "learning_rate": 9.979935684003918e-06, "logits/chosen": -1.158143162727356, "logits/rejected": -1.185875654220581, "logps/chosen": -183.59678649902344, "logps/rejected": -146.65655517578125, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -2.291609287261963, "rewards/margins": 2.9847779273986816, "rewards/rejected": -5.2763872146606445, "step": 6522 }, { "epoch": 1.44, "learning_rate": 9.979774956964569e-06, "logits/chosen": -1.2410039901733398, "logits/rejected": -1.3133325576782227, "logps/chosen": -232.674072265625, "logps/rejected": -109.52000427246094, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": -0.3508010804653168, "rewards/margins": 2.1135683059692383, "rewards/rejected": -2.464369297027588, "step": 6523 }, { "epoch": 1.44, "learning_rate": 9.979613590036108e-06, "logits/chosen": -1.376779556274414, "logits/rejected": -1.376779556274414, "logps/chosen": -85.8813247680664, "logps/rejected": -85.8813247680664, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -1.4631966352462769, "rewards/margins": 0.0, "rewards/rejected": -1.4631966352462769, "step": 6524 }, { "epoch": 1.44, "learning_rate": 9.979451583239272e-06, "logits/chosen": -1.4518051147460938, "logits/rejected": -1.4748774766921997, "logps/chosen": -62.23628234863281, "logps/rejected": -84.45599365234375, "loss": 0.1474, "rewards/accuracies": 1.0, "rewards/chosen": -1.016394019126892, "rewards/margins": 5.237565994262695, "rewards/rejected": -6.253960132598877, "step": 6525 }, { "epoch": 1.44, "learning_rate": 9.979288936594877e-06, "logits/chosen": -1.4766364097595215, "logits/rejected": -1.5926458835601807, "logps/chosen": -216.05975341796875, "logps/rejected": -168.39410400390625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -7.855050563812256, "rewards/margins": 4.937692165374756, "rewards/rejected": -12.792742729187012, "step": 6526 }, { "epoch": 1.44, "learning_rate": 9.979125650123824e-06, "logits/chosen": -0.8838542103767395, "logits/rejected": -0.7873924374580383, "logps/chosen": -93.913330078125, "logps/rejected": -295.9103698730469, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 0.56854248046875, "rewards/margins": 7.195967197418213, "rewards/rejected": -6.627424716949463, "step": 6527 }, { "epoch": 1.44, "learning_rate": 9.978961723847093e-06, "logits/chosen": -1.4246330261230469, "logits/rejected": -1.3586161136627197, "logps/chosen": -98.1503677368164, "logps/rejected": -160.64596557617188, "loss": 0.1087, "rewards/accuracies": 1.0, "rewards/chosen": -1.834978461265564, "rewards/margins": 2.9094414710998535, "rewards/rejected": -4.744420051574707, "step": 6528 }, { "epoch": 1.45, "learning_rate": 9.978797157785752e-06, "logits/chosen": -1.1578127145767212, "logits/rejected": -1.2632955312728882, "logps/chosen": -264.61224365234375, "logps/rejected": -318.5830383300781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.152508497238159, "rewards/margins": 14.69017505645752, "rewards/rejected": -16.842683792114258, "step": 6529 }, { "epoch": 1.45, "learning_rate": 9.978631951960942e-06, "logits/chosen": -1.0086781978607178, "logits/rejected": -1.0103552341461182, "logps/chosen": -117.46903991699219, "logps/rejected": -99.24827575683594, "loss": 0.0654, "rewards/accuracies": 1.0, "rewards/chosen": -3.9724502563476562, "rewards/margins": 3.718289375305176, "rewards/rejected": -7.690739631652832, "step": 6530 }, { "epoch": 1.45, "learning_rate": 9.978466106393896e-06, "logits/chosen": -1.625632643699646, "logits/rejected": -1.5153279304504395, "logps/chosen": -97.19007873535156, "logps/rejected": -283.6906433105469, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.391854852437973, "rewards/margins": 6.415289402008057, "rewards/rejected": -6.8071441650390625, "step": 6531 }, { "epoch": 1.45, "learning_rate": 9.978299621105924e-06, "logits/chosen": -1.0872722864151, "logits/rejected": -1.0632894039154053, "logps/chosen": -128.1952362060547, "logps/rejected": -170.41590881347656, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -1.3743613958358765, "rewards/margins": 4.867081642150879, "rewards/rejected": -6.241443157196045, "step": 6532 }, { "epoch": 1.45, "learning_rate": 9.978132496118418e-06, "logits/chosen": -1.5044656991958618, "logits/rejected": -1.627150297164917, "logps/chosen": -204.27578735351562, "logps/rejected": -130.94285583496094, "loss": 0.3127, "rewards/accuracies": 1.0, "rewards/chosen": -4.509573459625244, "rewards/margins": 0.14031076431274414, "rewards/rejected": -4.649884223937988, "step": 6533 }, { "epoch": 1.45, "learning_rate": 9.977964731452852e-06, "logits/chosen": -1.3303221464157104, "logits/rejected": -1.3303221464157104, "logps/chosen": -191.38156127929688, "logps/rejected": -191.38156127929688, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -8.608242988586426, "rewards/margins": 0.0, "rewards/rejected": -8.608242988586426, "step": 6534 }, { "epoch": 1.45, "learning_rate": 9.977796327130786e-06, "logits/chosen": -1.5538631677627563, "logits/rejected": -1.4699862003326416, "logps/chosen": -124.48808288574219, "logps/rejected": -185.93910217285156, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -3.1272218227386475, "rewards/margins": 3.0791380405426025, "rewards/rejected": -6.20635986328125, "step": 6535 }, { "epoch": 1.45, "learning_rate": 9.977627283173858e-06, "logits/chosen": -1.2467501163482666, "logits/rejected": -0.7641682624816895, "logps/chosen": -111.1439208984375, "logps/rejected": -1017.4011840820312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9920700192451477, "rewards/margins": 91.1187744140625, "rewards/rejected": -92.11084747314453, "step": 6536 }, { "epoch": 1.45, "learning_rate": 9.97745759960379e-06, "logits/chosen": -1.5182257890701294, "logits/rejected": -1.5172380208969116, "logps/chosen": -109.75579071044922, "logps/rejected": -286.8018798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3705543279647827, "rewards/margins": 14.640308380126953, "rewards/rejected": -16.010862350463867, "step": 6537 }, { "epoch": 1.45, "learning_rate": 9.977287276442385e-06, "logits/chosen": -1.293440818786621, "logits/rejected": -1.2259385585784912, "logps/chosen": -130.18405151367188, "logps/rejected": -186.4984588623047, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.249587297439575, "rewards/margins": 6.984734535217285, "rewards/rejected": -9.234321594238281, "step": 6538 }, { "epoch": 1.45, "learning_rate": 9.97711631371153e-06, "logits/chosen": -1.333561897277832, "logits/rejected": -1.3062496185302734, "logps/chosen": -90.40193939208984, "logps/rejected": -126.10276794433594, "loss": 0.0516, "rewards/accuracies": 1.0, "rewards/chosen": -1.6644080877304077, "rewards/margins": 2.2183337211608887, "rewards/rejected": -3.882741689682007, "step": 6539 }, { "epoch": 1.45, "learning_rate": 9.976944711433194e-06, "logits/chosen": -1.6206209659576416, "logits/rejected": -1.7109026908874512, "logps/chosen": -123.13262939453125, "logps/rejected": -172.53346252441406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9068557620048523, "rewards/margins": 10.725841522216797, "rewards/rejected": -11.632697105407715, "step": 6540 }, { "epoch": 1.45, "learning_rate": 9.976772469629428e-06, "logits/chosen": -1.0065419673919678, "logits/rejected": -0.8653649687767029, "logps/chosen": -124.60823059082031, "logps/rejected": -259.10400390625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.3184120655059814, "rewards/margins": 4.718132972717285, "rewards/rejected": -7.0365447998046875, "step": 6541 }, { "epoch": 1.45, "learning_rate": 9.976599588322362e-06, "logits/chosen": -1.5154554843902588, "logits/rejected": -1.5122629404067993, "logps/chosen": -131.5637664794922, "logps/rejected": -161.50613403320312, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -3.568801164627075, "rewards/margins": 3.363321542739868, "rewards/rejected": -6.932122707366943, "step": 6542 }, { "epoch": 1.45, "learning_rate": 9.976426067534212e-06, "logits/chosen": -1.579039454460144, "logits/rejected": -0.7787944078445435, "logps/chosen": -73.50872802734375, "logps/rejected": -496.47894287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.550449371337891, "rewards/margins": 17.918895721435547, "rewards/rejected": -24.469345092773438, "step": 6543 }, { "epoch": 1.45, "learning_rate": 9.976251907287277e-06, "logits/chosen": -1.2542431354522705, "logits/rejected": -1.1557762622833252, "logps/chosen": -155.9560089111328, "logps/rejected": -226.9639129638672, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 2.9133682250976562, "rewards/margins": 5.286184787750244, "rewards/rejected": -2.372816562652588, "step": 6544 }, { "epoch": 1.45, "learning_rate": 9.976077107603933e-06, "logits/chosen": -1.3098360300064087, "logits/rejected": -1.3410133123397827, "logps/chosen": -155.78907775878906, "logps/rejected": -146.0083770751953, "loss": 0.3873, "rewards/accuracies": 1.0, "rewards/chosen": -6.929349422454834, "rewards/margins": 2.46567964553833, "rewards/rejected": -9.395029067993164, "step": 6545 }, { "epoch": 1.45, "learning_rate": 9.975901668506644e-06, "logits/chosen": -1.3996920585632324, "logits/rejected": -1.3601185083389282, "logps/chosen": -137.80520629882812, "logps/rejected": -254.04876708984375, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": 0.296090692281723, "rewards/margins": 13.200532913208008, "rewards/rejected": -12.904441833496094, "step": 6546 }, { "epoch": 1.45, "learning_rate": 9.97572559001795e-06, "logits/chosen": -1.5046206712722778, "logits/rejected": -1.4856492280960083, "logps/chosen": -80.01493072509766, "logps/rejected": -188.95777893066406, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -0.5944755673408508, "rewards/margins": 3.1157326698303223, "rewards/rejected": -3.7102081775665283, "step": 6547 }, { "epoch": 1.45, "learning_rate": 9.975548872160482e-06, "logits/chosen": -1.092207670211792, "logits/rejected": -1.092207670211792, "logps/chosen": -261.7035217285156, "logps/rejected": -261.7035217285156, "loss": 0.5701, "rewards/accuracies": 0.0, "rewards/chosen": -2.040391683578491, "rewards/margins": 0.0, "rewards/rejected": -2.040391683578491, "step": 6548 }, { "epoch": 1.45, "learning_rate": 9.975371514956945e-06, "logits/chosen": -1.1114392280578613, "logits/rejected": -1.1174650192260742, "logps/chosen": -74.28221893310547, "logps/rejected": -66.35130310058594, "loss": 0.1631, "rewards/accuracies": 1.0, "rewards/chosen": -4.543148994445801, "rewards/margins": 0.9550771713256836, "rewards/rejected": -5.498226165771484, "step": 6549 }, { "epoch": 1.45, "learning_rate": 9.975193518430127e-06, "logits/chosen": -1.1440716981887817, "logits/rejected": -1.2754982709884644, "logps/chosen": -228.06100463867188, "logps/rejected": -197.56414794921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.606892466545105, "rewards/margins": 11.011483192443848, "rewards/rejected": -12.618375778198242, "step": 6550 }, { "epoch": 1.45, "learning_rate": 9.9750148826029e-06, "logits/chosen": -1.531776785850525, "logits/rejected": -1.5229628086090088, "logps/chosen": -71.24508666992188, "logps/rejected": -124.01782989501953, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": 1.096967339515686, "rewards/margins": 4.860633850097656, "rewards/rejected": -3.7636666297912598, "step": 6551 }, { "epoch": 1.45, "learning_rate": 9.974835607498224e-06, "logits/chosen": -1.2442007064819336, "logits/rejected": -1.2442007064819336, "logps/chosen": -96.31812286376953, "logps/rejected": -96.31812286376953, "loss": 0.3492, "rewards/accuracies": 0.0, "rewards/chosen": -7.2672438621521, "rewards/margins": 0.0, "rewards/rejected": -7.2672438621521, "step": 6552 }, { "epoch": 1.45, "learning_rate": 9.97465569313913e-06, "logits/chosen": -1.5859583616256714, "logits/rejected": -1.5859583616256714, "logps/chosen": -163.23928833007812, "logps/rejected": -163.23928833007812, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.839988708496094, "rewards/margins": 0.0, "rewards/rejected": -5.839988708496094, "step": 6553 }, { "epoch": 1.45, "learning_rate": 9.974475139548738e-06, "logits/chosen": -1.3513188362121582, "logits/rejected": -1.2670482397079468, "logps/chosen": -141.19711303710938, "logps/rejected": -278.05059814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.10702209919691086, "rewards/margins": 9.300383567810059, "rewards/rejected": -9.193361282348633, "step": 6554 }, { "epoch": 1.45, "learning_rate": 9.97429394675025e-06, "logits/chosen": -1.540073037147522, "logits/rejected": -1.5294908285140991, "logps/chosen": -110.956298828125, "logps/rejected": -214.14990234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.3935928344726562, "rewards/margins": 7.029484748840332, "rewards/rejected": -8.423077583312988, "step": 6555 }, { "epoch": 1.45, "learning_rate": 9.974112114766945e-06, "logits/chosen": -1.4299864768981934, "logits/rejected": -1.4267520904541016, "logps/chosen": -149.61355590820312, "logps/rejected": -398.84478759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.1997833251953125, "rewards/margins": 21.131698608398438, "rewards/rejected": -18.931915283203125, "step": 6556 }, { "epoch": 1.45, "learning_rate": 9.973929643622194e-06, "logits/chosen": -1.3981026411056519, "logits/rejected": -1.3205504417419434, "logps/chosen": -122.60848236083984, "logps/rejected": -270.63543701171875, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -0.5560615658760071, "rewards/margins": 3.1884102821350098, "rewards/rejected": -3.744471788406372, "step": 6557 }, { "epoch": 1.45, "learning_rate": 9.973746533339438e-06, "logits/chosen": -1.3665639162063599, "logits/rejected": -1.3411853313446045, "logps/chosen": -116.62358093261719, "logps/rejected": -130.7144775390625, "loss": 0.5814, "rewards/accuracies": 1.0, "rewards/chosen": -5.003568172454834, "rewards/margins": 0.5119991302490234, "rewards/rejected": -5.515567302703857, "step": 6558 }, { "epoch": 1.45, "learning_rate": 9.97356278394221e-06, "logits/chosen": -1.5440558195114136, "logits/rejected": -1.4594229459762573, "logps/chosen": -103.27424621582031, "logps/rejected": -201.833740234375, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.7441024780273438, "rewards/margins": 3.9511094093322754, "rewards/rejected": -5.695211887359619, "step": 6559 }, { "epoch": 1.45, "learning_rate": 9.973378395454121e-06, "logits/chosen": -1.2510571479797363, "logits/rejected": -1.2400115728378296, "logps/chosen": -106.7303466796875, "logps/rejected": -149.91632080078125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.193185567855835, "rewards/margins": 5.508048057556152, "rewards/rejected": -8.701233863830566, "step": 6560 }, { "epoch": 1.45, "learning_rate": 9.973193367898863e-06, "logits/chosen": -1.1522666215896606, "logits/rejected": -1.2453283071517944, "logps/chosen": -250.05751037597656, "logps/rejected": -280.09661865234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 2.7454025745391846, "rewards/margins": 6.229454040527344, "rewards/rejected": -3.484051465988159, "step": 6561 }, { "epoch": 1.45, "learning_rate": 9.973007701300214e-06, "logits/chosen": -1.3301095962524414, "logits/rejected": -1.3613563776016235, "logps/chosen": -217.73348999023438, "logps/rejected": -611.605712890625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -11.008127212524414, "rewards/margins": 41.85186767578125, "rewards/rejected": -52.85999298095703, "step": 6562 }, { "epoch": 1.45, "learning_rate": 9.972821395682029e-06, "logits/chosen": -1.1646462678909302, "logits/rejected": -1.146051287651062, "logps/chosen": -158.8160400390625, "logps/rejected": -177.70645141601562, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -6.955986022949219, "rewards/margins": 3.820021629333496, "rewards/rejected": -10.776007652282715, "step": 6563 }, { "epoch": 1.45, "learning_rate": 9.972634451068248e-06, "logits/chosen": -1.4359220266342163, "logits/rejected": -0.8486073017120361, "logps/chosen": -209.5434112548828, "logps/rejected": -841.5184326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.493450880050659, "rewards/margins": 63.87349319458008, "rewards/rejected": -61.380043029785156, "step": 6564 }, { "epoch": 1.45, "learning_rate": 9.972446867482896e-06, "logits/chosen": -1.2751080989837646, "logits/rejected": -1.212929606437683, "logps/chosen": -106.8222427368164, "logps/rejected": -209.48788452148438, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.1431801319122314, "rewards/margins": 5.376535415649414, "rewards/rejected": -6.519715785980225, "step": 6565 }, { "epoch": 1.45, "learning_rate": 9.972258644950074e-06, "logits/chosen": -1.0204533338546753, "logits/rejected": -0.9794567227363586, "logps/chosen": -104.59614562988281, "logps/rejected": -207.63592529296875, "loss": 0.2073, "rewards/accuracies": 1.0, "rewards/chosen": -1.7034409046173096, "rewards/margins": 0.6661543846130371, "rewards/rejected": -2.3695952892303467, "step": 6566 }, { "epoch": 1.45, "learning_rate": 9.97206978349397e-06, "logits/chosen": -1.193648338317871, "logits/rejected": -1.2470375299453735, "logps/chosen": -138.53045654296875, "logps/rejected": -134.39053344726562, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 1.1488555669784546, "rewards/margins": 6.17339563369751, "rewards/rejected": -5.024539947509766, "step": 6567 }, { "epoch": 1.45, "learning_rate": 9.971880283138849e-06, "logits/chosen": -1.8207011222839355, "logits/rejected": -1.8406307697296143, "logps/chosen": -142.7820281982422, "logps/rejected": -166.21939086914062, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.4956650733947754, "rewards/margins": 5.464659214019775, "rewards/rejected": -7.960324287414551, "step": 6568 }, { "epoch": 1.45, "learning_rate": 9.971690143909066e-06, "logits/chosen": -1.3112587928771973, "logits/rejected": -1.1590309143066406, "logps/chosen": -189.09701538085938, "logps/rejected": -355.72064208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 5.899267673492432, "rewards/margins": 13.446887969970703, "rewards/rejected": -7.547619819641113, "step": 6569 }, { "epoch": 1.45, "learning_rate": 9.971499365829049e-06, "logits/chosen": -1.1053760051727295, "logits/rejected": -1.100582242012024, "logps/chosen": -90.45590209960938, "logps/rejected": -185.002197265625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.1945817470550537, "rewards/margins": 5.709904670715332, "rewards/rejected": -7.904486179351807, "step": 6570 }, { "epoch": 1.45, "learning_rate": 9.971307948923316e-06, "logits/chosen": -1.6654455661773682, "logits/rejected": -1.688100814819336, "logps/chosen": -121.76300048828125, "logps/rejected": -110.69329833984375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.0580811500549316, "rewards/margins": 5.284695148468018, "rewards/rejected": -8.34277629852295, "step": 6571 }, { "epoch": 1.45, "learning_rate": 9.971115893216463e-06, "logits/chosen": -1.3648998737335205, "logits/rejected": -1.3572829961776733, "logps/chosen": -125.47616577148438, "logps/rejected": -117.074951171875, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.5361725091934204, "rewards/margins": 3.983654022216797, "rewards/rejected": -4.519826412200928, "step": 6572 }, { "epoch": 1.45, "learning_rate": 9.970923198733167e-06, "logits/chosen": -1.5700232982635498, "logits/rejected": -1.6061514616012573, "logps/chosen": -92.91411590576172, "logps/rejected": -99.65994262695312, "loss": 0.0315, "rewards/accuracies": 1.0, "rewards/chosen": -4.520604610443115, "rewards/margins": 3.178384780883789, "rewards/rejected": -7.698989391326904, "step": 6573 }, { "epoch": 1.46, "learning_rate": 9.97072986549819e-06, "logits/chosen": -1.3649441003799438, "logits/rejected": -1.3760390281677246, "logps/chosen": -137.15870666503906, "logps/rejected": -172.52392578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.3318130671977997, "rewards/margins": 7.354370594024658, "rewards/rejected": -7.686183452606201, "step": 6574 }, { "epoch": 1.46, "learning_rate": 9.970535893536375e-06, "logits/chosen": -1.424691915512085, "logits/rejected": -1.3953754901885986, "logps/chosen": -139.9710235595703, "logps/rejected": -190.4877471923828, "loss": 0.1017, "rewards/accuracies": 1.0, "rewards/chosen": -4.558568000793457, "rewards/margins": 4.517865180969238, "rewards/rejected": -9.076433181762695, "step": 6575 }, { "epoch": 1.46, "learning_rate": 9.970341282872645e-06, "logits/chosen": -1.2082161903381348, "logits/rejected": -1.3155710697174072, "logps/chosen": -194.65695190429688, "logps/rejected": -271.588134765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.6335739493370056, "rewards/margins": 14.114619255065918, "rewards/rejected": -13.481045722961426, "step": 6576 }, { "epoch": 1.46, "learning_rate": 9.97014603353201e-06, "logits/chosen": -1.5164506435394287, "logits/rejected": -1.5673813819885254, "logps/chosen": -119.043701171875, "logps/rejected": -98.52477264404297, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -3.758108615875244, "rewards/margins": 4.288151264190674, "rewards/rejected": -8.046259880065918, "step": 6577 }, { "epoch": 1.46, "learning_rate": 9.969950145539557e-06, "logits/chosen": -1.4113366603851318, "logits/rejected": -1.443898320198059, "logps/chosen": -123.55860137939453, "logps/rejected": -160.48468017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.4247642755508423, "rewards/margins": 12.172236442565918, "rewards/rejected": -10.747471809387207, "step": 6578 }, { "epoch": 1.46, "learning_rate": 9.969753618920456e-06, "logits/chosen": -1.644524097442627, "logits/rejected": -1.6253911256790161, "logps/chosen": -114.99546813964844, "logps/rejected": -148.12130737304688, "loss": 0.444, "rewards/accuracies": 1.0, "rewards/chosen": -1.7703392505645752, "rewards/margins": 1.5366013050079346, "rewards/rejected": -3.3069405555725098, "step": 6579 }, { "epoch": 1.46, "learning_rate": 9.969556453699966e-06, "logits/chosen": -1.713374137878418, "logits/rejected": -0.7520344257354736, "logps/chosen": -153.42105102539062, "logps/rejected": -1181.3629150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.414952278137207, "rewards/margins": 97.9945297241211, "rewards/rejected": -103.40948486328125, "step": 6580 }, { "epoch": 1.46, "learning_rate": 9.969358649903415e-06, "logits/chosen": -1.0368562936782837, "logits/rejected": -0.9337908625602722, "logps/chosen": -163.06585693359375, "logps/rejected": -297.4377136230469, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 0.4956466853618622, "rewards/margins": 5.0290632247924805, "rewards/rejected": -4.533416748046875, "step": 6581 }, { "epoch": 1.46, "learning_rate": 9.969160207556225e-06, "logits/chosen": -1.5979653596878052, "logits/rejected": -1.5652236938476562, "logps/chosen": -126.81832122802734, "logps/rejected": -178.70870971679688, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.8906394839286804, "rewards/margins": 6.1908440589904785, "rewards/rejected": -7.081483364105225, "step": 6582 }, { "epoch": 1.46, "learning_rate": 9.968961126683893e-06, "logits/chosen": -1.1965504884719849, "logits/rejected": -1.1737420558929443, "logps/chosen": -132.24322509765625, "logps/rejected": -124.53424072265625, "loss": 0.0765, "rewards/accuracies": 1.0, "rewards/chosen": -3.4019806385040283, "rewards/margins": 1.8019707202911377, "rewards/rejected": -5.203951358795166, "step": 6583 }, { "epoch": 1.46, "learning_rate": 9.968761407312002e-06, "logits/chosen": -1.195093035697937, "logits/rejected": -1.2504781484603882, "logps/chosen": -197.77528381347656, "logps/rejected": -157.58543395996094, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 0.259194940328598, "rewards/margins": 4.9059157371521, "rewards/rejected": -4.646720886230469, "step": 6584 }, { "epoch": 1.46, "learning_rate": 9.968561049466214e-06, "logits/chosen": -1.6623965501785278, "logits/rejected": -1.6359663009643555, "logps/chosen": -135.30503845214844, "logps/rejected": -166.64788818359375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.020625352859497, "rewards/margins": 4.7496232986450195, "rewards/rejected": -7.7702484130859375, "step": 6585 }, { "epoch": 1.46, "learning_rate": 9.968360053172275e-06, "logits/chosen": -1.3588123321533203, "logits/rejected": -1.4845669269561768, "logps/chosen": -175.08770751953125, "logps/rejected": -112.0448989868164, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 3.3192474842071533, "rewards/margins": 5.162297248840332, "rewards/rejected": -1.8430496454238892, "step": 6586 }, { "epoch": 1.46, "learning_rate": 9.968158418456013e-06, "logits/chosen": -1.6009739637374878, "logits/rejected": -1.276295781135559, "logps/chosen": -211.48367309570312, "logps/rejected": -544.7821655273438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.1106719970703125, "rewards/margins": 22.305280685424805, "rewards/rejected": -22.194608688354492, "step": 6587 }, { "epoch": 1.46, "learning_rate": 9.967956145343339e-06, "logits/chosen": -1.4294687509536743, "logits/rejected": -1.3981409072875977, "logps/chosen": -104.869140625, "logps/rejected": -216.85565185546875, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": -0.09123535454273224, "rewards/margins": 5.650946140289307, "rewards/rejected": -5.742181301116943, "step": 6588 }, { "epoch": 1.46, "learning_rate": 9.96775323386024e-06, "logits/chosen": -1.5680843591690063, "logits/rejected": -1.5341905355453491, "logps/chosen": -114.59735107421875, "logps/rejected": -167.2334442138672, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.28206712007522583, "rewards/margins": 8.038312911987305, "rewards/rejected": -7.7562456130981445, "step": 6589 }, { "epoch": 1.46, "learning_rate": 9.967549684032796e-06, "logits/chosen": -1.344834804534912, "logits/rejected": -1.344834804534912, "logps/chosen": -85.85165405273438, "logps/rejected": -85.85165405273438, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.223057508468628, "rewards/margins": 0.0, "rewards/rejected": -3.223057508468628, "step": 6590 }, { "epoch": 1.46, "learning_rate": 9.967345495887157e-06, "logits/chosen": -1.4437323808670044, "logits/rejected": -1.576041579246521, "logps/chosen": -288.70166015625, "logps/rejected": -262.37445068359375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.784701824188232, "rewards/margins": 6.780041217803955, "rewards/rejected": -11.564743041992188, "step": 6591 }, { "epoch": 1.46, "learning_rate": 9.967140669449562e-06, "logits/chosen": -1.2186546325683594, "logits/rejected": -1.2287043333053589, "logps/chosen": -188.25149536132812, "logps/rejected": -150.3993682861328, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": -0.6009125113487244, "rewards/margins": 2.1448981761932373, "rewards/rejected": -2.7458107471466064, "step": 6592 }, { "epoch": 1.46, "learning_rate": 9.966935204746332e-06, "logits/chosen": -1.012589693069458, "logits/rejected": -0.9250569343566895, "logps/chosen": -57.95628356933594, "logps/rejected": -157.77830505371094, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.9207969903945923, "rewards/margins": 9.421735763549805, "rewards/rejected": -11.342533111572266, "step": 6593 }, { "epoch": 1.46, "learning_rate": 9.966729101803872e-06, "logits/chosen": -1.1893935203552246, "logits/rejected": -1.217185616493225, "logps/chosen": -186.7102508544922, "logps/rejected": -153.1223907470703, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.012054443359375, "rewards/margins": 11.12881088256836, "rewards/rejected": -10.116756439208984, "step": 6594 }, { "epoch": 1.46, "learning_rate": 9.966522360648659e-06, "logits/chosen": -1.1932295560836792, "logits/rejected": -1.1821657419204712, "logps/chosen": -263.15887451171875, "logps/rejected": -227.898681640625, "loss": 0.079, "rewards/accuracies": 1.0, "rewards/chosen": -10.966313362121582, "rewards/margins": 1.7648296356201172, "rewards/rejected": -12.7311429977417, "step": 6595 }, { "epoch": 1.46, "learning_rate": 9.966314981307261e-06, "logits/chosen": -1.0625392198562622, "logits/rejected": -0.9656500816345215, "logps/chosen": -173.97047424316406, "logps/rejected": -355.68896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8987335562705994, "rewards/margins": 9.571664810180664, "rewards/rejected": -10.47039794921875, "step": 6596 }, { "epoch": 1.46, "learning_rate": 9.96610696380633e-06, "logits/chosen": -1.887173056602478, "logits/rejected": -1.785996437072754, "logps/chosen": -108.69151306152344, "logps/rejected": -234.4237060546875, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": 0.4357971251010895, "rewards/margins": 3.585402011871338, "rewards/rejected": -3.1496047973632812, "step": 6597 }, { "epoch": 1.46, "learning_rate": 9.965898308172589e-06, "logits/chosen": -1.22360360622406, "logits/rejected": -1.230264663696289, "logps/chosen": -144.69268798828125, "logps/rejected": -207.36306762695312, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -1.96490478515625, "rewards/margins": 8.524614334106445, "rewards/rejected": -10.489519119262695, "step": 6598 }, { "epoch": 1.46, "learning_rate": 9.965689014432854e-06, "logits/chosen": -1.2576907873153687, "logits/rejected": -1.153792142868042, "logps/chosen": -150.54994201660156, "logps/rejected": -367.37591552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7529312372207642, "rewards/margins": 15.420299530029297, "rewards/rejected": -16.17323112487793, "step": 6599 }, { "epoch": 1.46, "learning_rate": 9.965479082614019e-06, "logits/chosen": -1.4904636144638062, "logits/rejected": -1.4928373098373413, "logps/chosen": -92.97689819335938, "logps/rejected": -170.55970764160156, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.4157562255859375, "rewards/margins": 7.634762763977051, "rewards/rejected": -8.050518989562988, "step": 6600 }, { "epoch": 1.46, "learning_rate": 9.965268512743058e-06, "logits/chosen": -1.2512482404708862, "logits/rejected": -1.2276661396026611, "logps/chosen": -221.2379608154297, "logps/rejected": -178.9285888671875, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 3.9966049194335938, "rewards/margins": 6.803564548492432, "rewards/rejected": -2.806959629058838, "step": 6601 }, { "epoch": 1.46, "learning_rate": 9.965057304847029e-06, "logits/chosen": -1.425122618675232, "logits/rejected": -1.276686668395996, "logps/chosen": -139.49813842773438, "logps/rejected": -274.0809326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 4.6316237449646, "rewards/margins": 16.787967681884766, "rewards/rejected": -12.156344413757324, "step": 6602 }, { "epoch": 1.46, "learning_rate": 9.964845458953072e-06, "logits/chosen": -1.5177141427993774, "logits/rejected": -1.5164625644683838, "logps/chosen": -188.79869079589844, "logps/rejected": -207.9769287109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.145091250538826, "rewards/margins": 6.2885637283325195, "rewards/rejected": -6.43365478515625, "step": 6603 }, { "epoch": 1.46, "learning_rate": 9.964632975088408e-06, "logits/chosen": -1.892460823059082, "logits/rejected": -1.9147543907165527, "logps/chosen": -97.66957092285156, "logps/rejected": -138.91925048828125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.7763175964355469, "rewards/margins": 5.8301544189453125, "rewards/rejected": -7.606472015380859, "step": 6604 }, { "epoch": 1.46, "learning_rate": 9.964419853280343e-06, "logits/chosen": -1.6126736402511597, "logits/rejected": -1.6326261758804321, "logps/chosen": -151.99147033691406, "logps/rejected": -153.4410400390625, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -4.21727991104126, "rewards/margins": 3.0148468017578125, "rewards/rejected": -7.232126712799072, "step": 6605 }, { "epoch": 1.46, "learning_rate": 9.96420609355626e-06, "logits/chosen": -1.2295610904693604, "logits/rejected": -1.2307302951812744, "logps/chosen": -165.25584411621094, "logps/rejected": -167.68988037109375, "loss": 1.0682, "rewards/accuracies": 0.0, "rewards/chosen": -5.357926845550537, "rewards/margins": -2.0107131004333496, "rewards/rejected": -3.3472137451171875, "step": 6606 }, { "epoch": 1.46, "learning_rate": 9.963991695943627e-06, "logits/chosen": -1.3691489696502686, "logits/rejected": -1.5106669664382935, "logps/chosen": -221.05377197265625, "logps/rejected": -225.7687530517578, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.2723572254180908, "rewards/margins": 9.486000061035156, "rewards/rejected": -8.213643074035645, "step": 6607 }, { "epoch": 1.46, "learning_rate": 9.963776660469996e-06, "logits/chosen": -1.5517181158065796, "logits/rejected": -0.9275338649749756, "logps/chosen": -142.4651336669922, "logps/rejected": -786.5609741210938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8247056007385254, "rewards/margins": 51.51005935668945, "rewards/rejected": -54.33476638793945, "step": 6608 }, { "epoch": 1.46, "learning_rate": 9.963560987162994e-06, "logits/chosen": -0.8853309750556946, "logits/rejected": -0.8639360070228577, "logps/chosen": -96.32557678222656, "logps/rejected": -207.65155029296875, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -1.1559066772460938, "rewards/margins": 12.392900466918945, "rewards/rejected": -13.548807144165039, "step": 6609 }, { "epoch": 1.46, "learning_rate": 9.96334467605034e-06, "logits/chosen": -1.434491753578186, "logits/rejected": -1.534975290298462, "logps/chosen": -186.8780059814453, "logps/rejected": -183.6063690185547, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.2898605465888977, "rewards/margins": 6.965980052947998, "rewards/rejected": -6.676119327545166, "step": 6610 }, { "epoch": 1.46, "learning_rate": 9.963127727159825e-06, "logits/chosen": -1.41049325466156, "logits/rejected": -1.2790907621383667, "logps/chosen": -107.17543029785156, "logps/rejected": -257.00921630859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.4725593626499176, "rewards/margins": 12.106294631958008, "rewards/rejected": -11.633735656738281, "step": 6611 }, { "epoch": 1.46, "learning_rate": 9.962910140519328e-06, "logits/chosen": -1.03932785987854, "logits/rejected": -1.0571930408477783, "logps/chosen": -199.48863220214844, "logps/rejected": -271.29327392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.3343521356582642, "rewards/margins": 9.708515167236328, "rewards/rejected": -8.374162673950195, "step": 6612 }, { "epoch": 1.46, "learning_rate": 9.96269191615681e-06, "logits/chosen": -1.0443944931030273, "logits/rejected": -1.1366021633148193, "logps/chosen": -355.940185546875, "logps/rejected": -254.1949462890625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.339080810546875, "rewards/margins": 7.044641494750977, "rewards/rejected": -12.383722305297852, "step": 6613 }, { "epoch": 1.46, "learning_rate": 9.96247305410031e-06, "logits/chosen": -1.4518309831619263, "logits/rejected": -1.4135991334915161, "logps/chosen": -81.31361389160156, "logps/rejected": -130.14117431640625, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 1.1294571161270142, "rewards/margins": 4.12606954574585, "rewards/rejected": -2.996612548828125, "step": 6614 }, { "epoch": 1.46, "learning_rate": 9.962253554377952e-06, "logits/chosen": -1.0466268062591553, "logits/rejected": -1.1503747701644897, "logps/chosen": -214.41497802734375, "logps/rejected": -116.75178527832031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.706372022628784, "rewards/margins": 10.026838302612305, "rewards/rejected": -7.3204665184021, "step": 6615 }, { "epoch": 1.46, "learning_rate": 9.96203341701794e-06, "logits/chosen": -1.6618943214416504, "logits/rejected": -1.7632670402526855, "logps/chosen": -137.33706665039062, "logps/rejected": -211.2362060546875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.285736083984375, "rewards/margins": 10.094035148620605, "rewards/rejected": -12.37977123260498, "step": 6616 }, { "epoch": 1.46, "learning_rate": 9.961812642048563e-06, "logits/chosen": -1.3078291416168213, "logits/rejected": -1.3078291416168213, "logps/chosen": -119.57498931884766, "logps/rejected": -119.57498931884766, "loss": 0.3505, "rewards/accuracies": 0.0, "rewards/chosen": -6.148895740509033, "rewards/margins": 0.0, "rewards/rejected": -6.148895740509033, "step": 6617 }, { "epoch": 1.46, "learning_rate": 9.961591229498192e-06, "logits/chosen": -1.173835039138794, "logits/rejected": -1.079508662223816, "logps/chosen": -146.03924560546875, "logps/rejected": -132.28665161132812, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -2.634871006011963, "rewards/margins": 3.2980971336364746, "rewards/rejected": -5.9329681396484375, "step": 6618 }, { "epoch": 1.47, "learning_rate": 9.96136917939527e-06, "logits/chosen": -1.3590222597122192, "logits/rejected": -1.3657283782958984, "logps/chosen": -145.47314453125, "logps/rejected": -294.61529541015625, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": 0.5729400515556335, "rewards/margins": 15.89225959777832, "rewards/rejected": -15.319319725036621, "step": 6619 }, { "epoch": 1.47, "learning_rate": 9.961146491768338e-06, "logits/chosen": -1.257765531539917, "logits/rejected": -1.225376009941101, "logps/chosen": -161.129638671875, "logps/rejected": -194.3417205810547, "loss": 0.0503, "rewards/accuracies": 1.0, "rewards/chosen": 1.4173096418380737, "rewards/margins": 2.2453248500823975, "rewards/rejected": -0.828015148639679, "step": 6620 }, { "epoch": 1.47, "learning_rate": 9.96092316664601e-06, "logits/chosen": -1.4308249950408936, "logits/rejected": -1.3928886651992798, "logps/chosen": -184.76596069335938, "logps/rejected": -174.63499450683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.701007127761841, "rewards/margins": 11.74073600769043, "rewards/rejected": -9.039729118347168, "step": 6621 }, { "epoch": 1.47, "learning_rate": 9.960699204056978e-06, "logits/chosen": -1.2741141319274902, "logits/rejected": -1.0040301084518433, "logps/chosen": -129.1375732421875, "logps/rejected": -581.20703125, "loss": 0.0797, "rewards/accuracies": 1.0, "rewards/chosen": -2.963045597076416, "rewards/margins": 26.44154167175293, "rewards/rejected": -29.404586791992188, "step": 6622 }, { "epoch": 1.47, "learning_rate": 9.960474604030026e-06, "logits/chosen": -1.6359081268310547, "logits/rejected": -1.6391019821166992, "logps/chosen": -108.06266784667969, "logps/rejected": -96.67564392089844, "loss": 0.1053, "rewards/accuracies": 1.0, "rewards/chosen": -2.2531495094299316, "rewards/margins": 1.4501392841339111, "rewards/rejected": -3.7032887935638428, "step": 6623 }, { "epoch": 1.47, "learning_rate": 9.96024936659401e-06, "logits/chosen": -1.7013894319534302, "logits/rejected": -1.7464847564697266, "logps/chosen": -177.16415405273438, "logps/rejected": -193.62405395507812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 2.8442490100860596, "rewards/margins": 13.423112869262695, "rewards/rejected": -10.578864097595215, "step": 6624 }, { "epoch": 1.47, "learning_rate": 9.960023491777875e-06, "logits/chosen": -1.5777074098587036, "logits/rejected": -1.0797654390335083, "logps/chosen": -123.53130340576172, "logps/rejected": -791.2440185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.709338426589966, "rewards/margins": 54.70759201049805, "rewards/rejected": -58.41693115234375, "step": 6625 }, { "epoch": 1.47, "learning_rate": 9.959796979610646e-06, "logits/chosen": -1.5013397932052612, "logits/rejected": -1.3655481338500977, "logps/chosen": -133.71507263183594, "logps/rejected": -299.22979736328125, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -3.003948926925659, "rewards/margins": 4.519961357116699, "rewards/rejected": -7.5239105224609375, "step": 6626 }, { "epoch": 1.47, "learning_rate": 9.959569830121427e-06, "logits/chosen": -1.8183637857437134, "logits/rejected": -1.89650559425354, "logps/chosen": -104.82246398925781, "logps/rejected": -123.7490005493164, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.276998996734619, "rewards/margins": 5.171139717102051, "rewards/rejected": -7.44813871383667, "step": 6627 }, { "epoch": 1.47, "learning_rate": 9.959342043339406e-06, "logits/chosen": -1.178235650062561, "logits/rejected": -1.1347278356552124, "logps/chosen": -88.20567321777344, "logps/rejected": -133.2750244140625, "loss": 0.3501, "rewards/accuracies": 1.0, "rewards/chosen": 0.00296783447265625, "rewards/margins": 4.9427924156188965, "rewards/rejected": -4.93982458114624, "step": 6628 }, { "epoch": 1.47, "learning_rate": 9.959113619293857e-06, "logits/chosen": -1.4913532733917236, "logits/rejected": -1.4581403732299805, "logps/chosen": -117.76368713378906, "logps/rejected": -252.96005249023438, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.9907807111740112, "rewards/margins": 8.356112480163574, "rewards/rejected": -10.346893310546875, "step": 6629 }, { "epoch": 1.47, "learning_rate": 9.958884558014128e-06, "logits/chosen": -1.261128306388855, "logits/rejected": -1.2047529220581055, "logps/chosen": -228.8101806640625, "logps/rejected": -346.16448974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.377816766500473, "rewards/margins": 11.241290092468262, "rewards/rejected": -11.619107246398926, "step": 6630 }, { "epoch": 1.47, "learning_rate": 9.958654859529654e-06, "logits/chosen": -1.4484772682189941, "logits/rejected": -1.2805505990982056, "logps/chosen": -113.30911254882812, "logps/rejected": -282.041259765625, "loss": 0.0886, "rewards/accuracies": 1.0, "rewards/chosen": -4.264993190765381, "rewards/margins": 1.6604127883911133, "rewards/rejected": -5.925405979156494, "step": 6631 }, { "epoch": 1.47, "learning_rate": 9.958424523869952e-06, "logits/chosen": -1.9232511520385742, "logits/rejected": -1.8578778505325317, "logps/chosen": -109.35771179199219, "logps/rejected": -206.45201110839844, "loss": 0.3263, "rewards/accuracies": 1.0, "rewards/chosen": -5.34633731842041, "rewards/margins": 2.25565242767334, "rewards/rejected": -7.60198974609375, "step": 6632 }, { "epoch": 1.47, "learning_rate": 9.958193551064617e-06, "logits/chosen": -1.6395633220672607, "logits/rejected": -1.5673125982284546, "logps/chosen": -202.80767822265625, "logps/rejected": -253.92959594726562, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.5892486572265625, "rewards/margins": 7.605295181274414, "rewards/rejected": -13.194543838500977, "step": 6633 }, { "epoch": 1.47, "learning_rate": 9.95796194114333e-06, "logits/chosen": -1.2373591661453247, "logits/rejected": -1.2649401426315308, "logps/chosen": -175.69082641601562, "logps/rejected": -119.44192504882812, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/chosen": -6.201623439788818, "rewards/margins": 2.764024257659912, "rewards/rejected": -8.96564769744873, "step": 6634 }, { "epoch": 1.47, "learning_rate": 9.957729694135852e-06, "logits/chosen": -1.6503385305404663, "logits/rejected": -1.6598162651062012, "logps/chosen": -103.57434844970703, "logps/rejected": -128.97972106933594, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.7258102893829346, "rewards/margins": 5.3087873458862305, "rewards/rejected": -8.034597396850586, "step": 6635 }, { "epoch": 1.47, "learning_rate": 9.957496810072027e-06, "logits/chosen": -1.3986302614212036, "logits/rejected": -1.392710566520691, "logps/chosen": -136.26263427734375, "logps/rejected": -177.09095764160156, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.0029174804221838713, "rewards/margins": 6.53877067565918, "rewards/rejected": -6.541687965393066, "step": 6636 }, { "epoch": 1.47, "learning_rate": 9.957263288981779e-06, "logits/chosen": -1.2241168022155762, "logits/rejected": -1.2068551778793335, "logps/chosen": -109.47468566894531, "logps/rejected": -239.64736938476562, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.0610597133636475, "rewards/margins": 6.489044189453125, "rewards/rejected": -8.550104141235352, "step": 6637 }, { "epoch": 1.47, "learning_rate": 9.957029130895116e-06, "logits/chosen": -1.6435729265213013, "logits/rejected": -1.605442762374878, "logps/chosen": -124.00565338134766, "logps/rejected": -284.53741455078125, "loss": 0.3487, "rewards/accuracies": 1.0, "rewards/chosen": -5.304009437561035, "rewards/margins": 5.462342262268066, "rewards/rejected": -10.766351699829102, "step": 6638 }, { "epoch": 1.47, "learning_rate": 9.956794335842126e-06, "logits/chosen": -1.1229617595672607, "logits/rejected": -1.1946872472763062, "logps/chosen": -135.2695770263672, "logps/rejected": -87.93302917480469, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.4539505243301392, "rewards/margins": 6.97254753112793, "rewards/rejected": -5.51859712600708, "step": 6639 }, { "epoch": 1.47, "learning_rate": 9.956558903852978e-06, "logits/chosen": -1.2444382905960083, "logits/rejected": -1.286975383758545, "logps/chosen": -311.358154296875, "logps/rejected": -285.0015869140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.8768768310546875, "rewards/margins": 6.042375564575195, "rewards/rejected": -8.919252395629883, "step": 6640 }, { "epoch": 1.47, "learning_rate": 9.956322834957929e-06, "logits/chosen": -1.5257785320281982, "logits/rejected": -1.5000526905059814, "logps/chosen": -98.1970443725586, "logps/rejected": -202.8974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.10926742851734161, "rewards/margins": 9.856942176818848, "rewards/rejected": -9.747674942016602, "step": 6641 }, { "epoch": 1.47, "learning_rate": 9.956086129187308e-06, "logits/chosen": -1.5857691764831543, "logits/rejected": -1.5619953870773315, "logps/chosen": -125.70801544189453, "logps/rejected": -159.40289306640625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.5773048400878906, "rewards/margins": 4.566590309143066, "rewards/rejected": -5.143895149230957, "step": 6642 }, { "epoch": 1.47, "learning_rate": 9.955848786571534e-06, "logits/chosen": -1.6501572132110596, "logits/rejected": -1.7358921766281128, "logps/chosen": -142.12088012695312, "logps/rejected": -101.12212371826172, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.458392381668091, "rewards/margins": 5.4586896896362305, "rewards/rejected": -7.9170823097229, "step": 6643 }, { "epoch": 1.47, "learning_rate": 9.955610807141105e-06, "logits/chosen": -1.2955362796783447, "logits/rejected": -1.2985255718231201, "logps/chosen": -254.0803680419922, "logps/rejected": -414.37445068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.107963562011719, "rewards/margins": 19.19350814819336, "rewards/rejected": -24.301471710205078, "step": 6644 }, { "epoch": 1.47, "learning_rate": 9.9553721909266e-06, "logits/chosen": -1.3574782609939575, "logits/rejected": -1.2036641836166382, "logps/chosen": -134.77830505371094, "logps/rejected": -283.5834655761719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.9616714715957642, "rewards/margins": 9.101676940917969, "rewards/rejected": -7.140005588531494, "step": 6645 }, { "epoch": 1.47, "learning_rate": 9.95513293795868e-06, "logits/chosen": -1.3073712587356567, "logits/rejected": -1.2938681840896606, "logps/chosen": -170.76068115234375, "logps/rejected": -183.46322631835938, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -4.787500858306885, "rewards/margins": 2.841585636138916, "rewards/rejected": -7.629086494445801, "step": 6646 }, { "epoch": 1.47, "learning_rate": 9.95489304826809e-06, "logits/chosen": -1.459079384803772, "logits/rejected": -1.4124523401260376, "logps/chosen": -80.10808563232422, "logps/rejected": -160.80526733398438, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -1.4092658758163452, "rewards/margins": 3.905430793762207, "rewards/rejected": -5.314696788787842, "step": 6647 }, { "epoch": 1.47, "learning_rate": 9.954652521885656e-06, "logits/chosen": -1.2978794574737549, "logits/rejected": -1.2621302604675293, "logps/chosen": -111.3260498046875, "logps/rejected": -182.36492919921875, "loss": 0.3369, "rewards/accuracies": 1.0, "rewards/chosen": -2.706831455230713, "rewards/margins": 0.03915548324584961, "rewards/rejected": -2.7459869384765625, "step": 6648 }, { "epoch": 1.47, "learning_rate": 9.954411358842282e-06, "logits/chosen": -1.072744607925415, "logits/rejected": -1.0032002925872803, "logps/chosen": -98.68513488769531, "logps/rejected": -259.2761535644531, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -0.9302719235420227, "rewards/margins": 12.9681978225708, "rewards/rejected": -13.898469924926758, "step": 6649 }, { "epoch": 1.47, "learning_rate": 9.954169559168958e-06, "logits/chosen": -1.2671105861663818, "logits/rejected": -1.1722688674926758, "logps/chosen": -102.69207763671875, "logps/rejected": -238.26040649414062, "loss": 0.0514, "rewards/accuracies": 1.0, "rewards/chosen": -4.271764278411865, "rewards/margins": 2.222921371459961, "rewards/rejected": -6.494685649871826, "step": 6650 }, { "epoch": 1.47, "learning_rate": 9.953927122896756e-06, "logits/chosen": -1.546265721321106, "logits/rejected": -1.546265721321106, "logps/chosen": -159.78793334960938, "logps/rejected": -159.78793334960938, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.837066650390625, "rewards/margins": 0.0, "rewards/rejected": -5.837066650390625, "step": 6651 }, { "epoch": 1.47, "learning_rate": 9.953684050056827e-06, "logits/chosen": -1.3672401905059814, "logits/rejected": -1.4492887258529663, "logps/chosen": -181.5946044921875, "logps/rejected": -121.63420104980469, "loss": 1.2712, "rewards/accuracies": 0.0, "rewards/chosen": -4.544595241546631, "rewards/margins": -2.45986008644104, "rewards/rejected": -2.084735155105591, "step": 6652 }, { "epoch": 1.47, "learning_rate": 9.953440340680407e-06, "logits/chosen": -1.6896743774414062, "logits/rejected": -1.8065236806869507, "logps/chosen": -181.51553344726562, "logps/rejected": -186.98150634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.3644837141036987, "rewards/margins": 10.701433181762695, "rewards/rejected": -9.336949348449707, "step": 6653 }, { "epoch": 1.47, "learning_rate": 9.95319599479881e-06, "logits/chosen": -1.3471485376358032, "logits/rejected": -1.3011106252670288, "logps/chosen": -116.15192413330078, "logps/rejected": -315.9845275878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8759819269180298, "rewards/margins": 12.31911849975586, "rewards/rejected": -14.195100784301758, "step": 6654 }, { "epoch": 1.47, "learning_rate": 9.952951012443434e-06, "logits/chosen": -1.2727510929107666, "logits/rejected": -1.2483490705490112, "logps/chosen": -92.72758483886719, "logps/rejected": -118.6185302734375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.353292852640152, "rewards/margins": 8.851838111877441, "rewards/rejected": -9.205130577087402, "step": 6655 }, { "epoch": 1.47, "learning_rate": 9.952705393645761e-06, "logits/chosen": -1.5361089706420898, "logits/rejected": -1.5361089706420898, "logps/chosen": -272.39471435546875, "logps/rejected": -272.39471435546875, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -14.246031761169434, "rewards/margins": 0.0, "rewards/rejected": -14.246031761169434, "step": 6656 }, { "epoch": 1.47, "learning_rate": 9.952459138437352e-06, "logits/chosen": -1.1987148523330688, "logits/rejected": -1.3358490467071533, "logps/chosen": -206.84307861328125, "logps/rejected": -199.93124389648438, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.534393310546875, "rewards/margins": 6.953003883361816, "rewards/rejected": -8.487397193908691, "step": 6657 }, { "epoch": 1.47, "learning_rate": 9.952212246849847e-06, "logits/chosen": -1.126136064529419, "logits/rejected": -1.1959495544433594, "logps/chosen": -216.37171936035156, "logps/rejected": -216.46646118164062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.547651767730713, "rewards/margins": 8.013376235961914, "rewards/rejected": -10.561027526855469, "step": 6658 }, { "epoch": 1.47, "learning_rate": 9.951964718914972e-06, "logits/chosen": -1.4205571413040161, "logits/rejected": -1.4006818532943726, "logps/chosen": -139.0276641845703, "logps/rejected": -253.45706176757812, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.274079322814941, "rewards/margins": 7.266898155212402, "rewards/rejected": -11.540977478027344, "step": 6659 }, { "epoch": 1.47, "learning_rate": 9.951716554664537e-06, "logits/chosen": -1.3912931680679321, "logits/rejected": -1.3059254884719849, "logps/chosen": -222.23892211914062, "logps/rejected": -407.78759765625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": 1.3337310552597046, "rewards/margins": 11.499624252319336, "rewards/rejected": -10.1658935546875, "step": 6660 }, { "epoch": 1.47, "learning_rate": 9.951467754130429e-06, "logits/chosen": -1.2620500326156616, "logits/rejected": -1.204030990600586, "logps/chosen": -125.26744079589844, "logps/rejected": -204.62747192382812, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -3.7337844371795654, "rewards/margins": 4.5472612380981445, "rewards/rejected": -8.281045913696289, "step": 6661 }, { "epoch": 1.47, "learning_rate": 9.951218317344615e-06, "logits/chosen": -1.6882543563842773, "logits/rejected": -1.6896308660507202, "logps/chosen": -89.45030212402344, "logps/rejected": -217.8392333984375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 0.5298477411270142, "rewards/margins": 11.181879997253418, "rewards/rejected": -10.652031898498535, "step": 6662 }, { "epoch": 1.47, "learning_rate": 9.950968244339152e-06, "logits/chosen": -1.1479450464248657, "logits/rejected": -1.131638765335083, "logps/chosen": -228.6617431640625, "logps/rejected": -186.89431762695312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 4.556420803070068, "rewards/margins": 8.4473295211792, "rewards/rejected": -3.89090895652771, "step": 6663 }, { "epoch": 1.47, "learning_rate": 9.95071753514617e-06, "logits/chosen": -1.2470630407333374, "logits/rejected": -1.2290012836456299, "logps/chosen": -123.36933898925781, "logps/rejected": -128.19808959960938, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": -2.98321533203125, "rewards/margins": 2.1782679557800293, "rewards/rejected": -5.161483287811279, "step": 6664 }, { "epoch": 1.48, "learning_rate": 9.950466189797885e-06, "logits/chosen": -1.1445825099945068, "logits/rejected": -1.1885368824005127, "logps/chosen": -156.99288940429688, "logps/rejected": -120.6861343383789, "loss": 1.4069, "rewards/accuracies": 0.0, "rewards/chosen": -5.31782865524292, "rewards/margins": -2.751847267150879, "rewards/rejected": -2.565981388092041, "step": 6665 }, { "epoch": 1.48, "learning_rate": 9.950214208326598e-06, "logits/chosen": -1.3860260248184204, "logits/rejected": -1.5584263801574707, "logps/chosen": -247.18455505371094, "logps/rejected": -236.6139678955078, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.178950786590576, "rewards/margins": 10.270816802978516, "rewards/rejected": -14.44976806640625, "step": 6666 }, { "epoch": 1.48, "learning_rate": 9.949961590764682e-06, "logits/chosen": -1.3018572330474854, "logits/rejected": -1.3097946643829346, "logps/chosen": -161.1339111328125, "logps/rejected": -226.302734375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -3.897839307785034, "rewards/margins": 4.73487663269043, "rewards/rejected": -8.632716178894043, "step": 6667 }, { "epoch": 1.48, "learning_rate": 9.949708337144603e-06, "logits/chosen": -1.462307095527649, "logits/rejected": -1.4792786836624146, "logps/chosen": -111.26094055175781, "logps/rejected": -131.5476837158203, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -5.021085262298584, "rewards/margins": 3.9957499504089355, "rewards/rejected": -9.01683521270752, "step": 6668 }, { "epoch": 1.48, "learning_rate": 9.949454447498901e-06, "logits/chosen": -1.624918818473816, "logits/rejected": -1.5950404405593872, "logps/chosen": -166.4324493408203, "logps/rejected": -391.7415466308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.47301027178764343, "rewards/margins": 19.27528953552246, "rewards/rejected": -18.802278518676758, "step": 6669 }, { "epoch": 1.48, "learning_rate": 9.949199921860202e-06, "logits/chosen": -1.6153229475021362, "logits/rejected": -1.4273759126663208, "logps/chosen": -207.24734497070312, "logps/rejected": -371.0585632324219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.772265613079071, "rewards/margins": 13.780936241149902, "rewards/rejected": -14.553201675415039, "step": 6670 }, { "epoch": 1.48, "learning_rate": 9.94894476026121e-06, "logits/chosen": -1.4121474027633667, "logits/rejected": -1.3430732488632202, "logps/chosen": -70.20149230957031, "logps/rejected": -276.0892639160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1202640533447266, "rewards/margins": 14.48330020904541, "rewards/rejected": -15.603564262390137, "step": 6671 }, { "epoch": 1.48, "learning_rate": 9.948688962734711e-06, "logits/chosen": -1.2097151279449463, "logits/rejected": -1.3012607097625732, "logps/chosen": -161.36770629882812, "logps/rejected": -127.27661895751953, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.038038730621338, "rewards/margins": 7.548570156097412, "rewards/rejected": -9.58660888671875, "step": 6672 }, { "epoch": 1.48, "learning_rate": 9.94843252931358e-06, "logits/chosen": -1.138837218284607, "logits/rejected": -1.0848208665847778, "logps/chosen": -94.67752075195312, "logps/rejected": -220.3874053955078, "loss": 0.0699, "rewards/accuracies": 1.0, "rewards/chosen": -4.329540252685547, "rewards/margins": 1.9116144180297852, "rewards/rejected": -6.241154670715332, "step": 6673 }, { "epoch": 1.48, "learning_rate": 9.948175460030762e-06, "logits/chosen": -1.3966326713562012, "logits/rejected": -1.3717153072357178, "logps/chosen": -104.51304626464844, "logps/rejected": -93.60125732421875, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": 0.5890358090400696, "rewards/margins": 3.378864288330078, "rewards/rejected": -2.7898285388946533, "step": 6674 }, { "epoch": 1.48, "learning_rate": 9.947917754919293e-06, "logits/chosen": -1.5014965534210205, "logits/rejected": -1.4196840524673462, "logps/chosen": -110.01904296875, "logps/rejected": -298.8311767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7307113409042358, "rewards/margins": 11.690767288208008, "rewards/rejected": -13.421478271484375, "step": 6675 }, { "epoch": 1.48, "learning_rate": 9.947659414012287e-06, "logits/chosen": -1.5935693979263306, "logits/rejected": -1.5935693979263306, "logps/chosen": -181.718017578125, "logps/rejected": -181.718017578125, "loss": 0.3486, "rewards/accuracies": 0.0, "rewards/chosen": -5.665887355804443, "rewards/margins": 0.0, "rewards/rejected": -5.665887355804443, "step": 6676 }, { "epoch": 1.48, "learning_rate": 9.94740043734294e-06, "logits/chosen": -1.240639328956604, "logits/rejected": -1.3371284008026123, "logps/chosen": -231.27423095703125, "logps/rejected": -141.8695526123047, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -4.118064880371094, "rewards/margins": 5.061304092407227, "rewards/rejected": -9.17936897277832, "step": 6677 }, { "epoch": 1.48, "learning_rate": 9.947140824944533e-06, "logits/chosen": -1.110435962677002, "logits/rejected": -0.9676884412765503, "logps/chosen": -184.29672241210938, "logps/rejected": -269.98443603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.070483446121216, "rewards/margins": 13.455032348632812, "rewards/rejected": -11.384549140930176, "step": 6678 }, { "epoch": 1.48, "learning_rate": 9.946880576850418e-06, "logits/chosen": -1.5307950973510742, "logits/rejected": -1.7034544944763184, "logps/chosen": -198.44371032714844, "logps/rejected": -190.75942993164062, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -2.4924514293670654, "rewards/margins": 9.726503372192383, "rewards/rejected": -12.218955039978027, "step": 6679 }, { "epoch": 1.48, "learning_rate": 9.946619693094044e-06, "logits/chosen": -1.403952956199646, "logits/rejected": -1.2253845930099487, "logps/chosen": -152.78472900390625, "logps/rejected": -602.3055419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 5.5314483642578125, "rewards/margins": 27.21889305114746, "rewards/rejected": -21.68744468688965, "step": 6680 }, { "epoch": 1.48, "learning_rate": 9.94635817370893e-06, "logits/chosen": -1.4026024341583252, "logits/rejected": -1.3592802286148071, "logps/chosen": -192.9420928955078, "logps/rejected": -401.385009765625, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.6380401849746704, "rewards/margins": 21.406774520874023, "rewards/rejected": -22.044815063476562, "step": 6681 }, { "epoch": 1.48, "learning_rate": 9.94609601872868e-06, "logits/chosen": -1.6649649143218994, "logits/rejected": -1.6866956949234009, "logps/chosen": -185.33517456054688, "logps/rejected": -322.5290222167969, "loss": 0.2181, "rewards/accuracies": 1.0, "rewards/chosen": -4.213925361633301, "rewards/margins": 4.998159408569336, "rewards/rejected": -9.212084770202637, "step": 6682 }, { "epoch": 1.48, "learning_rate": 9.945833228186984e-06, "logits/chosen": -1.5112786293029785, "logits/rejected": -1.6633552312850952, "logps/chosen": -266.94140625, "logps/rejected": -197.3399200439453, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -4.073530673980713, "rewards/margins": 4.147233486175537, "rewards/rejected": -8.22076416015625, "step": 6683 }, { "epoch": 1.48, "learning_rate": 9.945569802117604e-06, "logits/chosen": -1.619749665260315, "logits/rejected": -1.60013747215271, "logps/chosen": -66.43679809570312, "logps/rejected": -75.48208618164062, "loss": 0.6477, "rewards/accuracies": 0.0, "rewards/chosen": -4.383567810058594, "rewards/margins": -0.9754989147186279, "rewards/rejected": -3.408068895339966, "step": 6684 }, { "epoch": 1.48, "learning_rate": 9.945305740554397e-06, "logits/chosen": -1.8663698434829712, "logits/rejected": -1.8075038194656372, "logps/chosen": -122.13023376464844, "logps/rejected": -250.7034912109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.330181837081909, "rewards/margins": 11.320752143859863, "rewards/rejected": -13.650934219360352, "step": 6685 }, { "epoch": 1.48, "learning_rate": 9.945041043531289e-06, "logits/chosen": -1.4630253314971924, "logits/rejected": -1.3996131420135498, "logps/chosen": -84.90193176269531, "logps/rejected": -137.34307861328125, "loss": 0.0953, "rewards/accuracies": 1.0, "rewards/chosen": -3.362938404083252, "rewards/margins": 1.5739741325378418, "rewards/rejected": -4.936912536621094, "step": 6686 }, { "epoch": 1.48, "learning_rate": 9.944775711082296e-06, "logits/chosen": -1.4432437419891357, "logits/rejected": -1.4335229396820068, "logps/chosen": -139.03097534179688, "logps/rejected": -275.4668884277344, "loss": 0.0637, "rewards/accuracies": 1.0, "rewards/chosen": -2.6307570934295654, "rewards/margins": 5.15300178527832, "rewards/rejected": -7.783758640289307, "step": 6687 }, { "epoch": 1.48, "learning_rate": 9.944509743241508e-06, "logits/chosen": -1.5871058702468872, "logits/rejected": -1.5939451456069946, "logps/chosen": -98.498779296875, "logps/rejected": -105.06507110595703, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": -0.7338913083076477, "rewards/margins": 2.4870615005493164, "rewards/rejected": -3.2209527492523193, "step": 6688 }, { "epoch": 1.48, "learning_rate": 9.944243140043106e-06, "logits/chosen": -1.4053785800933838, "logits/rejected": -1.4199484586715698, "logps/chosen": -101.24647521972656, "logps/rejected": -205.3739776611328, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.8483574390411377, "rewards/margins": 9.057059288024902, "rewards/rejected": -10.905416488647461, "step": 6689 }, { "epoch": 1.48, "learning_rate": 9.943975901521347e-06, "logits/chosen": -1.2201846837997437, "logits/rejected": -1.2052562236785889, "logps/chosen": -244.42626953125, "logps/rejected": -278.83160400390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.775988757610321, "rewards/margins": 7.133345127105713, "rewards/rejected": -6.357356548309326, "step": 6690 }, { "epoch": 1.48, "learning_rate": 9.943708027710567e-06, "logits/chosen": -1.3468278646469116, "logits/rejected": -1.3468278646469116, "logps/chosen": -134.43515014648438, "logps/rejected": -134.43515014648438, "loss": 0.3472, "rewards/accuracies": 0.0, "rewards/chosen": -3.9369850158691406, "rewards/margins": 0.0, "rewards/rejected": -3.9369850158691406, "step": 6691 }, { "epoch": 1.48, "learning_rate": 9.943439518645193e-06, "logits/chosen": -1.3264362812042236, "logits/rejected": -1.2548235654830933, "logps/chosen": -154.6416778564453, "logps/rejected": -248.3040771484375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4308303594589233, "rewards/margins": 10.669615745544434, "rewards/rejected": -12.100445747375488, "step": 6692 }, { "epoch": 1.48, "learning_rate": 9.943170374359722e-06, "logits/chosen": -0.9889980554580688, "logits/rejected": -0.7600347995758057, "logps/chosen": -151.60971069335938, "logps/rejected": -401.1624755859375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.6213409304618835, "rewards/margins": 14.925971031188965, "rewards/rejected": -15.547311782836914, "step": 6693 }, { "epoch": 1.48, "learning_rate": 9.942900594888743e-06, "logits/chosen": -1.3269898891448975, "logits/rejected": -1.3486970663070679, "logps/chosen": -151.75384521484375, "logps/rejected": -117.75489807128906, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -4.218806743621826, "rewards/margins": 4.659755229949951, "rewards/rejected": -8.878561973571777, "step": 6694 }, { "epoch": 1.48, "learning_rate": 9.94263018026692e-06, "logits/chosen": -1.427991509437561, "logits/rejected": -1.4128998517990112, "logps/chosen": -122.83113861083984, "logps/rejected": -138.10397338867188, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.356438398361206, "rewards/margins": 7.429717063903809, "rewards/rejected": -10.786155700683594, "step": 6695 }, { "epoch": 1.48, "learning_rate": 9.942359130528998e-06, "logits/chosen": -1.5733743906021118, "logits/rejected": -1.592836618423462, "logps/chosen": -110.88215637207031, "logps/rejected": -133.77386474609375, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": 0.1413566619157791, "rewards/margins": 2.847754716873169, "rewards/rejected": -2.7063980102539062, "step": 6696 }, { "epoch": 1.48, "learning_rate": 9.942087445709811e-06, "logits/chosen": -1.2665146589279175, "logits/rejected": -1.290562391281128, "logps/chosen": -173.47413635253906, "logps/rejected": -172.62283325195312, "loss": 2.2712, "rewards/accuracies": 0.0, "rewards/chosen": -10.408463478088379, "rewards/margins": -4.531718730926514, "rewards/rejected": -5.876744747161865, "step": 6697 }, { "epoch": 1.48, "learning_rate": 9.941815125844267e-06, "logits/chosen": -1.8181012868881226, "logits/rejected": -1.750441551208496, "logps/chosen": -120.62408447265625, "logps/rejected": -183.87078857421875, "loss": 0.3658, "rewards/accuracies": 1.0, "rewards/chosen": -2.8721330165863037, "rewards/margins": 0.2747175693511963, "rewards/rejected": -3.1468505859375, "step": 6698 }, { "epoch": 1.48, "learning_rate": 9.94154217096736e-06, "logits/chosen": -1.6866012811660767, "logits/rejected": -1.7356717586517334, "logps/chosen": -160.08453369140625, "logps/rejected": -212.3732452392578, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -6.032388210296631, "rewards/margins": 3.970393657684326, "rewards/rejected": -10.002781867980957, "step": 6699 }, { "epoch": 1.48, "learning_rate": 9.941268581114162e-06, "logits/chosen": -1.292657732963562, "logits/rejected": -1.292657732963562, "logps/chosen": -107.7640380859375, "logps/rejected": -107.7640380859375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -1.4807052612304688, "rewards/margins": 0.0, "rewards/rejected": -1.4807052612304688, "step": 6700 }, { "epoch": 1.48, "learning_rate": 9.94099435631983e-06, "logits/chosen": -1.3746474981307983, "logits/rejected": -1.4423162937164307, "logps/chosen": -174.59271240234375, "logps/rejected": -227.27554321289062, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.9752930402755737, "rewards/margins": 13.978900909423828, "rewards/rejected": -12.003607749938965, "step": 6701 }, { "epoch": 1.48, "learning_rate": 9.940719496619601e-06, "logits/chosen": -1.3446868658065796, "logits/rejected": -1.3523757457733154, "logps/chosen": -228.35948181152344, "logps/rejected": -200.33987426757812, "loss": 0.5103, "rewards/accuracies": 0.0, "rewards/chosen": -2.07177734375, "rewards/margins": -0.5736358165740967, "rewards/rejected": -1.4981415271759033, "step": 6702 }, { "epoch": 1.48, "learning_rate": 9.940444002048794e-06, "logits/chosen": -1.1370270252227783, "logits/rejected": -1.261687994003296, "logps/chosen": -190.57534790039062, "logps/rejected": -278.8363342285156, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -3.0134003162384033, "rewards/margins": 11.167680740356445, "rewards/rejected": -14.18108081817627, "step": 6703 }, { "epoch": 1.48, "learning_rate": 9.94016787264281e-06, "logits/chosen": -1.6662356853485107, "logits/rejected": -1.6770433187484741, "logps/chosen": -180.68817138671875, "logps/rejected": -187.57769775390625, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 0.3150573670864105, "rewards/margins": 4.300169467926025, "rewards/rejected": -3.985111951828003, "step": 6704 }, { "epoch": 1.48, "learning_rate": 9.939891108437129e-06, "logits/chosen": -1.3566367626190186, "logits/rejected": -1.2915934324264526, "logps/chosen": -96.47613525390625, "logps/rejected": -239.05569458007812, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.219989061355591, "rewards/margins": 7.419521331787109, "rewards/rejected": -9.639510154724121, "step": 6705 }, { "epoch": 1.48, "learning_rate": 9.939613709467317e-06, "logits/chosen": -1.0389430522918701, "logits/rejected": -1.0096243619918823, "logps/chosen": -106.41278076171875, "logps/rejected": -130.14617919921875, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -2.6341354846954346, "rewards/margins": 3.537490129470825, "rewards/rejected": -6.17162561416626, "step": 6706 }, { "epoch": 1.48, "learning_rate": 9.939335675769017e-06, "logits/chosen": -1.3062785863876343, "logits/rejected": -1.4107381105422974, "logps/chosen": -280.8298645019531, "logps/rejected": -240.52215576171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.4175262451171875, "rewards/margins": 6.121284484863281, "rewards/rejected": -12.538810729980469, "step": 6707 }, { "epoch": 1.48, "learning_rate": 9.939057007377955e-06, "logits/chosen": -1.4073675870895386, "logits/rejected": -1.4560374021530151, "logps/chosen": -109.3558349609375, "logps/rejected": -107.43351745605469, "loss": 0.1026, "rewards/accuracies": 1.0, "rewards/chosen": -1.9882385730743408, "rewards/margins": 1.482560634613037, "rewards/rejected": -3.470799207687378, "step": 6708 }, { "epoch": 1.48, "learning_rate": 9.938777704329943e-06, "logits/chosen": -1.2793903350830078, "logits/rejected": -1.1835834980010986, "logps/chosen": -91.13102722167969, "logps/rejected": -174.14886474609375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.1491669416427612, "rewards/margins": 5.700509548187256, "rewards/rejected": -6.849676609039307, "step": 6709 }, { "epoch": 1.49, "learning_rate": 9.938497766660869e-06, "logits/chosen": -1.1363199949264526, "logits/rejected": -1.1602027416229248, "logps/chosen": -66.71634674072266, "logps/rejected": -105.15359497070312, "loss": 0.1993, "rewards/accuracies": 1.0, "rewards/chosen": -2.8192222118377686, "rewards/margins": 0.714120626449585, "rewards/rejected": -3.5333428382873535, "step": 6710 }, { "epoch": 1.49, "learning_rate": 9.938217194406701e-06, "logits/chosen": -1.6469910144805908, "logits/rejected": -1.7049560546875, "logps/chosen": -179.3916015625, "logps/rejected": -294.6266174316406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2224868535995483, "rewards/margins": 13.773018836975098, "rewards/rejected": -14.995505332946777, "step": 6711 }, { "epoch": 1.49, "learning_rate": 9.937935987603497e-06, "logits/chosen": -0.9165312647819519, "logits/rejected": -0.9165312647819519, "logps/chosen": -155.12644958496094, "logps/rejected": -155.12644958496094, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -10.013445854187012, "rewards/margins": 0.0, "rewards/rejected": -10.013445854187012, "step": 6712 }, { "epoch": 1.49, "learning_rate": 9.937654146287388e-06, "logits/chosen": -1.143763542175293, "logits/rejected": -1.1744439601898193, "logps/chosen": -131.11862182617188, "logps/rejected": -108.66209411621094, "loss": 2.4643, "rewards/accuracies": 1.0, "rewards/chosen": -0.824688732624054, "rewards/margins": 7.274914741516113, "rewards/rejected": -8.099603652954102, "step": 6713 }, { "epoch": 1.49, "learning_rate": 9.937371670494591e-06, "logits/chosen": -0.9762437343597412, "logits/rejected": -1.046284556388855, "logps/chosen": -195.75341796875, "logps/rejected": -223.8140869140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.243701457977295, "rewards/margins": 7.878322124481201, "rewards/rejected": -12.122023582458496, "step": 6714 }, { "epoch": 1.49, "learning_rate": 9.937088560261404e-06, "logits/chosen": -1.5844999551773071, "logits/rejected": -1.615216851234436, "logps/chosen": -195.56503295898438, "logps/rejected": -216.6627960205078, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.031637668609619, "rewards/margins": 8.28451919555664, "rewards/rejected": -13.316156387329102, "step": 6715 }, { "epoch": 1.49, "learning_rate": 9.936804815624205e-06, "logits/chosen": -1.3612656593322754, "logits/rejected": -1.3044904470443726, "logps/chosen": -106.67886352539062, "logps/rejected": -274.7667236328125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.598776340484619, "rewards/margins": 10.296916961669922, "rewards/rejected": -12.8956937789917, "step": 6716 }, { "epoch": 1.49, "learning_rate": 9.936520436619455e-06, "logits/chosen": -1.3882018327713013, "logits/rejected": -1.3474247455596924, "logps/chosen": -113.89881896972656, "logps/rejected": -130.28224182128906, "loss": 0.0708, "rewards/accuracies": 1.0, "rewards/chosen": -2.4938347339630127, "rewards/margins": 2.074505090713501, "rewards/rejected": -4.568339824676514, "step": 6717 }, { "epoch": 1.49, "learning_rate": 9.936235423283696e-06, "logits/chosen": -1.46878182888031, "logits/rejected": -1.4617502689361572, "logps/chosen": -165.1000518798828, "logps/rejected": -177.48056030273438, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -4.572326183319092, "rewards/margins": 3.169731616973877, "rewards/rejected": -7.742057800292969, "step": 6718 }, { "epoch": 1.49, "learning_rate": 9.935949775653554e-06, "logits/chosen": -1.6819039583206177, "logits/rejected": -1.6819039583206177, "logps/chosen": -191.14883422851562, "logps/rejected": -191.14883422851562, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": 0.49889832735061646, "rewards/margins": 0.0, "rewards/rejected": 0.49889832735061646, "step": 6719 }, { "epoch": 1.49, "learning_rate": 9.935663493765726e-06, "logits/chosen": -1.8329885005950928, "logits/rejected": -1.7894278764724731, "logps/chosen": -90.49158477783203, "logps/rejected": -153.7642822265625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.15224838256835938, "rewards/margins": 4.502451419830322, "rewards/rejected": -4.654699802398682, "step": 6720 }, { "epoch": 1.49, "learning_rate": 9.935376577657008e-06, "logits/chosen": -1.495678424835205, "logits/rejected": -1.495678424835205, "logps/chosen": -145.5491180419922, "logps/rejected": -145.5491180419922, "loss": 0.3478, "rewards/accuracies": 0.0, "rewards/chosen": -11.264999389648438, "rewards/margins": 0.0, "rewards/rejected": -11.264999389648438, "step": 6721 }, { "epoch": 1.49, "learning_rate": 9.935089027364264e-06, "logits/chosen": -1.854609489440918, "logits/rejected": -1.75413179397583, "logps/chosen": -108.21354675292969, "logps/rejected": -203.9465789794922, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 0.7174293398857117, "rewards/margins": 7.594837188720703, "rewards/rejected": -6.877408027648926, "step": 6722 }, { "epoch": 1.49, "learning_rate": 9.934800842924443e-06, "logits/chosen": -1.6269745826721191, "logits/rejected": -1.6143910884857178, "logps/chosen": -240.3079833984375, "logps/rejected": -246.39498901367188, "loss": 0.0693, "rewards/accuracies": 1.0, "rewards/chosen": -2.3345184326171875, "rewards/margins": 2.200125217437744, "rewards/rejected": -4.534643650054932, "step": 6723 }, { "epoch": 1.49, "learning_rate": 9.934512024374577e-06, "logits/chosen": -0.9355971217155457, "logits/rejected": -1.078787922859192, "logps/chosen": -265.3548278808594, "logps/rejected": -174.26402282714844, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.23471374809741974, "rewards/margins": 5.505335807800293, "rewards/rejected": -5.270622253417969, "step": 6724 }, { "epoch": 1.49, "learning_rate": 9.934222571751777e-06, "logits/chosen": -1.6870319843292236, "logits/rejected": -1.6444917917251587, "logps/chosen": -147.40200805664062, "logps/rejected": -194.36087036132812, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6327972412109375, "rewards/margins": 5.518662929534912, "rewards/rejected": -6.15146017074585, "step": 6725 }, { "epoch": 1.49, "learning_rate": 9.933932485093239e-06, "logits/chosen": -1.2577255964279175, "logits/rejected": -0.922483503818512, "logps/chosen": -154.91058349609375, "logps/rejected": -720.582275390625, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": -5.818744659423828, "rewards/margins": 37.22715377807617, "rewards/rejected": -43.0458984375, "step": 6726 }, { "epoch": 1.49, "learning_rate": 9.933641764436237e-06, "logits/chosen": -1.636823058128357, "logits/rejected": -1.636823058128357, "logps/chosen": -231.9790496826172, "logps/rejected": -231.9790496826172, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -12.429089546203613, "rewards/margins": 0.0, "rewards/rejected": -12.429089546203613, "step": 6727 }, { "epoch": 1.49, "learning_rate": 9.933350409818128e-06, "logits/chosen": -1.1282727718353271, "logits/rejected": -1.1467671394348145, "logps/chosen": -82.99478149414062, "logps/rejected": -82.92304229736328, "loss": 0.1803, "rewards/accuracies": 1.0, "rewards/chosen": -0.4215751588344574, "rewards/margins": 0.8340591192245483, "rewards/rejected": -1.2556343078613281, "step": 6728 }, { "epoch": 1.49, "learning_rate": 9.933058421276351e-06, "logits/chosen": -1.695029616355896, "logits/rejected": -1.5910322666168213, "logps/chosen": -116.53323364257812, "logps/rejected": -251.23056030273438, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 0.064539335668087, "rewards/margins": 6.431299686431885, "rewards/rejected": -6.36676025390625, "step": 6729 }, { "epoch": 1.49, "learning_rate": 9.932765798848428e-06, "logits/chosen": -1.5490049123764038, "logits/rejected": -1.6171574592590332, "logps/chosen": -214.578857421875, "logps/rejected": -113.68354797363281, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.9571259021759033, "rewards/margins": 5.580377578735352, "rewards/rejected": -7.537503719329834, "step": 6730 }, { "epoch": 1.49, "learning_rate": 9.932472542571954e-06, "logits/chosen": -1.6556631326675415, "logits/rejected": -1.7114237546920776, "logps/chosen": -137.5317840576172, "logps/rejected": -129.69125366210938, "loss": 0.0779, "rewards/accuracies": 1.0, "rewards/chosen": -1.7470886707305908, "rewards/margins": 1.7805237770080566, "rewards/rejected": -3.5276124477386475, "step": 6731 }, { "epoch": 1.49, "learning_rate": 9.932178652484617e-06, "logits/chosen": -1.8598114252090454, "logits/rejected": -1.8499152660369873, "logps/chosen": -94.62142181396484, "logps/rejected": -161.27154541015625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.5287460684776306, "rewards/margins": 5.513108253479004, "rewards/rejected": -4.9843621253967285, "step": 6732 }, { "epoch": 1.49, "learning_rate": 9.931884128624181e-06, "logits/chosen": -1.4463948011398315, "logits/rejected": -1.3079066276550293, "logps/chosen": -193.91329956054688, "logps/rejected": -268.98773193359375, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": -3.3881592750549316, "rewards/margins": 2.750235080718994, "rewards/rejected": -6.138394355773926, "step": 6733 }, { "epoch": 1.49, "learning_rate": 9.93158897102849e-06, "logits/chosen": -1.7668139934539795, "logits/rejected": -1.8501348495483398, "logps/chosen": -165.04254150390625, "logps/rejected": -208.86500549316406, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.359152227640152, "rewards/margins": 12.36131763458252, "rewards/rejected": -12.72046947479248, "step": 6734 }, { "epoch": 1.49, "learning_rate": 9.93129317973547e-06, "logits/chosen": -1.3368858098983765, "logits/rejected": -1.4614946842193604, "logps/chosen": -203.89907836914062, "logps/rejected": -161.68186950683594, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -1.3999954462051392, "rewards/margins": 3.6780877113342285, "rewards/rejected": -5.078083038330078, "step": 6735 }, { "epoch": 1.49, "learning_rate": 9.930996754783134e-06, "logits/chosen": -1.5595816373825073, "logits/rejected": -1.4642434120178223, "logps/chosen": -190.97030639648438, "logps/rejected": -347.4882507324219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9843002557754517, "rewards/margins": 10.638771057128906, "rewards/rejected": -11.623071670532227, "step": 6736 }, { "epoch": 1.49, "learning_rate": 9.930699696209566e-06, "logits/chosen": -1.564207673072815, "logits/rejected": -1.4705246686935425, "logps/chosen": -77.45341491699219, "logps/rejected": -167.53713989257812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.8816009759902954, "rewards/margins": 5.562617301940918, "rewards/rejected": -4.681016445159912, "step": 6737 }, { "epoch": 1.49, "learning_rate": 9.93040200405294e-06, "logits/chosen": -1.5786256790161133, "logits/rejected": -1.5786256790161133, "logps/chosen": -60.17341232299805, "logps/rejected": -60.17341232299805, "loss": 0.3477, "rewards/accuracies": 0.0, "rewards/chosen": -2.425934314727783, "rewards/margins": 0.0, "rewards/rejected": -2.425934314727783, "step": 6738 }, { "epoch": 1.49, "learning_rate": 9.930103678351511e-06, "logits/chosen": -1.4370064735412598, "logits/rejected": -1.592266321182251, "logps/chosen": -174.82882690429688, "logps/rejected": -222.53506469726562, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.762713611125946, "rewards/margins": 7.27469539642334, "rewards/rejected": -8.037408828735352, "step": 6739 }, { "epoch": 1.49, "learning_rate": 9.92980471914361e-06, "logits/chosen": -1.6819730997085571, "logits/rejected": -1.5944724082946777, "logps/chosen": -217.94366455078125, "logps/rejected": -462.49432373046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.2868194580078125, "rewards/margins": 11.814956665039062, "rewards/rejected": -10.52813720703125, "step": 6740 }, { "epoch": 1.49, "learning_rate": 9.929505126467653e-06, "logits/chosen": -1.690737247467041, "logits/rejected": -1.705257773399353, "logps/chosen": -119.13263702392578, "logps/rejected": -110.72798156738281, "loss": 0.9903, "rewards/accuracies": 0.0, "rewards/chosen": -3.018336534500122, "rewards/margins": -1.831713080406189, "rewards/rejected": -1.186623454093933, "step": 6741 }, { "epoch": 1.49, "learning_rate": 9.929204900362137e-06, "logits/chosen": -1.5014015436172485, "logits/rejected": -1.4209351539611816, "logps/chosen": -133.5465850830078, "logps/rejected": -146.8301544189453, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": -0.72344970703125, "rewards/margins": 2.830080509185791, "rewards/rejected": -3.553530216217041, "step": 6742 }, { "epoch": 1.49, "learning_rate": 9.928904040865642e-06, "logits/chosen": -1.6010671854019165, "logits/rejected": -1.5752067565917969, "logps/chosen": -65.44973754882812, "logps/rejected": -124.47640228271484, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": 0.17697982490062714, "rewards/margins": 2.4904181957244873, "rewards/rejected": -2.3134384155273438, "step": 6743 }, { "epoch": 1.49, "learning_rate": 9.928602548016826e-06, "logits/chosen": -1.5025495290756226, "logits/rejected": -1.508535623550415, "logps/chosen": -208.72772216796875, "logps/rejected": -388.4718933105469, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": 1.209437608718872, "rewards/margins": 27.853404998779297, "rewards/rejected": -26.643966674804688, "step": 6744 }, { "epoch": 1.49, "learning_rate": 9.92830042185443e-06, "logits/chosen": -1.0887774229049683, "logits/rejected": -1.0759023427963257, "logps/chosen": -210.28575134277344, "logps/rejected": -267.6645812988281, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 2.194349765777588, "rewards/margins": 6.127192497253418, "rewards/rejected": -3.932842969894409, "step": 6745 }, { "epoch": 1.49, "learning_rate": 9.927997662417277e-06, "logits/chosen": -1.458544135093689, "logits/rejected": -1.4558634757995605, "logps/chosen": -109.99102020263672, "logps/rejected": -226.85391235351562, "loss": 0.3472, "rewards/accuracies": 1.0, "rewards/chosen": -0.7363373041152954, "rewards/margins": 6.741992473602295, "rewards/rejected": -7.478329658508301, "step": 6746 }, { "epoch": 1.49, "learning_rate": 9.927694269744273e-06, "logits/chosen": -1.709999442100525, "logits/rejected": -1.7090531587600708, "logps/chosen": -49.70101547241211, "logps/rejected": -94.23345947265625, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -0.023385239765048027, "rewards/margins": 2.796754837036133, "rewards/rejected": -2.8201401233673096, "step": 6747 }, { "epoch": 1.49, "learning_rate": 9.9273902438744e-06, "logits/chosen": -1.5603423118591309, "logits/rejected": -1.5825506448745728, "logps/chosen": -87.66682434082031, "logps/rejected": -99.71559143066406, "loss": 0.7458, "rewards/accuracies": 0.0, "rewards/chosen": -1.4406311511993408, "rewards/margins": -0.20074772834777832, "rewards/rejected": -1.2398834228515625, "step": 6748 }, { "epoch": 1.49, "learning_rate": 9.927085584846725e-06, "logits/chosen": -1.0811759233474731, "logits/rejected": -0.9339444041252136, "logps/chosen": -114.43153381347656, "logps/rejected": -345.58441162109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3341377377510071, "rewards/margins": 6.246990203857422, "rewards/rejected": -6.581128120422363, "step": 6749 }, { "epoch": 1.49, "learning_rate": 9.926780292700397e-06, "logits/chosen": -1.7496744394302368, "logits/rejected": -1.66875159740448, "logps/chosen": -80.59820556640625, "logps/rejected": -245.53225708007812, "loss": 0.1816, "rewards/accuracies": 1.0, "rewards/chosen": 0.6622215509414673, "rewards/margins": 5.852298736572266, "rewards/rejected": -5.190077304840088, "step": 6750 }, { "epoch": 1.49, "learning_rate": 9.926474367474646e-06, "logits/chosen": -1.5713636875152588, "logits/rejected": -1.4646685123443604, "logps/chosen": -156.996337890625, "logps/rejected": -357.1731872558594, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.4062378406524658, "rewards/margins": 6.691318511962891, "rewards/rejected": -8.097556114196777, "step": 6751 }, { "epoch": 1.49, "learning_rate": 9.92616780920878e-06, "logits/chosen": -1.6657414436340332, "logits/rejected": -1.7900456190109253, "logps/chosen": -250.6761932373047, "logps/rejected": -192.99099731445312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.7115005850791931, "rewards/margins": 7.280763626098633, "rewards/rejected": -6.569262981414795, "step": 6752 }, { "epoch": 1.49, "learning_rate": 9.925860617942195e-06, "logits/chosen": -1.4137437343597412, "logits/rejected": -1.4082255363464355, "logps/chosen": -63.129249572753906, "logps/rejected": -75.14312744140625, "loss": 0.0915, "rewards/accuracies": 1.0, "rewards/chosen": 0.4758033752441406, "rewards/margins": 1.8624382019042969, "rewards/rejected": -1.3866348266601562, "step": 6753 }, { "epoch": 1.49, "learning_rate": 9.92555279371436e-06, "logits/chosen": -1.6427944898605347, "logits/rejected": -1.6941962242126465, "logps/chosen": -171.77174377441406, "logps/rejected": -221.46258544921875, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -4.3821702003479, "rewards/margins": 14.194534301757812, "rewards/rejected": -18.576704025268555, "step": 6754 }, { "epoch": 1.5, "learning_rate": 9.925244336564831e-06, "logits/chosen": -1.7402817010879517, "logits/rejected": -1.7030432224273682, "logps/chosen": -193.2141571044922, "logps/rejected": -162.52740478515625, "loss": 0.2325, "rewards/accuracies": 1.0, "rewards/chosen": -3.4046127796173096, "rewards/margins": 0.5239715576171875, "rewards/rejected": -3.928584337234497, "step": 6755 }, { "epoch": 1.5, "learning_rate": 9.924935246533249e-06, "logits/chosen": -1.7595711946487427, "logits/rejected": -1.7595711946487427, "logps/chosen": -142.06881713867188, "logps/rejected": -142.06881713867188, "loss": 0.3578, "rewards/accuracies": 0.0, "rewards/chosen": -2.307570695877075, "rewards/margins": 0.0, "rewards/rejected": -2.307570695877075, "step": 6756 }, { "epoch": 1.5, "learning_rate": 9.924625523659324e-06, "logits/chosen": -1.3935033082962036, "logits/rejected": -1.4607034921646118, "logps/chosen": -265.83551025390625, "logps/rejected": -239.05999755859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 2.402758836746216, "rewards/margins": 6.0040130615234375, "rewards/rejected": -3.6012542247772217, "step": 6757 }, { "epoch": 1.5, "learning_rate": 9.924315167982858e-06, "logits/chosen": -1.8538743257522583, "logits/rejected": -1.9536397457122803, "logps/chosen": -140.1134033203125, "logps/rejected": -119.51055908203125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -3.372565507888794, "rewards/margins": 5.261373519897461, "rewards/rejected": -8.633938789367676, "step": 6758 }, { "epoch": 1.5, "learning_rate": 9.924004179543728e-06, "logits/chosen": -1.503180980682373, "logits/rejected": -1.473973035812378, "logps/chosen": -116.34622192382812, "logps/rejected": -110.37068939208984, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.749524712562561, "rewards/margins": 6.404757976531982, "rewards/rejected": -8.154282569885254, "step": 6759 }, { "epoch": 1.5, "learning_rate": 9.923692558381902e-06, "logits/chosen": -1.7137316465377808, "logits/rejected": -1.7743597030639648, "logps/chosen": -157.95248413085938, "logps/rejected": -172.56057739257812, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": 4.475070476531982, "rewards/margins": 4.5026631355285645, "rewards/rejected": -0.02759246900677681, "step": 6760 }, { "epoch": 1.5, "learning_rate": 9.923380304537417e-06, "logits/chosen": -1.857138752937317, "logits/rejected": -1.8699884414672852, "logps/chosen": -126.43820190429688, "logps/rejected": -150.57723999023438, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.6155059933662415, "rewards/margins": 5.052188396453857, "rewards/rejected": -4.436682224273682, "step": 6761 }, { "epoch": 1.5, "learning_rate": 9.923067418050399e-06, "logits/chosen": -1.4899728298187256, "logits/rejected": -1.5229398012161255, "logps/chosen": -207.3052520751953, "logps/rejected": -167.94920349121094, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.22236938774585724, "rewards/margins": 5.6197614669799805, "rewards/rejected": -5.397392272949219, "step": 6762 }, { "epoch": 1.5, "learning_rate": 9.922753898961052e-06, "logits/chosen": -1.5847835540771484, "logits/rejected": -1.4747999906539917, "logps/chosen": -165.30615234375, "logps/rejected": -219.19415283203125, "loss": 0.1265, "rewards/accuracies": 1.0, "rewards/chosen": -1.7017395496368408, "rewards/margins": 1.2645020484924316, "rewards/rejected": -2.9662415981292725, "step": 6763 }, { "epoch": 1.5, "learning_rate": 9.922439747309663e-06, "logits/chosen": -1.6158925294876099, "logits/rejected": -1.6337107419967651, "logps/chosen": -274.62518310546875, "logps/rejected": -181.6803741455078, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.2601470947265625, "rewards/margins": 5.960272789001465, "rewards/rejected": -9.220419883728027, "step": 6764 }, { "epoch": 1.5, "learning_rate": 9.922124963136599e-06, "logits/chosen": -1.3807224035263062, "logits/rejected": -1.3357107639312744, "logps/chosen": -115.67457580566406, "logps/rejected": -181.06517028808594, "loss": 0.1048, "rewards/accuracies": 1.0, "rewards/chosen": -5.430212497711182, "rewards/margins": 1.507781982421875, "rewards/rejected": -6.937994480133057, "step": 6765 }, { "epoch": 1.5, "learning_rate": 9.92180954648231e-06, "logits/chosen": -1.949296474456787, "logits/rejected": -2.0453948974609375, "logps/chosen": -153.77801513671875, "logps/rejected": -151.4639129638672, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3580322265625, "rewards/margins": 8.912102699279785, "rewards/rejected": -10.270134925842285, "step": 6766 }, { "epoch": 1.5, "learning_rate": 9.921493497387327e-06, "logits/chosen": -1.246321439743042, "logits/rejected": -1.2513915300369263, "logps/chosen": -150.67990112304688, "logps/rejected": -108.33378601074219, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.4460952877998352, "rewards/margins": 7.564635276794434, "rewards/rejected": -8.010730743408203, "step": 6767 }, { "epoch": 1.5, "learning_rate": 9.921176815892259e-06, "logits/chosen": -1.7399672269821167, "logits/rejected": -1.7180007696151733, "logps/chosen": -119.13191223144531, "logps/rejected": -238.61456298828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.7301902770996094, "rewards/margins": 7.703922271728516, "rewards/rejected": -8.434112548828125, "step": 6768 }, { "epoch": 1.5, "learning_rate": 9.920859502037801e-06, "logits/chosen": -1.8723257780075073, "logits/rejected": -1.7681177854537964, "logps/chosen": -119.07878875732422, "logps/rejected": -179.548583984375, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -1.507171630859375, "rewards/margins": 4.898991584777832, "rewards/rejected": -6.406163215637207, "step": 6769 }, { "epoch": 1.5, "learning_rate": 9.920541555864726e-06, "logits/chosen": -1.6781498193740845, "logits/rejected": -1.67039155960083, "logps/chosen": -185.80093383789062, "logps/rejected": -213.02993774414062, "loss": 0.8868, "rewards/accuracies": 0.0, "rewards/chosen": -0.18337097764015198, "rewards/margins": -1.5628845691680908, "rewards/rejected": 1.3795136213302612, "step": 6770 }, { "epoch": 1.5, "learning_rate": 9.920222977413892e-06, "logits/chosen": -1.761054277420044, "logits/rejected": -1.7072553634643555, "logps/chosen": -145.64572143554688, "logps/rejected": -177.69854736328125, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -0.4893295466899872, "rewards/margins": 4.477992534637451, "rewards/rejected": -4.967321872711182, "step": 6771 }, { "epoch": 1.5, "learning_rate": 9.919903766726229e-06, "logits/chosen": -1.4482941627502441, "logits/rejected": -1.4762071371078491, "logps/chosen": -140.15322875976562, "logps/rejected": -118.39041900634766, "loss": 0.5231, "rewards/accuracies": 0.0, "rewards/chosen": -4.322168827056885, "rewards/margins": -0.5968246459960938, "rewards/rejected": -3.725344181060791, "step": 6772 }, { "epoch": 1.5, "learning_rate": 9.919583923842763e-06, "logits/chosen": -1.1760600805282593, "logits/rejected": -1.0637611150741577, "logps/chosen": -240.1153564453125, "logps/rejected": -295.49908447265625, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -0.712841808795929, "rewards/margins": 5.247100830078125, "rewards/rejected": -5.959942817687988, "step": 6773 }, { "epoch": 1.5, "learning_rate": 9.919263448804589e-06, "logits/chosen": -1.344116449356079, "logits/rejected": -1.2783668041229248, "logps/chosen": -79.79179382324219, "logps/rejected": -205.01385498046875, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": -0.7977157831192017, "rewards/margins": 8.337066650390625, "rewards/rejected": -9.134782791137695, "step": 6774 }, { "epoch": 1.5, "learning_rate": 9.918942341652885e-06, "logits/chosen": -1.5832042694091797, "logits/rejected": -1.714002013206482, "logps/chosen": -280.1475830078125, "logps/rejected": -202.99778747558594, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.62345290184021, "rewards/margins": 5.814026832580566, "rewards/rejected": -8.437479972839355, "step": 6775 }, { "epoch": 1.5, "learning_rate": 9.918620602428916e-06, "logits/chosen": -1.7674983739852905, "logits/rejected": -1.8301384449005127, "logps/chosen": -101.12947845458984, "logps/rejected": -147.55496215820312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6411964893341064, "rewards/margins": 10.55978012084961, "rewards/rejected": -12.200976371765137, "step": 6776 }, { "epoch": 1.5, "learning_rate": 9.918298231174023e-06, "logits/chosen": -1.6767256259918213, "logits/rejected": -1.6848595142364502, "logps/chosen": -148.08447265625, "logps/rejected": -100.48507690429688, "loss": 0.0874, "rewards/accuracies": 1.0, "rewards/chosen": -6.843042850494385, "rewards/margins": 1.6559605598449707, "rewards/rejected": -8.499003410339355, "step": 6777 }, { "epoch": 1.5, "learning_rate": 9.917975227929631e-06, "logits/chosen": -1.6821390390396118, "logits/rejected": -1.461944818496704, "logps/chosen": -67.42906188964844, "logps/rejected": -220.3258056640625, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -0.695539116859436, "rewards/margins": 5.862874984741211, "rewards/rejected": -6.558413982391357, "step": 6778 }, { "epoch": 1.5, "learning_rate": 9.917651592737245e-06, "logits/chosen": -1.8075302839279175, "logits/rejected": -1.8300259113311768, "logps/chosen": -91.77336120605469, "logps/rejected": -96.12657928466797, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 0.7244873046875, "rewards/margins": 8.363391876220703, "rewards/rejected": -7.638904094696045, "step": 6779 }, { "epoch": 1.5, "learning_rate": 9.91732732563845e-06, "logits/chosen": -1.5074937343597412, "logits/rejected": -1.5739400386810303, "logps/chosen": -165.6278076171875, "logps/rejected": -228.2205810546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 3.51220703125, "rewards/margins": 12.580087661743164, "rewards/rejected": -9.067880630493164, "step": 6780 }, { "epoch": 1.5, "learning_rate": 9.917002426674916e-06, "logits/chosen": -1.6073875427246094, "logits/rejected": -1.6361302137374878, "logps/chosen": -102.96041107177734, "logps/rejected": -65.33686828613281, "loss": 0.5658, "rewards/accuracies": 0.0, "rewards/chosen": -0.7798805236816406, "rewards/margins": -0.4206710755825043, "rewards/rejected": -0.35920944809913635, "step": 6781 }, { "epoch": 1.5, "learning_rate": 9.91667689588839e-06, "logits/chosen": -1.4777512550354004, "logits/rejected": -1.4777512550354004, "logps/chosen": -119.22122955322266, "logps/rejected": -119.22122955322266, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.5643844604492188, "rewards/margins": 0.0, "rewards/rejected": -2.5643844604492188, "step": 6782 }, { "epoch": 1.5, "learning_rate": 9.916350733320704e-06, "logits/chosen": -1.490993618965149, "logits/rejected": -1.550632357597351, "logps/chosen": -126.00147247314453, "logps/rejected": -153.70135498046875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.935189127922058, "rewards/margins": 6.458045959472656, "rewards/rejected": -8.393235206604004, "step": 6783 }, { "epoch": 1.5, "learning_rate": 9.916023939013764e-06, "logits/chosen": -1.3678935766220093, "logits/rejected": -0.9118649959564209, "logps/chosen": -202.869384765625, "logps/rejected": -599.039794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0291779041290283, "rewards/margins": 38.59656524658203, "rewards/rejected": -41.6257438659668, "step": 6784 }, { "epoch": 1.5, "learning_rate": 9.915696513009567e-06, "logits/chosen": -1.403212547302246, "logits/rejected": -1.3716480731964111, "logps/chosen": -229.06082153320312, "logps/rejected": -399.96630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.5351364612579346, "rewards/margins": 13.163612365722656, "rewards/rejected": -11.6284761428833, "step": 6785 }, { "epoch": 1.5, "learning_rate": 9.915368455350185e-06, "logits/chosen": -1.5013056993484497, "logits/rejected": -1.4431051015853882, "logps/chosen": -87.05023956298828, "logps/rejected": -156.4475860595703, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.3787399530410767, "rewards/margins": 4.899776458740234, "rewards/rejected": -6.2785162925720215, "step": 6786 }, { "epoch": 1.5, "learning_rate": 9.915039766077772e-06, "logits/chosen": -1.47842276096344, "logits/rejected": -1.6972135305404663, "logps/chosen": -228.87948608398438, "logps/rejected": -87.77793884277344, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -2.0209412574768066, "rewards/margins": 4.010863304138184, "rewards/rejected": -6.03180456161499, "step": 6787 }, { "epoch": 1.5, "learning_rate": 9.914710445234567e-06, "logits/chosen": -1.8249239921569824, "logits/rejected": -1.8249239921569824, "logps/chosen": -52.37206268310547, "logps/rejected": -52.37206268310547, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -0.8354774713516235, "rewards/margins": 0.0, "rewards/rejected": -0.8354774713516235, "step": 6788 }, { "epoch": 1.5, "learning_rate": 9.914380492862883e-06, "logits/chosen": -1.462219476699829, "logits/rejected": -1.3115276098251343, "logps/chosen": -103.26966094970703, "logps/rejected": -338.8200378417969, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": -0.7449027895927429, "rewards/margins": 7.898677825927734, "rewards/rejected": -8.643580436706543, "step": 6789 }, { "epoch": 1.5, "learning_rate": 9.91404990900512e-06, "logits/chosen": -1.6803185939788818, "logits/rejected": -1.5425950288772583, "logps/chosen": -159.328369140625, "logps/rejected": -305.759521484375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.8398560285568237, "rewards/margins": 6.819671630859375, "rewards/rejected": -8.659527778625488, "step": 6790 }, { "epoch": 1.5, "learning_rate": 9.913718693703755e-06, "logits/chosen": -1.9853256940841675, "logits/rejected": -1.9879785776138306, "logps/chosen": -112.82561492919922, "logps/rejected": -92.6162338256836, "loss": 0.3127, "rewards/accuracies": 1.0, "rewards/chosen": -1.8754005432128906, "rewards/margins": 0.24489831924438477, "rewards/rejected": -2.1202988624572754, "step": 6791 }, { "epoch": 1.5, "learning_rate": 9.91338684700135e-06, "logits/chosen": -1.7713491916656494, "logits/rejected": -1.8082523345947266, "logps/chosen": -159.2703399658203, "logps/rejected": -202.7593536376953, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.063868761062622, "rewards/margins": 8.750787734985352, "rewards/rejected": -10.814656257629395, "step": 6792 }, { "epoch": 1.5, "learning_rate": 9.91305436894055e-06, "logits/chosen": -1.669905424118042, "logits/rejected": -1.7058682441711426, "logps/chosen": -167.2667694091797, "logps/rejected": -291.7623291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.791651964187622, "rewards/margins": 11.848851203918457, "rewards/rejected": -13.6405029296875, "step": 6793 }, { "epoch": 1.5, "learning_rate": 9.912721259564072e-06, "logits/chosen": -1.659010410308838, "logits/rejected": -1.5376176834106445, "logps/chosen": -113.7629623413086, "logps/rejected": -243.29183959960938, "loss": 0.1193, "rewards/accuracies": 1.0, "rewards/chosen": 1.2062355279922485, "rewards/margins": 8.695531845092773, "rewards/rejected": -7.489295959472656, "step": 6794 }, { "epoch": 1.5, "learning_rate": 9.91238751891472e-06, "logits/chosen": -1.136372685432434, "logits/rejected": -1.0621553659439087, "logps/chosen": -139.65603637695312, "logps/rejected": -204.36590576171875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.6787475943565369, "rewards/margins": 5.919045925140381, "rewards/rejected": -6.5977935791015625, "step": 6795 }, { "epoch": 1.5, "learning_rate": 9.912053147035383e-06, "logits/chosen": -1.3765298128128052, "logits/rejected": -1.4813748598098755, "logps/chosen": -121.84227752685547, "logps/rejected": -88.27580261230469, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -3.1204240322113037, "rewards/margins": 3.4028913974761963, "rewards/rejected": -6.5233154296875, "step": 6796 }, { "epoch": 1.5, "learning_rate": 9.911718143969024e-06, "logits/chosen": -1.4266905784606934, "logits/rejected": -1.420192003250122, "logps/chosen": -91.95948791503906, "logps/rejected": -117.91405487060547, "loss": 0.5408, "rewards/accuracies": 0.0, "rewards/chosen": -2.3373184204101562, "rewards/margins": -0.24393677711486816, "rewards/rejected": -2.093381643295288, "step": 6797 }, { "epoch": 1.5, "learning_rate": 9.911382509758692e-06, "logits/chosen": -1.9428836107254028, "logits/rejected": -2.0933666229248047, "logps/chosen": -145.7767333984375, "logps/rejected": -137.43893432617188, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 3.2697160243988037, "rewards/margins": 7.393133163452148, "rewards/rejected": -4.123416900634766, "step": 6798 }, { "epoch": 1.5, "learning_rate": 9.911046244447515e-06, "logits/chosen": -1.4620091915130615, "logits/rejected": -1.4305603504180908, "logps/chosen": -96.67660522460938, "logps/rejected": -177.4622802734375, "loss": 0.4512, "rewards/accuracies": 1.0, "rewards/chosen": -4.273881435394287, "rewards/margins": 2.821467876434326, "rewards/rejected": -7.095349311828613, "step": 6799 }, { "epoch": 1.51, "learning_rate": 9.910709348078699e-06, "logits/chosen": -2.0957913398742676, "logits/rejected": -2.593194007873535, "logps/chosen": -150.11514282226562, "logps/rejected": -97.06410217285156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.8666794300079346, "rewards/margins": 9.489542961120605, "rewards/rejected": -7.622863292694092, "step": 6800 }, { "epoch": 1.51, "learning_rate": 9.910371820695538e-06, "logits/chosen": -1.9218698740005493, "logits/rejected": -1.9218698740005493, "logps/chosen": -109.4390869140625, "logps/rejected": -109.4390869140625, "loss": 0.3546, "rewards/accuracies": 0.0, "rewards/chosen": -2.9160964488983154, "rewards/margins": 0.0, "rewards/rejected": -2.9160964488983154, "step": 6801 }, { "epoch": 1.51, "learning_rate": 9.910033662341403e-06, "logits/chosen": -1.6685669422149658, "logits/rejected": -1.7319395542144775, "logps/chosen": -148.1175537109375, "logps/rejected": -153.338134765625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.05424804612994194, "rewards/margins": 5.300558567047119, "rewards/rejected": -5.246310710906982, "step": 6802 }, { "epoch": 1.51, "learning_rate": 9.909694873059745e-06, "logits/chosen": -1.8445286750793457, "logits/rejected": -1.8900632858276367, "logps/chosen": -156.17507934570312, "logps/rejected": -305.0401306152344, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": 4.612913608551025, "rewards/margins": 18.700796127319336, "rewards/rejected": -14.087882041931152, "step": 6803 }, { "epoch": 1.51, "learning_rate": 9.909355452894098e-06, "logits/chosen": -1.4101238250732422, "logits/rejected": -0.7992280125617981, "logps/chosen": -103.66563415527344, "logps/rejected": -1046.10498046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.12319488823413849, "rewards/margins": 91.08306884765625, "rewards/rejected": -91.20626068115234, "step": 6804 }, { "epoch": 1.51, "learning_rate": 9.909015401888077e-06, "logits/chosen": -1.3331201076507568, "logits/rejected": -1.3553087711334229, "logps/chosen": -107.95421600341797, "logps/rejected": -148.51776123046875, "loss": 0.1274, "rewards/accuracies": 1.0, "rewards/chosen": -2.1909425258636475, "rewards/margins": 1.237177848815918, "rewards/rejected": -3.4281203746795654, "step": 6805 }, { "epoch": 1.51, "learning_rate": 9.908674720085378e-06, "logits/chosen": -1.9676223993301392, "logits/rejected": -1.991755723953247, "logps/chosen": -40.33941650390625, "logps/rejected": -51.437652587890625, "loss": 0.1112, "rewards/accuracies": 1.0, "rewards/chosen": 0.14498062431812286, "rewards/margins": 1.4953808784484863, "rewards/rejected": -1.3504002094268799, "step": 6806 }, { "epoch": 1.51, "learning_rate": 9.908333407529779e-06, "logits/chosen": -1.3281447887420654, "logits/rejected": -1.3154394626617432, "logps/chosen": -85.23310089111328, "logps/rejected": -101.81632995605469, "loss": 0.325, "rewards/accuracies": 1.0, "rewards/chosen": -2.4363861083984375, "rewards/margins": 0.09313368797302246, "rewards/rejected": -2.52951979637146, "step": 6807 }, { "epoch": 1.51, "learning_rate": 9.907991464265136e-06, "logits/chosen": -1.8905925750732422, "logits/rejected": -1.3144944906234741, "logps/chosen": -97.65443420410156, "logps/rejected": -939.0286865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4240158796310425, "rewards/margins": 83.16844940185547, "rewards/rejected": -84.59246826171875, "step": 6808 }, { "epoch": 1.51, "learning_rate": 9.907648890335387e-06, "logits/chosen": -1.8562984466552734, "logits/rejected": -2.0316576957702637, "logps/chosen": -204.56027221679688, "logps/rejected": -121.070068359375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.03511810302734375, "rewards/margins": 7.442633152008057, "rewards/rejected": -7.407515048980713, "step": 6809 }, { "epoch": 1.51, "learning_rate": 9.907305685784553e-06, "logits/chosen": -1.4791094064712524, "logits/rejected": -1.5056835412979126, "logps/chosen": -113.68425750732422, "logps/rejected": -122.04380798339844, "loss": 0.0676, "rewards/accuracies": 1.0, "rewards/chosen": -0.4163452088832855, "rewards/margins": 2.7648096084594727, "rewards/rejected": -3.181154727935791, "step": 6810 }, { "epoch": 1.51, "learning_rate": 9.906961850656737e-06, "logits/chosen": -1.8344595432281494, "logits/rejected": -1.8172996044158936, "logps/chosen": -243.10633850097656, "logps/rejected": -393.8395080566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 4.107792854309082, "rewards/margins": 11.271672248840332, "rewards/rejected": -7.16387939453125, "step": 6811 }, { "epoch": 1.51, "learning_rate": 9.906617384996118e-06, "logits/chosen": -1.6737866401672363, "logits/rejected": -1.6518820524215698, "logps/chosen": -165.59365844726562, "logps/rejected": -230.09654235839844, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.32934266328811646, "rewards/margins": 5.4148850440979, "rewards/rejected": -5.08554220199585, "step": 6812 }, { "epoch": 1.51, "learning_rate": 9.906272288846962e-06, "logits/chosen": -2.0374910831451416, "logits/rejected": -2.0374910831451416, "logps/chosen": -135.37261962890625, "logps/rejected": -135.37261962890625, "loss": 0.3549, "rewards/accuracies": 0.0, "rewards/chosen": -3.534283399581909, "rewards/margins": 0.0, "rewards/rejected": -3.534283399581909, "step": 6813 }, { "epoch": 1.51, "learning_rate": 9.90592656225361e-06, "logits/chosen": -1.7148292064666748, "logits/rejected": -1.7148292064666748, "logps/chosen": -145.32400512695312, "logps/rejected": -145.32400512695312, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.812554836273193, "rewards/margins": 0.0, "rewards/rejected": -6.812554836273193, "step": 6814 }, { "epoch": 1.51, "learning_rate": 9.905580205260487e-06, "logits/chosen": -1.7726125717163086, "logits/rejected": -1.7935099601745605, "logps/chosen": -128.7625732421875, "logps/rejected": -226.0290985107422, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.3580292463302612, "rewards/margins": 11.131935119628906, "rewards/rejected": -9.773905754089355, "step": 6815 }, { "epoch": 1.51, "learning_rate": 9.905233217912102e-06, "logits/chosen": -1.7851507663726807, "logits/rejected": -1.955404281616211, "logps/chosen": -199.0963897705078, "logps/rejected": -107.61705017089844, "loss": 0.082, "rewards/accuracies": 1.0, "rewards/chosen": -3.4868454933166504, "rewards/margins": 3.6235570907592773, "rewards/rejected": -7.110402584075928, "step": 6816 }, { "epoch": 1.51, "learning_rate": 9.904885600253038e-06, "logits/chosen": -1.4042588472366333, "logits/rejected": -1.2967681884765625, "logps/chosen": -105.262451171875, "logps/rejected": -238.01162719726562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.022905111312866, "rewards/margins": 15.395572662353516, "rewards/rejected": -17.41847801208496, "step": 6817 }, { "epoch": 1.51, "learning_rate": 9.904537352327968e-06, "logits/chosen": -1.6790252923965454, "logits/rejected": -1.7587718963623047, "logps/chosen": -231.79470825195312, "logps/rejected": -161.0526123046875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6778320670127869, "rewards/margins": 5.507330417633057, "rewards/rejected": -6.185162544250488, "step": 6818 }, { "epoch": 1.51, "learning_rate": 9.904188474181637e-06, "logits/chosen": -1.570041298866272, "logits/rejected": -1.5584121942520142, "logps/chosen": -146.45384216308594, "logps/rejected": -161.45144653320312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.10669250786304474, "rewards/margins": 5.620194911956787, "rewards/rejected": -5.513502597808838, "step": 6819 }, { "epoch": 1.51, "learning_rate": 9.903838965858877e-06, "logits/chosen": -1.5457777976989746, "logits/rejected": -1.3117051124572754, "logps/chosen": -100.97866821289062, "logps/rejected": -528.5694580078125, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -3.2393929958343506, "rewards/margins": 41.684627532958984, "rewards/rejected": -44.92401885986328, "step": 6820 }, { "epoch": 1.51, "learning_rate": 9.9034888274046e-06, "logits/chosen": -1.3187165260314941, "logits/rejected": -1.4675921201705933, "logps/chosen": -222.5225830078125, "logps/rejected": -151.96311950683594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.04181976243853569, "rewards/margins": 8.080195426940918, "rewards/rejected": -8.038375854492188, "step": 6821 }, { "epoch": 1.51, "learning_rate": 9.903138058863793e-06, "logits/chosen": -1.652418851852417, "logits/rejected": -1.6711238622665405, "logps/chosen": -111.76973724365234, "logps/rejected": -161.85263061523438, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.4356285333633423, "rewards/margins": 4.208725929260254, "rewards/rejected": -5.644354343414307, "step": 6822 }, { "epoch": 1.51, "learning_rate": 9.902786660281533e-06, "logits/chosen": -1.441304087638855, "logits/rejected": -1.2823785543441772, "logps/chosen": -162.37734985351562, "logps/rejected": -422.53515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.80253005027771, "rewards/margins": 14.778202056884766, "rewards/rejected": -11.975671768188477, "step": 6823 }, { "epoch": 1.51, "learning_rate": 9.902434631702976e-06, "logits/chosen": -1.9735972881317139, "logits/rejected": -1.868589162826538, "logps/chosen": -90.80926513671875, "logps/rejected": -195.95716857910156, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.4796890318393707, "rewards/margins": 6.870047092437744, "rewards/rejected": -6.390357971191406, "step": 6824 }, { "epoch": 1.51, "learning_rate": 9.902081973173352e-06, "logits/chosen": -1.7119865417480469, "logits/rejected": -1.7119865417480469, "logps/chosen": -118.7569808959961, "logps/rejected": -118.7569808959961, "loss": 0.3477, "rewards/accuracies": 0.0, "rewards/chosen": -2.8961684703826904, "rewards/margins": 0.0, "rewards/rejected": -2.8961684703826904, "step": 6825 }, { "epoch": 1.51, "learning_rate": 9.901728684737977e-06, "logits/chosen": -1.2202273607254028, "logits/rejected": -1.1620532274246216, "logps/chosen": -68.53047943115234, "logps/rejected": -155.9021453857422, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": 0.4904121458530426, "rewards/margins": 4.556532382965088, "rewards/rejected": -4.066120147705078, "step": 6826 }, { "epoch": 1.51, "learning_rate": 9.901374766442252e-06, "logits/chosen": -1.393483281135559, "logits/rejected": -1.393483281135559, "logps/chosen": -186.7752227783203, "logps/rejected": -186.7752227783203, "loss": 0.6298, "rewards/accuracies": 0.0, "rewards/chosen": -6.9057207107543945, "rewards/margins": 0.0, "rewards/rejected": -6.9057207107543945, "step": 6827 }, { "epoch": 1.51, "learning_rate": 9.901020218331652e-06, "logits/chosen": -1.3647814989089966, "logits/rejected": -1.278845191001892, "logps/chosen": -195.7598114013672, "logps/rejected": -295.6775817871094, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": -4.375679016113281, "rewards/margins": 2.1131362915039062, "rewards/rejected": -6.4888153076171875, "step": 6828 }, { "epoch": 1.51, "learning_rate": 9.900665040451735e-06, "logits/chosen": -1.6401735544204712, "logits/rejected": -1.4505876302719116, "logps/chosen": -77.77140808105469, "logps/rejected": -196.45217895507812, "loss": 0.2091, "rewards/accuracies": 1.0, "rewards/chosen": -0.43969422578811646, "rewards/margins": 0.6565399765968323, "rewards/rejected": -1.0962342023849487, "step": 6829 }, { "epoch": 1.51, "learning_rate": 9.90030923284814e-06, "logits/chosen": -1.5891982316970825, "logits/rejected": -1.5338770151138306, "logps/chosen": -166.95196533203125, "logps/rejected": -204.0664825439453, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.7430450916290283, "rewards/margins": 6.383915901184082, "rewards/rejected": -9.126960754394531, "step": 6830 }, { "epoch": 1.51, "learning_rate": 9.89995279556659e-06, "logits/chosen": -1.6783770322799683, "logits/rejected": -1.633332371711731, "logps/chosen": -92.61067199707031, "logps/rejected": -164.85400390625, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.7430984377861023, "rewards/margins": 6.392479419708252, "rewards/rejected": -7.13557767868042, "step": 6831 }, { "epoch": 1.51, "learning_rate": 9.899595728652883e-06, "logits/chosen": -1.5602811574935913, "logits/rejected": -1.6511361598968506, "logps/chosen": -129.50657653808594, "logps/rejected": -93.064453125, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -2.985649824142456, "rewards/margins": 4.512181282043457, "rewards/rejected": -7.497830867767334, "step": 6832 }, { "epoch": 1.51, "learning_rate": 9.899238032152907e-06, "logits/chosen": -1.3182684183120728, "logits/rejected": -1.318064570426941, "logps/chosen": -79.21875, "logps/rejected": -66.3132095336914, "loss": 0.0836, "rewards/accuracies": 1.0, "rewards/chosen": 0.3362899720668793, "rewards/margins": 5.722104072570801, "rewards/rejected": -5.385814189910889, "step": 6833 }, { "epoch": 1.51, "learning_rate": 9.898879706112618e-06, "logits/chosen": -1.7233514785766602, "logits/rejected": -1.702190637588501, "logps/chosen": -159.19546508789062, "logps/rejected": -291.79931640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 7.244800090789795, "rewards/margins": 15.520238876342773, "rewards/rejected": -8.275439262390137, "step": 6834 }, { "epoch": 1.51, "learning_rate": 9.898520750578065e-06, "logits/chosen": -1.547973394393921, "logits/rejected": -1.547973394393921, "logps/chosen": -155.4923095703125, "logps/rejected": -155.4923095703125, "loss": 0.3563, "rewards/accuracies": 0.0, "rewards/chosen": -7.045525550842285, "rewards/margins": 0.0, "rewards/rejected": -7.045525550842285, "step": 6835 }, { "epoch": 1.51, "learning_rate": 9.898161165595371e-06, "logits/chosen": -1.7079945802688599, "logits/rejected": -1.776066541671753, "logps/chosen": -85.39092254638672, "logps/rejected": -82.77622985839844, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.7369400262832642, "rewards/margins": 6.954031467437744, "rewards/rejected": -6.2170915603637695, "step": 6836 }, { "epoch": 1.51, "learning_rate": 9.897800951210741e-06, "logits/chosen": -1.7273234128952026, "logits/rejected": -1.744710087776184, "logps/chosen": -106.41949462890625, "logps/rejected": -151.44384765625, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -1.61095130443573, "rewards/margins": 3.3337225914001465, "rewards/rejected": -4.944674015045166, "step": 6837 }, { "epoch": 1.51, "learning_rate": 9.897440107470463e-06, "logits/chosen": -1.712917685508728, "logits/rejected": -1.8432142734527588, "logps/chosen": -145.29904174804688, "logps/rejected": -113.4365005493164, "loss": 0.3477, "rewards/accuracies": 1.0, "rewards/chosen": -2.3635125160217285, "rewards/margins": 6.053661823272705, "rewards/rejected": -8.417174339294434, "step": 6838 }, { "epoch": 1.51, "learning_rate": 9.897078634420905e-06, "logits/chosen": -1.7059135437011719, "logits/rejected": -1.6698495149612427, "logps/chosen": -143.81423950195312, "logps/rejected": -173.29763793945312, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": -3.1061394214630127, "rewards/margins": 2.5838844776153564, "rewards/rejected": -5.690023899078369, "step": 6839 }, { "epoch": 1.51, "learning_rate": 9.896716532108515e-06, "logits/chosen": -2.0272490978240967, "logits/rejected": -1.810310959815979, "logps/chosen": -104.45848083496094, "logps/rejected": -398.677490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6339134573936462, "rewards/margins": 11.410508155822754, "rewards/rejected": -12.044421195983887, "step": 6840 }, { "epoch": 1.51, "learning_rate": 9.896353800579823e-06, "logits/chosen": -1.876668095588684, "logits/rejected": -1.9239046573638916, "logps/chosen": -163.40298461914062, "logps/rejected": -101.67874908447266, "loss": 0.2944, "rewards/accuracies": 1.0, "rewards/chosen": -3.192247152328491, "rewards/margins": 0.2208259105682373, "rewards/rejected": -3.4130730628967285, "step": 6841 }, { "epoch": 1.51, "learning_rate": 9.895990439881436e-06, "logits/chosen": -1.647372841835022, "logits/rejected": -1.5965092182159424, "logps/chosen": -121.03376770019531, "logps/rejected": -229.83218383789062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9896209836006165, "rewards/margins": 12.268318176269531, "rewards/rejected": -13.257939338684082, "step": 6842 }, { "epoch": 1.51, "learning_rate": 9.895626450060047e-06, "logits/chosen": -1.5749354362487793, "logits/rejected": -1.528456449508667, "logps/chosen": -179.70419311523438, "logps/rejected": -263.6354064941406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4516403377056122, "rewards/margins": 10.587071418762207, "rewards/rejected": -11.038711547851562, "step": 6843 }, { "epoch": 1.51, "learning_rate": 9.89526183116243e-06, "logits/chosen": -1.52950918674469, "logits/rejected": -1.0617083311080933, "logps/chosen": -224.44345092773438, "logps/rejected": -775.9638671875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 0.8464996218681335, "rewards/margins": 49.641239166259766, "rewards/rejected": -48.79473876953125, "step": 6844 }, { "epoch": 1.52, "learning_rate": 9.894896583235434e-06, "logits/chosen": -1.2635096311569214, "logits/rejected": -1.284757375717163, "logps/chosen": -121.11266326904297, "logps/rejected": -180.98533630371094, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 2.7549569606781006, "rewards/margins": 9.183730125427246, "rewards/rejected": -6.428772926330566, "step": 6845 }, { "epoch": 1.52, "learning_rate": 9.894530706325994e-06, "logits/chosen": -1.451493501663208, "logits/rejected": -1.4749287366867065, "logps/chosen": -127.11979675292969, "logps/rejected": -81.29557800292969, "loss": 0.6497, "rewards/accuracies": 0.0, "rewards/chosen": -1.4128952026367188, "rewards/margins": -0.9809188842773438, "rewards/rejected": -0.431976318359375, "step": 6846 }, { "epoch": 1.52, "learning_rate": 9.894164200481124e-06, "logits/chosen": -1.8702319860458374, "logits/rejected": -1.8209196329116821, "logps/chosen": -152.80735778808594, "logps/rejected": -182.5098876953125, "loss": 0.0864, "rewards/accuracies": 1.0, "rewards/chosen": -1.4824905395507812, "rewards/margins": 1.6683640480041504, "rewards/rejected": -3.1508545875549316, "step": 6847 }, { "epoch": 1.52, "learning_rate": 9.89379706574792e-06, "logits/chosen": -1.779954433441162, "logits/rejected": -1.797534704208374, "logps/chosen": -116.2275390625, "logps/rejected": -131.90985107421875, "loss": 0.0634, "rewards/accuracies": 1.0, "rewards/chosen": -0.4020591676235199, "rewards/margins": 2.0003578662872314, "rewards/rejected": -2.402416944503784, "step": 6848 }, { "epoch": 1.52, "learning_rate": 9.893429302173558e-06, "logits/chosen": -1.4390307664871216, "logits/rejected": -1.4176514148712158, "logps/chosen": -72.60942077636719, "logps/rejected": -109.26172637939453, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.064368486404419, "rewards/margins": 3.9008209705352783, "rewards/rejected": -4.965189456939697, "step": 6849 }, { "epoch": 1.52, "learning_rate": 9.893060909805294e-06, "logits/chosen": -1.6139416694641113, "logits/rejected": -1.5539249181747437, "logps/chosen": -110.00575256347656, "logps/rejected": -225.1715850830078, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -2.670436143875122, "rewards/margins": 3.9141814708709717, "rewards/rejected": -6.584617614746094, "step": 6850 }, { "epoch": 1.52, "learning_rate": 9.892691888690466e-06, "logits/chosen": -1.4766212701797485, "logits/rejected": -1.4202560186386108, "logps/chosen": -153.65499877929688, "logps/rejected": -168.5582275390625, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": 0.6106857657432556, "rewards/margins": 4.546522617340088, "rewards/rejected": -3.9358367919921875, "step": 6851 }, { "epoch": 1.52, "learning_rate": 9.892322238876492e-06, "logits/chosen": -1.8094631433486938, "logits/rejected": -1.7743667364120483, "logps/chosen": -96.02294921875, "logps/rejected": -156.35089111328125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.1750916242599487, "rewards/margins": 4.293902397155762, "rewards/rejected": -5.468994140625, "step": 6852 }, { "epoch": 1.52, "learning_rate": 9.89195196041087e-06, "logits/chosen": -1.5816311836242676, "logits/rejected": -1.6009986400604248, "logps/chosen": -136.51657104492188, "logps/rejected": -206.2295684814453, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.09848480671644211, "rewards/margins": 7.123186111450195, "rewards/rejected": -7.024701118469238, "step": 6853 }, { "epoch": 1.52, "learning_rate": 9.891581053341182e-06, "logits/chosen": -1.6881744861602783, "logits/rejected": -1.7279795408248901, "logps/chosen": -170.20346069335938, "logps/rejected": -179.15872192382812, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.5884017944335938, "rewards/margins": 3.9407424926757812, "rewards/rejected": -4.529144287109375, "step": 6854 }, { "epoch": 1.52, "learning_rate": 9.891209517715088e-06, "logits/chosen": -1.5662238597869873, "logits/rejected": -1.5691015720367432, "logps/chosen": -97.70790100097656, "logps/rejected": -119.63420104980469, "loss": 0.1291, "rewards/accuracies": 1.0, "rewards/chosen": -2.1313881874084473, "rewards/margins": 1.224687099456787, "rewards/rejected": -3.3560752868652344, "step": 6855 }, { "epoch": 1.52, "learning_rate": 9.890837353580327e-06, "logits/chosen": -1.5462309122085571, "logits/rejected": -1.5462309122085571, "logps/chosen": -122.63477325439453, "logps/rejected": -122.63477325439453, "loss": 0.3748, "rewards/accuracies": 0.0, "rewards/chosen": -5.802765846252441, "rewards/margins": 0.0, "rewards/rejected": -5.802765846252441, "step": 6856 }, { "epoch": 1.52, "learning_rate": 9.890464560984725e-06, "logits/chosen": -1.370422601699829, "logits/rejected": -1.3705579042434692, "logps/chosen": -96.96380615234375, "logps/rejected": -131.6206817626953, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.022802710533142, "rewards/margins": 7.945645332336426, "rewards/rejected": -6.922842502593994, "step": 6857 }, { "epoch": 1.52, "learning_rate": 9.890091139976183e-06, "logits/chosen": -1.6937251091003418, "logits/rejected": -1.7483021020889282, "logps/chosen": -214.51145935058594, "logps/rejected": -198.22108459472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.09739074856042862, "rewards/margins": 9.811285018920898, "rewards/rejected": -9.908676147460938, "step": 6858 }, { "epoch": 1.52, "learning_rate": 9.889717090602685e-06, "logits/chosen": -1.6558938026428223, "logits/rejected": -1.663855791091919, "logps/chosen": -89.39434814453125, "logps/rejected": -158.7061309814453, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.07852554321289062, "rewards/margins": 7.487567901611328, "rewards/rejected": -7.4090423583984375, "step": 6859 }, { "epoch": 1.52, "learning_rate": 9.889342412912296e-06, "logits/chosen": -1.699196219444275, "logits/rejected": -1.785076379776001, "logps/chosen": -148.994140625, "logps/rejected": -177.41134643554688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.9532387256622314, "rewards/margins": 6.010747909545898, "rewards/rejected": -8.96398639678955, "step": 6860 }, { "epoch": 1.52, "learning_rate": 9.88896710695316e-06, "logits/chosen": -1.7596389055252075, "logits/rejected": -1.754603624343872, "logps/chosen": -221.47276306152344, "logps/rejected": -249.93130493164062, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.159863233566284, "rewards/margins": 5.661211013793945, "rewards/rejected": -8.821074485778809, "step": 6861 }, { "epoch": 1.52, "learning_rate": 9.888591172773502e-06, "logits/chosen": -1.8025155067443848, "logits/rejected": -1.8574614524841309, "logps/chosen": -157.88348388671875, "logps/rejected": -117.41835021972656, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.301295518875122, "rewards/margins": 6.506648063659668, "rewards/rejected": -8.807943344116211, "step": 6862 }, { "epoch": 1.52, "learning_rate": 9.888214610421633e-06, "logits/chosen": -1.5667752027511597, "logits/rejected": -1.4674830436706543, "logps/chosen": -238.26742553710938, "logps/rejected": -290.9739074707031, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": 1.4917449951171875, "rewards/margins": 3.434659004211426, "rewards/rejected": -1.9429138898849487, "step": 6863 }, { "epoch": 1.52, "learning_rate": 9.887837419945937e-06, "logits/chosen": -1.5269790887832642, "logits/rejected": -1.4775749444961548, "logps/chosen": -86.8187255859375, "logps/rejected": -134.61566162109375, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -1.699884057044983, "rewards/margins": 3.370701789855957, "rewards/rejected": -5.07058572769165, "step": 6864 }, { "epoch": 1.52, "learning_rate": 9.887459601394881e-06, "logits/chosen": -1.3369901180267334, "logits/rejected": -1.371904969215393, "logps/chosen": -80.55376434326172, "logps/rejected": -116.05622100830078, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.14570465683937073, "rewards/margins": 9.041485786437988, "rewards/rejected": -9.187190055847168, "step": 6865 }, { "epoch": 1.52, "learning_rate": 9.887081154817015e-06, "logits/chosen": -1.8487995862960815, "logits/rejected": -1.8487995862960815, "logps/chosen": -107.54730224609375, "logps/rejected": -107.54730224609375, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -3.1108033657073975, "rewards/margins": 0.0, "rewards/rejected": -3.1108033657073975, "step": 6866 }, { "epoch": 1.52, "learning_rate": 9.88670208026097e-06, "logits/chosen": -1.9034065008163452, "logits/rejected": -1.9707685708999634, "logps/chosen": -190.2161865234375, "logps/rejected": -362.032470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.0350494384765625, "rewards/margins": 18.570425033569336, "rewards/rejected": -18.6054744720459, "step": 6867 }, { "epoch": 1.52, "learning_rate": 9.886322377775455e-06, "logits/chosen": -1.6961536407470703, "logits/rejected": -1.6969881057739258, "logps/chosen": -106.72232055664062, "logps/rejected": -143.23623657226562, "loss": 0.0483, "rewards/accuracies": 1.0, "rewards/chosen": -1.3077430725097656, "rewards/margins": 2.3415679931640625, "rewards/rejected": -3.649311065673828, "step": 6868 }, { "epoch": 1.52, "learning_rate": 9.885942047409262e-06, "logits/chosen": -1.610787272453308, "logits/rejected": -1.6489790678024292, "logps/chosen": -113.2516860961914, "logps/rejected": -150.68157958984375, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -1.459246039390564, "rewards/margins": 10.372838020324707, "rewards/rejected": -11.832083702087402, "step": 6869 }, { "epoch": 1.52, "learning_rate": 9.885561089211259e-06, "logits/chosen": -1.7354553937911987, "logits/rejected": -1.7262208461761475, "logps/chosen": -136.66412353515625, "logps/rejected": -249.9352264404297, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.211114525794983, "rewards/margins": 8.058253288269043, "rewards/rejected": -9.269368171691895, "step": 6870 }, { "epoch": 1.52, "learning_rate": 9.885179503230403e-06, "logits/chosen": -1.565116047859192, "logits/rejected": -1.5586405992507935, "logps/chosen": -114.12995910644531, "logps/rejected": -204.66357421875, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -2.421279191970825, "rewards/margins": 8.259861946105957, "rewards/rejected": -10.681140899658203, "step": 6871 }, { "epoch": 1.52, "learning_rate": 9.884797289515723e-06, "logits/chosen": -1.812088966369629, "logits/rejected": -1.8134398460388184, "logps/chosen": -69.53611755371094, "logps/rejected": -69.22429656982422, "loss": 0.1831, "rewards/accuracies": 1.0, "rewards/chosen": -5.067794322967529, "rewards/margins": 0.8160290718078613, "rewards/rejected": -5.883823394775391, "step": 6872 }, { "epoch": 1.52, "learning_rate": 9.884414448116335e-06, "logits/chosen": -1.7191100120544434, "logits/rejected": -1.6906929016113281, "logps/chosen": -110.50697326660156, "logps/rejected": -224.72642517089844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.52873694896698, "rewards/margins": 8.168787956237793, "rewards/rejected": -9.697525024414062, "step": 6873 }, { "epoch": 1.52, "learning_rate": 9.88403097908143e-06, "logits/chosen": -1.4996585845947266, "logits/rejected": -1.4797542095184326, "logps/chosen": -132.4499053955078, "logps/rejected": -176.89727783203125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 0.2998977601528168, "rewards/margins": 4.460497856140137, "rewards/rejected": -4.160600185394287, "step": 6874 }, { "epoch": 1.52, "learning_rate": 9.883646882460287e-06, "logits/chosen": -1.2269492149353027, "logits/rejected": -1.266019344329834, "logps/chosen": -144.9819793701172, "logps/rejected": -189.11634826660156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.3929550647735596, "rewards/margins": 8.555727005004883, "rewards/rejected": -7.162771701812744, "step": 6875 }, { "epoch": 1.52, "learning_rate": 9.883262158302259e-06, "logits/chosen": -1.563846230506897, "logits/rejected": -1.5697077512741089, "logps/chosen": -179.54342651367188, "logps/rejected": -165.83236694335938, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.4374268054962158, "rewards/margins": 5.26038932800293, "rewards/rejected": -6.697816371917725, "step": 6876 }, { "epoch": 1.52, "learning_rate": 9.882876806656783e-06, "logits/chosen": -1.6465017795562744, "logits/rejected": -1.3384759426116943, "logps/chosen": -143.75279235839844, "logps/rejected": -438.64703369140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.882171630859375, "rewards/margins": 22.87906265258789, "rewards/rejected": -24.761234283447266, "step": 6877 }, { "epoch": 1.52, "learning_rate": 9.882490827573375e-06, "logits/chosen": -1.64230477809906, "logits/rejected": -1.5895819664001465, "logps/chosen": -69.24673461914062, "logps/rejected": -188.15771484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9886978268623352, "rewards/margins": 10.362592697143555, "rewards/rejected": -11.351290702819824, "step": 6878 }, { "epoch": 1.52, "learning_rate": 9.882104221101634e-06, "logits/chosen": -1.6559339761734009, "logits/rejected": -1.6704537868499756, "logps/chosen": -95.09561157226562, "logps/rejected": -140.35182189941406, "loss": 0.1405, "rewards/accuracies": 1.0, "rewards/chosen": -1.944649577140808, "rewards/margins": 1.1266952753067017, "rewards/rejected": -3.0713448524475098, "step": 6879 }, { "epoch": 1.52, "learning_rate": 9.881716987291235e-06, "logits/chosen": -1.451059341430664, "logits/rejected": -1.4267383813858032, "logps/chosen": -160.59994506835938, "logps/rejected": -214.69940185546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.1565628051757812, "rewards/margins": 7.510871887207031, "rewards/rejected": -9.667434692382812, "step": 6880 }, { "epoch": 1.52, "learning_rate": 9.88132912619194e-06, "logits/chosen": -1.6288658380508423, "logits/rejected": -1.6064627170562744, "logps/chosen": -94.4697265625, "logps/rejected": -119.88908386230469, "loss": 0.0953, "rewards/accuracies": 1.0, "rewards/chosen": -1.707525610923767, "rewards/margins": 1.5839043855667114, "rewards/rejected": -3.2914299964904785, "step": 6881 }, { "epoch": 1.52, "learning_rate": 9.880940637853585e-06, "logits/chosen": -1.4144165515899658, "logits/rejected": -1.4123355150222778, "logps/chosen": -101.24832916259766, "logps/rejected": -155.04776000976562, "loss": 0.1357, "rewards/accuracies": 1.0, "rewards/chosen": -1.7990318536758423, "rewards/margins": 1.1858574151992798, "rewards/rejected": -2.984889268875122, "step": 6882 }, { "epoch": 1.52, "learning_rate": 9.880551522326093e-06, "logits/chosen": -1.3006445169448853, "logits/rejected": -1.2877477407455444, "logps/chosen": -164.36044311523438, "logps/rejected": -277.81329345703125, "loss": 0.1388, "rewards/accuracies": 1.0, "rewards/chosen": -8.40267562866211, "rewards/margins": 9.329486846923828, "rewards/rejected": -17.732162475585938, "step": 6883 }, { "epoch": 1.52, "learning_rate": 9.880161779659463e-06, "logits/chosen": -1.9424638748168945, "logits/rejected": -1.9839507341384888, "logps/chosen": -163.44931030273438, "logps/rejected": -141.23397827148438, "loss": 0.0735, "rewards/accuracies": 1.0, "rewards/chosen": -8.661627769470215, "rewards/margins": 1.8422174453735352, "rewards/rejected": -10.50384521484375, "step": 6884 }, { "epoch": 1.52, "learning_rate": 9.879771409903775e-06, "logits/chosen": -1.33699631690979, "logits/rejected": -1.4017335176467896, "logps/chosen": -220.72979736328125, "logps/rejected": -162.72122192382812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.809472680091858, "rewards/margins": 9.124581336975098, "rewards/rejected": -10.934054374694824, "step": 6885 }, { "epoch": 1.52, "learning_rate": 9.879380413109193e-06, "logits/chosen": -1.992045521736145, "logits/rejected": -2.0923404693603516, "logps/chosen": -206.96240234375, "logps/rejected": -158.85040283203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.3586975038051605, "rewards/margins": 7.71209716796875, "rewards/rejected": -7.353399753570557, "step": 6886 }, { "epoch": 1.52, "learning_rate": 9.878988789325955e-06, "logits/chosen": -1.9933974742889404, "logits/rejected": -1.9707072973251343, "logps/chosen": -109.19361877441406, "logps/rejected": -201.42233276367188, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.362457275390625, "rewards/margins": 6.979068756103516, "rewards/rejected": -8.34152603149414, "step": 6887 }, { "epoch": 1.52, "learning_rate": 9.878596538604388e-06, "logits/chosen": -1.55612313747406, "logits/rejected": -1.5783190727233887, "logps/chosen": -115.73857879638672, "logps/rejected": -153.81358337402344, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -1.4011802673339844, "rewards/margins": 5.793323040008545, "rewards/rejected": -7.194503307342529, "step": 6888 }, { "epoch": 1.52, "learning_rate": 9.878203660994894e-06, "logits/chosen": -1.917067527770996, "logits/rejected": -1.821276307106018, "logps/chosen": -98.6548080444336, "logps/rejected": -256.4983825683594, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.4501854181289673, "rewards/margins": 6.549358367919922, "rewards/rejected": -7.9995436668396, "step": 6889 }, { "epoch": 1.53, "learning_rate": 9.877810156547956e-06, "logits/chosen": -1.420723557472229, "logits/rejected": -1.5027743577957153, "logps/chosen": -203.03863525390625, "logps/rejected": -150.81033325195312, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -4.351284980773926, "rewards/margins": 5.400323867797852, "rewards/rejected": -9.751608848571777, "step": 6890 }, { "epoch": 1.53, "learning_rate": 9.877416025314139e-06, "logits/chosen": -1.5616509914398193, "logits/rejected": -1.5819861888885498, "logps/chosen": -156.2388916015625, "logps/rejected": -184.14276123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.3057083189487457, "rewards/margins": 10.324178695678711, "rewards/rejected": -10.018470764160156, "step": 6891 }, { "epoch": 1.53, "learning_rate": 9.877021267344087e-06, "logits/chosen": -1.7323532104492188, "logits/rejected": -1.7641260623931885, "logps/chosen": -180.58465576171875, "logps/rejected": -114.9974365234375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.4473694562911987, "rewards/margins": 6.452410697937012, "rewards/rejected": -7.8997802734375, "step": 6892 }, { "epoch": 1.53, "learning_rate": 9.876625882688526e-06, "logits/chosen": -1.427589774131775, "logits/rejected": -1.4081227779388428, "logps/chosen": -70.46009063720703, "logps/rejected": -118.24372863769531, "loss": 0.0573, "rewards/accuracies": 1.0, "rewards/chosen": -4.521705150604248, "rewards/margins": 2.1210074424743652, "rewards/rejected": -6.642712593078613, "step": 6893 }, { "epoch": 1.53, "learning_rate": 9.876229871398263e-06, "logits/chosen": -1.7924959659576416, "logits/rejected": -1.8895660638809204, "logps/chosen": -259.4896545410156, "logps/rejected": -184.8070068359375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.72039794921875, "rewards/margins": 13.845852851867676, "rewards/rejected": -13.125454902648926, "step": 6894 }, { "epoch": 1.53, "learning_rate": 9.875833233524183e-06, "logits/chosen": -1.876074194908142, "logits/rejected": -1.941512107849121, "logps/chosen": -116.78939819335938, "logps/rejected": -173.0383758544922, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -0.8226715326309204, "rewards/margins": 11.688374519348145, "rewards/rejected": -12.511046409606934, "step": 6895 }, { "epoch": 1.53, "learning_rate": 9.875435969117254e-06, "logits/chosen": -1.5136209726333618, "logits/rejected": -1.2897721529006958, "logps/chosen": -109.50023651123047, "logps/rejected": -374.55743408203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.8371543884277344, "rewards/margins": 8.448416709899902, "rewards/rejected": -11.285571098327637, "step": 6896 }, { "epoch": 1.53, "learning_rate": 9.875038078228522e-06, "logits/chosen": -1.6830748319625854, "logits/rejected": -0.8994302749633789, "logps/chosen": -184.88357543945312, "logps/rejected": -791.6163330078125, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": 4.09462308883667, "rewards/margins": 72.25101470947266, "rewards/rejected": -68.1563949584961, "step": 6897 }, { "epoch": 1.53, "learning_rate": 9.874639560909118e-06, "logits/chosen": -1.6303174495697021, "logits/rejected": -1.6303174495697021, "logps/chosen": -103.98332977294922, "logps/rejected": -103.98332977294922, "loss": 0.3478, "rewards/accuracies": 0.0, "rewards/chosen": -0.46910402178764343, "rewards/margins": 0.0, "rewards/rejected": -0.46910402178764343, "step": 6898 }, { "epoch": 1.53, "learning_rate": 9.87424041721025e-06, "logits/chosen": -1.480700969696045, "logits/rejected": -1.4307395219802856, "logps/chosen": -112.74531555175781, "logps/rejected": -192.2720184326172, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8496368527412415, "rewards/margins": 6.155264377593994, "rewards/rejected": -7.00490140914917, "step": 6899 }, { "epoch": 1.53, "learning_rate": 9.873840647183204e-06, "logits/chosen": -1.1353073120117188, "logits/rejected": -1.2237844467163086, "logps/chosen": -138.9139404296875, "logps/rejected": -88.4237060546875, "loss": 0.8253, "rewards/accuracies": 0.0, "rewards/chosen": -7.871829509735107, "rewards/margins": -1.4373626708984375, "rewards/rejected": -6.43446683883667, "step": 6900 }, { "epoch": 1.53, "learning_rate": 9.87344025087935e-06, "logits/chosen": -1.7543302774429321, "logits/rejected": -1.5831362009048462, "logps/chosen": -82.49574279785156, "logps/rejected": -260.7958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.832502007484436, "rewards/margins": 12.24773120880127, "rewards/rejected": -13.080233573913574, "step": 6901 }, { "epoch": 1.53, "learning_rate": 9.87303922835014e-06, "logits/chosen": -1.687820315361023, "logits/rejected": -1.6136122941970825, "logps/chosen": -268.13275146484375, "logps/rejected": -401.22015380859375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.4033203125, "rewards/margins": 6.991476535797119, "rewards/rejected": -6.588156223297119, "step": 6902 }, { "epoch": 1.53, "learning_rate": 9.872637579647105e-06, "logits/chosen": -1.6807384490966797, "logits/rejected": -1.6731208562850952, "logps/chosen": -128.19967651367188, "logps/rejected": -143.0401611328125, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -3.282257080078125, "rewards/margins": 2.9893136024475098, "rewards/rejected": -6.271570682525635, "step": 6903 }, { "epoch": 1.53, "learning_rate": 9.872235304821853e-06, "logits/chosen": -1.3849146366119385, "logits/rejected": -1.4822360277175903, "logps/chosen": -145.88491821289062, "logps/rejected": -122.93655395507812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.6854965686798096, "rewards/margins": 7.6376848220825195, "rewards/rejected": -10.32318115234375, "step": 6904 }, { "epoch": 1.53, "learning_rate": 9.871832403926077e-06, "logits/chosen": -1.405306339263916, "logits/rejected": -1.3601499795913696, "logps/chosen": -169.52346801757812, "logps/rejected": -295.1915588378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.022114634513855, "rewards/margins": 12.93989372253418, "rewards/rejected": -13.962008476257324, "step": 6905 }, { "epoch": 1.53, "learning_rate": 9.871428877011549e-06, "logits/chosen": -1.9792555570602417, "logits/rejected": -1.9486823081970215, "logps/chosen": -94.8692626953125, "logps/rejected": -148.13937377929688, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -1.6744521856307983, "rewards/margins": 3.032766819000244, "rewards/rejected": -4.707219123840332, "step": 6906 }, { "epoch": 1.53, "learning_rate": 9.87102472413012e-06, "logits/chosen": -1.6928584575653076, "logits/rejected": -1.6950340270996094, "logps/chosen": -78.29105377197266, "logps/rejected": -91.72467803955078, "loss": 0.1339, "rewards/accuracies": 1.0, "rewards/chosen": -2.7643356323242188, "rewards/margins": 1.1803905963897705, "rewards/rejected": -3.9447262287139893, "step": 6907 }, { "epoch": 1.53, "learning_rate": 9.870619945333727e-06, "logits/chosen": -1.520738124847412, "logits/rejected": -1.6085237264633179, "logps/chosen": -163.68504333496094, "logps/rejected": -121.04497528076172, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.7122451663017273, "rewards/margins": 5.1389336585998535, "rewards/rejected": -5.8511786460876465, "step": 6908 }, { "epoch": 1.53, "learning_rate": 9.870214540674377e-06, "logits/chosen": -1.5434672832489014, "logits/rejected": -1.4864057302474976, "logps/chosen": -167.869384765625, "logps/rejected": -248.3387908935547, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.8236907720565796, "rewards/margins": 7.983646869659424, "rewards/rejected": -9.807337760925293, "step": 6909 }, { "epoch": 1.53, "learning_rate": 9.869808510204165e-06, "logits/chosen": -1.5273760557174683, "logits/rejected": -1.5567412376403809, "logps/chosen": -172.6464385986328, "logps/rejected": -217.3315887451172, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.04134368896484375, "rewards/margins": 13.215242385864258, "rewards/rejected": -13.173898696899414, "step": 6910 }, { "epoch": 1.53, "learning_rate": 9.869401853975268e-06, "logits/chosen": -1.3589030504226685, "logits/rejected": -1.3973121643066406, "logps/chosen": -176.55960083007812, "logps/rejected": -170.00741577148438, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 0.4427734315395355, "rewards/margins": 3.896052598953247, "rewards/rejected": -3.4532792568206787, "step": 6911 }, { "epoch": 1.53, "learning_rate": 9.868994572039939e-06, "logits/chosen": -1.7745392322540283, "logits/rejected": -1.8073303699493408, "logps/chosen": -187.10772705078125, "logps/rejected": -143.0003204345703, "loss": 0.3346, "rewards/accuracies": 1.0, "rewards/chosen": -4.567138671875, "rewards/margins": 0.08764505386352539, "rewards/rejected": -4.654783725738525, "step": 6912 }, { "epoch": 1.53, "learning_rate": 9.86858666445051e-06, "logits/chosen": -1.3314366340637207, "logits/rejected": -1.3314366340637207, "logps/chosen": -149.71697998046875, "logps/rejected": -149.71697998046875, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -9.154311180114746, "rewards/margins": 0.0, "rewards/rejected": -9.154311180114746, "step": 6913 }, { "epoch": 1.53, "learning_rate": 9.8681781312594e-06, "logits/chosen": -1.4523142576217651, "logits/rejected": -1.4598314762115479, "logps/chosen": -214.1179962158203, "logps/rejected": -207.25912475585938, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.03396148607134819, "rewards/margins": 9.290881156921387, "rewards/rejected": -9.256919860839844, "step": 6914 }, { "epoch": 1.53, "learning_rate": 9.867768972519101e-06, "logits/chosen": -1.5200260877609253, "logits/rejected": -1.521450400352478, "logps/chosen": -87.34317016601562, "logps/rejected": -123.87458038330078, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.17534255981445312, "rewards/margins": 8.869009017944336, "rewards/rejected": -9.044351577758789, "step": 6915 }, { "epoch": 1.53, "learning_rate": 9.867359188282193e-06, "logits/chosen": -1.6179591417312622, "logits/rejected": -1.6060434579849243, "logps/chosen": -133.9530029296875, "logps/rejected": -174.34754943847656, "loss": 0.0349, "rewards/accuracies": 1.0, "rewards/chosen": -0.7522979974746704, "rewards/margins": 2.690281867980957, "rewards/rejected": -3.442579746246338, "step": 6916 }, { "epoch": 1.53, "learning_rate": 9.86694877860133e-06, "logits/chosen": -1.2726670503616333, "logits/rejected": -1.1561843156814575, "logps/chosen": -205.90936279296875, "logps/rejected": -340.5346374511719, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": 0.9851547479629517, "rewards/margins": 11.382283210754395, "rewards/rejected": -10.397128105163574, "step": 6917 }, { "epoch": 1.53, "learning_rate": 9.866537743529247e-06, "logits/chosen": -1.4671571254730225, "logits/rejected": -1.2473257780075073, "logps/chosen": -162.99127197265625, "logps/rejected": -377.82171630859375, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 0.34976503252983093, "rewards/margins": 3.991403341293335, "rewards/rejected": -3.6416382789611816, "step": 6918 }, { "epoch": 1.53, "learning_rate": 9.866126083118765e-06, "logits/chosen": -1.1176635026931763, "logits/rejected": -1.145231008529663, "logps/chosen": -182.56564331054688, "logps/rejected": -253.64926147460938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.3750671446323395, "rewards/margins": 11.555807113647461, "rewards/rejected": -11.180740356445312, "step": 6919 }, { "epoch": 1.53, "learning_rate": 9.865713797422778e-06, "logits/chosen": -1.691819667816162, "logits/rejected": -1.6082615852355957, "logps/chosen": -225.42063903808594, "logps/rejected": -425.13714599609375, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.20480652153491974, "rewards/margins": 22.114442825317383, "rewards/rejected": -22.319250106811523, "step": 6920 }, { "epoch": 1.53, "learning_rate": 9.865300886494264e-06, "logits/chosen": -1.3896808624267578, "logits/rejected": -1.6157021522521973, "logps/chosen": -224.44081115722656, "logps/rejected": -148.81698608398438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6847305297851562, "rewards/margins": 9.991106033325195, "rewards/rejected": -10.675836563110352, "step": 6921 }, { "epoch": 1.53, "learning_rate": 9.864887350386284e-06, "logits/chosen": -1.7555965185165405, "logits/rejected": -1.7555965185165405, "logps/chosen": -232.64801025390625, "logps/rejected": -232.64801025390625, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -11.717787742614746, "rewards/margins": 0.0, "rewards/rejected": -11.717787742614746, "step": 6922 }, { "epoch": 1.53, "learning_rate": 9.864473189151972e-06, "logits/chosen": -1.3281806707382202, "logits/rejected": -1.3117295503616333, "logps/chosen": -79.23075866699219, "logps/rejected": -122.05824279785156, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -0.5469299554824829, "rewards/margins": 3.437974452972412, "rewards/rejected": -3.9849045276641846, "step": 6923 }, { "epoch": 1.53, "learning_rate": 9.864058402844553e-06, "logits/chosen": -1.2649731636047363, "logits/rejected": -1.274713397026062, "logps/chosen": -208.8082275390625, "logps/rejected": -182.81182861328125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 0.16106872260570526, "rewards/margins": 4.444220066070557, "rewards/rejected": -4.283151149749756, "step": 6924 }, { "epoch": 1.53, "learning_rate": 9.863642991517317e-06, "logits/chosen": -1.295601487159729, "logits/rejected": -1.270201563835144, "logps/chosen": -79.7922592163086, "logps/rejected": -86.9621810913086, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": -2.1369903087615967, "rewards/margins": 2.4236643314361572, "rewards/rejected": -4.560654640197754, "step": 6925 }, { "epoch": 1.53, "learning_rate": 9.863226955223653e-06, "logits/chosen": -1.8038389682769775, "logits/rejected": -1.7507835626602173, "logps/chosen": -104.97667694091797, "logps/rejected": -160.64442443847656, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1942062377929688, "rewards/margins": 6.154486179351807, "rewards/rejected": -7.348692417144775, "step": 6926 }, { "epoch": 1.53, "learning_rate": 9.862810294017014e-06, "logits/chosen": -1.3833791017532349, "logits/rejected": -1.094202995300293, "logps/chosen": -237.65191650390625, "logps/rejected": -726.5787353515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.8694626092910767, "rewards/margins": 55.206478118896484, "rewards/rejected": -57.0759391784668, "step": 6927 }, { "epoch": 1.53, "learning_rate": 9.86239300795094e-06, "logits/chosen": -1.549008846282959, "logits/rejected": -1.682473063468933, "logps/chosen": -185.57577514648438, "logps/rejected": -154.66685485839844, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.11564941704273224, "rewards/margins": 6.548883438110352, "rewards/rejected": -6.664532661437988, "step": 6928 }, { "epoch": 1.53, "learning_rate": 9.861975097079057e-06, "logits/chosen": -1.6851822137832642, "logits/rejected": -1.724583625793457, "logps/chosen": -108.16409301757812, "logps/rejected": -116.34856414794922, "loss": 0.3469, "rewards/accuracies": 1.0, "rewards/chosen": 0.068090058863163, "rewards/margins": 7.220757484436035, "rewards/rejected": -7.15266752243042, "step": 6929 }, { "epoch": 1.53, "learning_rate": 9.861556561455061e-06, "logits/chosen": -1.3641605377197266, "logits/rejected": -0.7277435660362244, "logps/chosen": -207.7684326171875, "logps/rejected": -844.0428466796875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 1.291900634765625, "rewards/margins": 74.58464050292969, "rewards/rejected": -73.29273986816406, "step": 6930 }, { "epoch": 1.53, "learning_rate": 9.861137401132733e-06, "logits/chosen": -1.3920124769210815, "logits/rejected": -1.3759536743164062, "logps/chosen": -230.17172241210938, "logps/rejected": -299.49554443359375, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -1.9855560064315796, "rewards/margins": 3.6679792404174805, "rewards/rejected": -5.65353536605835, "step": 6931 }, { "epoch": 1.53, "learning_rate": 9.860717616165934e-06, "logits/chosen": -1.7415825128555298, "logits/rejected": -1.6681535243988037, "logps/chosen": -120.71827697753906, "logps/rejected": -168.52957153320312, "loss": 0.4558, "rewards/accuracies": 0.0, "rewards/chosen": -3.9909684658050537, "rewards/margins": -0.3976304531097412, "rewards/rejected": -3.5933380126953125, "step": 6932 }, { "epoch": 1.53, "learning_rate": 9.860297206608606e-06, "logits/chosen": -1.704582691192627, "logits/rejected": -1.8419678211212158, "logps/chosen": -188.55551147460938, "logps/rejected": -161.2732391357422, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 4.407476902008057, "rewards/margins": 12.246467590332031, "rewards/rejected": -7.838990688323975, "step": 6933 }, { "epoch": 1.53, "learning_rate": 9.859876172514773e-06, "logits/chosen": -1.6183737516403198, "logits/rejected": -1.6008963584899902, "logps/chosen": -205.40359497070312, "logps/rejected": -304.64593505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.08324890583753586, "rewards/margins": 19.1119327545166, "rewards/rejected": -19.195180892944336, "step": 6934 }, { "epoch": 1.53, "learning_rate": 9.859454513938534e-06, "logits/chosen": -1.30626380443573, "logits/rejected": -1.2216529846191406, "logps/chosen": -212.859375, "logps/rejected": -356.7967529296875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 2.358302354812622, "rewards/margins": 12.090662956237793, "rewards/rejected": -9.73236083984375, "step": 6935 }, { "epoch": 1.54, "learning_rate": 9.859032230934071e-06, "logits/chosen": -1.7951027154922485, "logits/rejected": -1.9429372549057007, "logps/chosen": -225.8495635986328, "logps/rejected": -138.08001708984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.8225113153457642, "rewards/margins": 11.068546295166016, "rewards/rejected": -10.246034622192383, "step": 6936 }, { "epoch": 1.54, "learning_rate": 9.858609323555646e-06, "logits/chosen": -1.6611653566360474, "logits/rejected": -1.674350619316101, "logps/chosen": -224.75021362304688, "logps/rejected": -309.9112243652344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5090744495391846, "rewards/margins": 8.848945617675781, "rewards/rejected": -10.358019828796387, "step": 6937 }, { "epoch": 1.54, "learning_rate": 9.858185791857604e-06, "logits/chosen": -1.498810887336731, "logits/rejected": -1.5411332845687866, "logps/chosen": -195.50155639648438, "logps/rejected": -157.33126831054688, "loss": 0.0785, "rewards/accuracies": 1.0, "rewards/chosen": -8.77965259552002, "rewards/margins": 2.1312646865844727, "rewards/rejected": -10.910917282104492, "step": 6938 }, { "epoch": 1.54, "learning_rate": 9.857761635894367e-06, "logits/chosen": -1.6885257959365845, "logits/rejected": -1.6756545305252075, "logps/chosen": -85.15353393554688, "logps/rejected": -102.19041442871094, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -0.9584717154502869, "rewards/margins": 3.3383126258850098, "rewards/rejected": -4.296784400939941, "step": 6939 }, { "epoch": 1.54, "learning_rate": 9.857336855720439e-06, "logits/chosen": -1.8625789880752563, "logits/rejected": -1.78947114944458, "logps/chosen": -122.14060974121094, "logps/rejected": -264.93365478515625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.074256896972656, "rewards/margins": 6.998030662536621, "rewards/rejected": -11.072287559509277, "step": 6940 }, { "epoch": 1.54, "learning_rate": 9.856911451390399e-06, "logits/chosen": -1.4773212671279907, "logits/rejected": -1.452830195426941, "logps/chosen": -101.81112670898438, "logps/rejected": -152.00698852539062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4055542051792145, "rewards/margins": 7.940026760101318, "rewards/rejected": -8.3455810546875, "step": 6941 }, { "epoch": 1.54, "learning_rate": 9.856485422958913e-06, "logits/chosen": -1.5928865671157837, "logits/rejected": -1.6889400482177734, "logps/chosen": -130.45657348632812, "logps/rejected": -219.537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.4035675525665283, "rewards/margins": 13.57050895690918, "rewards/rejected": -12.16694164276123, "step": 6942 }, { "epoch": 1.54, "learning_rate": 9.856058770480726e-06, "logits/chosen": -1.7474836111068726, "logits/rejected": -1.6949859857559204, "logps/chosen": -99.17127990722656, "logps/rejected": -224.85238647460938, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1492935419082642, "rewards/margins": 10.18868637084961, "rewards/rejected": -11.337980270385742, "step": 6943 }, { "epoch": 1.54, "learning_rate": 9.855631494010661e-06, "logits/chosen": -1.7337007522583008, "logits/rejected": -1.811968207359314, "logps/chosen": -224.53607177734375, "logps/rejected": -229.8807373046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9643585085868835, "rewards/margins": 8.47263240814209, "rewards/rejected": -9.436990737915039, "step": 6944 }, { "epoch": 1.54, "learning_rate": 9.855203593603622e-06, "logits/chosen": -1.6324899196624756, "logits/rejected": -1.5897400379180908, "logps/chosen": -114.62081909179688, "logps/rejected": -240.71524047851562, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.7499771118164062, "rewards/margins": 6.617122173309326, "rewards/rejected": -5.86714506149292, "step": 6945 }, { "epoch": 1.54, "learning_rate": 9.85477506931459e-06, "logits/chosen": -1.585444688796997, "logits/rejected": -1.4320670366287231, "logps/chosen": -172.6673583984375, "logps/rejected": -349.52978515625, "loss": 0.2374, "rewards/accuracies": 1.0, "rewards/chosen": -1.356085181236267, "rewards/margins": 10.76145076751709, "rewards/rejected": -12.117535591125488, "step": 6946 }, { "epoch": 1.54, "learning_rate": 9.854345921198637e-06, "logits/chosen": -1.7978529930114746, "logits/rejected": -1.8264849185943604, "logps/chosen": -137.9034423828125, "logps/rejected": -285.1973571777344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.4031295776367188, "rewards/margins": 13.002113342285156, "rewards/rejected": -11.598983764648438, "step": 6947 }, { "epoch": 1.54, "learning_rate": 9.853916149310898e-06, "logits/chosen": -1.5447968244552612, "logits/rejected": -1.5705136060714722, "logps/chosen": -133.5606689453125, "logps/rejected": -94.90579986572266, "loss": 0.1297, "rewards/accuracies": 1.0, "rewards/chosen": -2.5435454845428467, "rewards/margins": 1.2167789936065674, "rewards/rejected": -3.760324478149414, "step": 6948 }, { "epoch": 1.54, "learning_rate": 9.853485753706603e-06, "logits/chosen": -1.5815560817718506, "logits/rejected": -1.6104707717895508, "logps/chosen": -172.44796752929688, "logps/rejected": -149.01499938964844, "loss": 1.0323, "rewards/accuracies": 0.0, "rewards/chosen": -7.679141521453857, "rewards/margins": -1.9289612770080566, "rewards/rejected": -5.750180244445801, "step": 6949 }, { "epoch": 1.54, "learning_rate": 9.853054734441059e-06, "logits/chosen": -1.9001479148864746, "logits/rejected": -1.9001479148864746, "logps/chosen": -250.7119903564453, "logps/rejected": -250.7119903564453, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -10.618408203125, "rewards/margins": 0.0, "rewards/rejected": -10.618408203125, "step": 6950 }, { "epoch": 1.54, "learning_rate": 9.852623091569646e-06, "logits/chosen": -1.400031566619873, "logits/rejected": -1.3983168601989746, "logps/chosen": -196.43014526367188, "logps/rejected": -207.7520294189453, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.7058563232421875, "rewards/margins": 7.881104946136475, "rewards/rejected": -5.175248622894287, "step": 6951 }, { "epoch": 1.54, "learning_rate": 9.852190825147831e-06, "logits/chosen": -1.9820560216903687, "logits/rejected": -1.966875433921814, "logps/chosen": -163.27264404296875, "logps/rejected": -249.58348083496094, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -6.299864292144775, "rewards/margins": 2.946589946746826, "rewards/rejected": -9.246454238891602, "step": 6952 }, { "epoch": 1.54, "learning_rate": 9.85175793523116e-06, "logits/chosen": -1.2683429718017578, "logits/rejected": -1.1900782585144043, "logps/chosen": -76.4460678100586, "logps/rejected": -207.37582397460938, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.2391090393066406, "rewards/margins": 7.881922721862793, "rewards/rejected": -9.121031761169434, "step": 6953 }, { "epoch": 1.54, "learning_rate": 9.851324421875256e-06, "logits/chosen": -1.6193897724151611, "logits/rejected": -1.5767176151275635, "logps/chosen": -166.2243194580078, "logps/rejected": -218.50575256347656, "loss": 0.1241, "rewards/accuracies": 1.0, "rewards/chosen": -7.052281856536865, "rewards/margins": 2.6187186241149902, "rewards/rejected": -9.671000480651855, "step": 6954 }, { "epoch": 1.54, "learning_rate": 9.850890285135829e-06, "logits/chosen": -1.2239576578140259, "logits/rejected": -1.13618803024292, "logps/chosen": -163.117431640625, "logps/rejected": -306.47210693359375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 1.0503265857696533, "rewards/margins": 13.75767993927002, "rewards/rejected": -12.707353591918945, "step": 6955 }, { "epoch": 1.54, "learning_rate": 9.850455525068658e-06, "logits/chosen": -1.4923022985458374, "logits/rejected": -1.2765291929244995, "logps/chosen": -105.11750793457031, "logps/rejected": -345.2920227050781, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -1.3330856561660767, "rewards/margins": 10.832280158996582, "rewards/rejected": -12.165366172790527, "step": 6956 }, { "epoch": 1.54, "learning_rate": 9.850020141729615e-06, "logits/chosen": -1.7937391996383667, "logits/rejected": -1.7178254127502441, "logps/chosen": -92.78010559082031, "logps/rejected": -224.35398864746094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.7295334339141846, "rewards/margins": 8.909563064575195, "rewards/rejected": -10.6390962600708, "step": 6957 }, { "epoch": 1.54, "learning_rate": 9.849584135174642e-06, "logits/chosen": -1.8271574974060059, "logits/rejected": -1.9197885990142822, "logps/chosen": -171.14553833007812, "logps/rejected": -182.06130981445312, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.21922913193702698, "rewards/margins": 7.153451442718506, "rewards/rejected": -7.3726806640625, "step": 6958 }, { "epoch": 1.54, "learning_rate": 9.849147505459766e-06, "logits/chosen": -1.5295339822769165, "logits/rejected": -1.5623679161071777, "logps/chosen": -80.07659912109375, "logps/rejected": -88.59696197509766, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.044755697250366, "rewards/margins": 4.897747993469238, "rewards/rejected": -6.942503452301025, "step": 6959 }, { "epoch": 1.54, "learning_rate": 9.848710252641092e-06, "logits/chosen": -1.8119491338729858, "logits/rejected": -1.821997046470642, "logps/chosen": -83.12503051757812, "logps/rejected": -115.0592041015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.03557586669921875, "rewards/margins": 7.300399303436279, "rewards/rejected": -7.335975170135498, "step": 6960 }, { "epoch": 1.54, "learning_rate": 9.848272376774807e-06, "logits/chosen": -1.8271600008010864, "logits/rejected": -1.8160535097122192, "logps/chosen": -156.39706420898438, "logps/rejected": -325.5624694824219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.369944095611572, "rewards/margins": 13.888376235961914, "rewards/rejected": -18.258319854736328, "step": 6961 }, { "epoch": 1.54, "learning_rate": 9.847833877917177e-06, "logits/chosen": -1.4137526750564575, "logits/rejected": -1.385567545890808, "logps/chosen": -95.89200592041016, "logps/rejected": -119.64607238769531, "loss": 0.2286, "rewards/accuracies": 1.0, "rewards/chosen": -2.3686585426330566, "rewards/margins": 0.5456154346466064, "rewards/rejected": -2.914273977279663, "step": 6962 }, { "epoch": 1.54, "learning_rate": 9.847394756124547e-06, "logits/chosen": -1.8513492345809937, "logits/rejected": -1.9470332860946655, "logps/chosen": -140.57443237304688, "logps/rejected": -113.00527954101562, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -4.105688571929932, "rewards/margins": 4.8829026222229, "rewards/rejected": -8.988591194152832, "step": 6963 }, { "epoch": 1.54, "learning_rate": 9.846955011453343e-06, "logits/chosen": -1.3365789651870728, "logits/rejected": -1.5745562314987183, "logps/chosen": -230.95474243164062, "logps/rejected": -142.4517059326172, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 5.208447456359863, "rewards/margins": 12.301429748535156, "rewards/rejected": -7.092981815338135, "step": 6964 }, { "epoch": 1.54, "learning_rate": 9.846514643960072e-06, "logits/chosen": -1.770796537399292, "logits/rejected": -1.7907743453979492, "logps/chosen": -143.5523681640625, "logps/rejected": -212.24655151367188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.0455873012542725, "rewards/margins": 8.166348457336426, "rewards/rejected": -10.211935997009277, "step": 6965 }, { "epoch": 1.54, "learning_rate": 9.846073653701321e-06, "logits/chosen": -1.4984902143478394, "logits/rejected": -1.6247422695159912, "logps/chosen": -175.161376953125, "logps/rejected": -202.8103790283203, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.5915955305099487, "rewards/margins": 9.54455852508545, "rewards/rejected": -7.952963352203369, "step": 6966 }, { "epoch": 1.54, "learning_rate": 9.845632040733754e-06, "logits/chosen": -1.695772647857666, "logits/rejected": -1.7687758207321167, "logps/chosen": -210.3026580810547, "logps/rejected": -147.48025512695312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.11228485405445099, "rewards/margins": 7.30888032913208, "rewards/rejected": -7.196595668792725, "step": 6967 }, { "epoch": 1.54, "learning_rate": 9.845189805114119e-06, "logits/chosen": -1.7238632440567017, "logits/rejected": -1.7098807096481323, "logps/chosen": -78.44432830810547, "logps/rejected": -118.24261474609375, "loss": 0.0491, "rewards/accuracies": 1.0, "rewards/chosen": 0.3358566462993622, "rewards/margins": 2.2785797119140625, "rewards/rejected": -1.9427231550216675, "step": 6968 }, { "epoch": 1.54, "learning_rate": 9.844746946899241e-06, "logits/chosen": -1.5056366920471191, "logits/rejected": -1.6007161140441895, "logps/chosen": -172.57205200195312, "logps/rejected": -176.69461059570312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7819793820381165, "rewards/margins": 12.363064765930176, "rewards/rejected": -13.145044326782227, "step": 6969 }, { "epoch": 1.54, "learning_rate": 9.844303466146027e-06, "logits/chosen": -1.5609501600265503, "logits/rejected": -1.750319480895996, "logps/chosen": -111.05914306640625, "logps/rejected": -109.07636260986328, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.15053558349609375, "rewards/margins": 7.920483589172363, "rewards/rejected": -7.7699480056762695, "step": 6970 }, { "epoch": 1.54, "learning_rate": 9.843859362911463e-06, "logits/chosen": -1.574493646621704, "logits/rejected": -1.571814775466919, "logps/chosen": -113.79048156738281, "logps/rejected": -113.58380126953125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.302603244781494, "rewards/margins": 5.484170436859131, "rewards/rejected": -7.786773681640625, "step": 6971 }, { "epoch": 1.54, "learning_rate": 9.843414637252615e-06, "logits/chosen": -1.7876819372177124, "logits/rejected": -1.8127951622009277, "logps/chosen": -135.4139862060547, "logps/rejected": -136.22203063964844, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.6054520010948181, "rewards/margins": 4.042748928070068, "rewards/rejected": -4.648200988769531, "step": 6972 }, { "epoch": 1.54, "learning_rate": 9.842969289226629e-06, "logits/chosen": -1.9004731178283691, "logits/rejected": -1.8797096014022827, "logps/chosen": -118.0426025390625, "logps/rejected": -192.5693359375, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -2.2360916137695312, "rewards/margins": 3.326551914215088, "rewards/rejected": -5.562643527984619, "step": 6973 }, { "epoch": 1.54, "learning_rate": 9.842523318890733e-06, "logits/chosen": -1.7741177082061768, "logits/rejected": -1.796304702758789, "logps/chosen": -175.72726440429688, "logps/rejected": -192.15423583984375, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -5.9349799156188965, "rewards/margins": 2.923985004425049, "rewards/rejected": -8.858964920043945, "step": 6974 }, { "epoch": 1.54, "learning_rate": 9.84207672630223e-06, "logits/chosen": -1.4639337062835693, "logits/rejected": -1.4773072004318237, "logps/chosen": -70.0029296875, "logps/rejected": -110.14241790771484, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.269174188375473, "rewards/margins": 5.627681732177734, "rewards/rejected": -5.89685583114624, "step": 6975 }, { "epoch": 1.54, "learning_rate": 9.84162951151851e-06, "logits/chosen": -1.683018445968628, "logits/rejected": -1.6542623043060303, "logps/chosen": -106.88402557373047, "logps/rejected": -147.6147918701172, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/chosen": -0.6072929501533508, "rewards/margins": 4.017131805419922, "rewards/rejected": -4.624424934387207, "step": 6976 }, { "epoch": 1.54, "learning_rate": 9.841181674597034e-06, "logits/chosen": -1.7214173078536987, "logits/rejected": -1.773628830909729, "logps/chosen": -89.13502502441406, "logps/rejected": -123.93119812011719, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.99828040599823, "rewards/margins": 7.284521102905273, "rewards/rejected": -9.282801628112793, "step": 6977 }, { "epoch": 1.54, "learning_rate": 9.840733215595351e-06, "logits/chosen": -1.564963459968567, "logits/rejected": -1.5030007362365723, "logps/chosen": -108.24568176269531, "logps/rejected": -273.3919677734375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -3.4057769775390625, "rewards/margins": 5.331949234008789, "rewards/rejected": -8.737726211547852, "step": 6978 }, { "epoch": 1.54, "learning_rate": 9.840284134571088e-06, "logits/chosen": -1.4862656593322754, "logits/rejected": -1.543757438659668, "logps/chosen": -152.5800018310547, "logps/rejected": -135.6536865234375, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.27354127168655396, "rewards/margins": 3.907104730606079, "rewards/rejected": -4.180645942687988, "step": 6979 }, { "epoch": 1.54, "learning_rate": 9.83983443158195e-06, "logits/chosen": -1.5827926397323608, "logits/rejected": -1.53936767578125, "logps/chosen": -100.05067443847656, "logps/rejected": -168.3790740966797, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.3270103633403778, "rewards/margins": 4.312924385070801, "rewards/rejected": -4.639934539794922, "step": 6980 }, { "epoch": 1.55, "learning_rate": 9.839384106685721e-06, "logits/chosen": -1.4249579906463623, "logits/rejected": -1.437293529510498, "logps/chosen": -171.77590942382812, "logps/rejected": -194.64620971679688, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -4.906701564788818, "rewards/margins": 4.278479099273682, "rewards/rejected": -9.1851806640625, "step": 6981 }, { "epoch": 1.55, "learning_rate": 9.838933159940266e-06, "logits/chosen": -1.3523155450820923, "logits/rejected": -1.3910030126571655, "logps/chosen": -138.4010467529297, "logps/rejected": -131.76539611816406, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -2.571310520172119, "rewards/margins": 2.785067558288574, "rewards/rejected": -5.356378078460693, "step": 6982 }, { "epoch": 1.55, "learning_rate": 9.838481591403536e-06, "logits/chosen": -1.5387215614318848, "logits/rejected": -1.195221185684204, "logps/chosen": -166.82376098632812, "logps/rejected": -438.1931457519531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.06395874172449112, "rewards/margins": 23.899703979492188, "rewards/rejected": -23.835744857788086, "step": 6983 }, { "epoch": 1.55, "learning_rate": 9.83802940113355e-06, "logits/chosen": -1.2882651090621948, "logits/rejected": -1.2964179515838623, "logps/chosen": -123.46626281738281, "logps/rejected": -100.7170181274414, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -3.9259095191955566, "rewards/margins": 3.461367130279541, "rewards/rejected": -7.387276649475098, "step": 6984 }, { "epoch": 1.55, "learning_rate": 9.837576589188418e-06, "logits/chosen": -1.4324897527694702, "logits/rejected": -1.4761227369308472, "logps/chosen": -190.56112670898438, "logps/rejected": -159.90261840820312, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.0065826415084302425, "rewards/margins": 4.5828094482421875, "rewards/rejected": -4.589392185211182, "step": 6985 }, { "epoch": 1.55, "learning_rate": 9.837123155626323e-06, "logits/chosen": -1.7020609378814697, "logits/rejected": -1.765413522720337, "logps/chosen": -119.08647155761719, "logps/rejected": -151.45980834960938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1098747253417969, "rewards/margins": 11.197661399841309, "rewards/rejected": -12.307536125183105, "step": 6986 }, { "epoch": 1.55, "learning_rate": 9.836669100505532e-06, "logits/chosen": -1.349183201789856, "logits/rejected": -1.3015162944793701, "logps/chosen": -132.5319061279297, "logps/rejected": -209.39675903320312, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.4635238647460938, "rewards/margins": 7.974614143371582, "rewards/rejected": -11.438138008117676, "step": 6987 }, { "epoch": 1.55, "learning_rate": 9.836214423884387e-06, "logits/chosen": -1.624998927116394, "logits/rejected": -1.624998927116394, "logps/chosen": -188.67312622070312, "logps/rejected": -188.67312622070312, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -8.081518173217773, "rewards/margins": 0.0, "rewards/rejected": -8.081518173217773, "step": 6988 }, { "epoch": 1.55, "learning_rate": 9.835759125821314e-06, "logits/chosen": -1.724898099899292, "logits/rejected": -1.7643027305603027, "logps/chosen": -208.90505981445312, "logps/rejected": -177.90704345703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.03165893629193306, "rewards/margins": 6.045973777770996, "rewards/rejected": -6.077632904052734, "step": 6989 }, { "epoch": 1.55, "learning_rate": 9.83530320637482e-06, "logits/chosen": -1.3197877407073975, "logits/rejected": -1.3729923963546753, "logps/chosen": -46.59702682495117, "logps/rejected": -31.393352508544922, "loss": 0.0683, "rewards/accuracies": 1.0, "rewards/chosen": -0.11650848388671875, "rewards/margins": 2.1751937866210938, "rewards/rejected": -2.2917022705078125, "step": 6990 }, { "epoch": 1.55, "learning_rate": 9.834846665603486e-06, "logits/chosen": -1.663375973701477, "logits/rejected": -1.663375973701477, "logps/chosen": -61.45474624633789, "logps/rejected": -61.45474624633789, "loss": 0.3487, "rewards/accuracies": 0.0, "rewards/chosen": -3.585179567337036, "rewards/margins": 0.0, "rewards/rejected": -3.585179567337036, "step": 6991 }, { "epoch": 1.55, "learning_rate": 9.834389503565978e-06, "logits/chosen": -1.7169827222824097, "logits/rejected": -1.7562004327774048, "logps/chosen": -105.65824890136719, "logps/rejected": -148.0025177001953, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4882179498672485, "rewards/margins": 5.992652893066406, "rewards/rejected": -7.480870723724365, "step": 6992 }, { "epoch": 1.55, "learning_rate": 9.833931720321042e-06, "logits/chosen": -1.6461461782455444, "logits/rejected": -1.6693371534347534, "logps/chosen": -166.58924865722656, "logps/rejected": -112.17508697509766, "loss": 0.3022, "rewards/accuracies": 1.0, "rewards/chosen": -5.489731788635254, "rewards/margins": 0.18626785278320312, "rewards/rejected": -5.675999641418457, "step": 6993 }, { "epoch": 1.55, "learning_rate": 9.833473315927498e-06, "logits/chosen": -1.666608452796936, "logits/rejected": -1.6528303623199463, "logps/chosen": -68.68014526367188, "logps/rejected": -159.09909057617188, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.7046302556991577, "rewards/margins": 4.026336669921875, "rewards/rejected": -5.730967044830322, "step": 6994 }, { "epoch": 1.55, "learning_rate": 9.833014290444254e-06, "logits/chosen": -1.7907081842422485, "logits/rejected": -1.77487313747406, "logps/chosen": -163.6597900390625, "logps/rejected": -160.571044921875, "loss": 0.0497, "rewards/accuracies": 1.0, "rewards/chosen": -5.856462001800537, "rewards/margins": 2.2595725059509277, "rewards/rejected": -8.116034507751465, "step": 6995 }, { "epoch": 1.55, "learning_rate": 9.832554643930292e-06, "logits/chosen": -1.493422031402588, "logits/rejected": -1.5404634475708008, "logps/chosen": -180.19186401367188, "logps/rejected": -169.412353515625, "loss": 0.1065, "rewards/accuracies": 1.0, "rewards/chosen": -4.365405559539795, "rewards/margins": 8.040071487426758, "rewards/rejected": -12.405476570129395, "step": 6996 }, { "epoch": 1.55, "learning_rate": 9.832094376444675e-06, "logits/chosen": -1.617935061454773, "logits/rejected": -1.617935061454773, "logps/chosen": -121.0827865600586, "logps/rejected": -121.0827865600586, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -8.365561485290527, "rewards/margins": 0.0, "rewards/rejected": -8.365561485290527, "step": 6997 }, { "epoch": 1.55, "learning_rate": 9.831633488046547e-06, "logits/chosen": -1.6592389345169067, "logits/rejected": -1.717780590057373, "logps/chosen": -230.56546020507812, "logps/rejected": -220.82131958007812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.2121338844299316, "rewards/margins": 6.858266353607178, "rewards/rejected": -10.07040023803711, "step": 6998 }, { "epoch": 1.55, "learning_rate": 9.83117197879513e-06, "logits/chosen": -1.6985869407653809, "logits/rejected": -1.6985869407653809, "logps/chosen": -157.11785888671875, "logps/rejected": -157.11785888671875, "loss": 0.3474, "rewards/accuracies": 0.0, "rewards/chosen": -2.284777879714966, "rewards/margins": 0.0, "rewards/rejected": -2.284777879714966, "step": 6999 }, { "epoch": 1.55, "learning_rate": 9.830709848749727e-06, "logits/chosen": -1.4332928657531738, "logits/rejected": -1.429397702217102, "logps/chosen": -32.09432601928711, "logps/rejected": -91.6744155883789, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -2.2111592292785645, "rewards/margins": 5.395979881286621, "rewards/rejected": -7.6071391105651855, "step": 7000 }, { "epoch": 1.55, "learning_rate": 9.830247097969723e-06, "logits/chosen": -2.0411741733551025, "logits/rejected": -2.044067621231079, "logps/chosen": -122.53524780273438, "logps/rejected": -165.76329040527344, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": -2.6157264709472656, "rewards/margins": 2.2127685546875, "rewards/rejected": -4.828495025634766, "step": 7001 }, { "epoch": 1.55, "learning_rate": 9.829783726514578e-06, "logits/chosen": -1.3760641813278198, "logits/rejected": -1.3561553955078125, "logps/chosen": -142.96322631835938, "logps/rejected": -217.2899169921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.651911973953247, "rewards/margins": 8.491485595703125, "rewards/rejected": -12.143397331237793, "step": 7002 }, { "epoch": 1.55, "learning_rate": 9.829319734443833e-06, "logits/chosen": -1.4584330320358276, "logits/rejected": -1.4584330320358276, "logps/chosen": -282.85113525390625, "logps/rejected": -282.85113525390625, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -17.050413131713867, "rewards/margins": 0.0, "rewards/rejected": -17.050413131713867, "step": 7003 }, { "epoch": 1.55, "learning_rate": 9.828855121817114e-06, "logits/chosen": -1.5069962739944458, "logits/rejected": -1.3419148921966553, "logps/chosen": -77.346435546875, "logps/rejected": -332.29742431640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.07315979152917862, "rewards/margins": 10.141472816467285, "rewards/rejected": -10.214632987976074, "step": 7004 }, { "epoch": 1.55, "learning_rate": 9.82838988869412e-06, "logits/chosen": -1.9079492092132568, "logits/rejected": -1.8932726383209229, "logps/chosen": -111.92230224609375, "logps/rejected": -130.75192260742188, "loss": 0.3, "rewards/accuracies": 1.0, "rewards/chosen": -2.934368848800659, "rewards/margins": 1.90305495262146, "rewards/rejected": -4.837423801422119, "step": 7005 }, { "epoch": 1.55, "learning_rate": 9.827924035134629e-06, "logits/chosen": -1.7201181650161743, "logits/rejected": -1.8529905080795288, "logps/chosen": -162.143310546875, "logps/rejected": -268.02813720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.7951722145080566, "rewards/margins": 20.70729637145996, "rewards/rejected": -17.912124633789062, "step": 7006 }, { "epoch": 1.55, "learning_rate": 9.827457561198507e-06, "logits/chosen": -1.7706308364868164, "logits/rejected": -1.7886182069778442, "logps/chosen": -114.33784484863281, "logps/rejected": -127.11663818359375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.012531280517578, "rewards/margins": 7.046772003173828, "rewards/rejected": -10.059303283691406, "step": 7007 }, { "epoch": 1.55, "learning_rate": 9.826990466945695e-06, "logits/chosen": -1.692165493965149, "logits/rejected": -1.7485709190368652, "logps/chosen": -252.35186767578125, "logps/rejected": -293.4359130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.241664171218872, "rewards/margins": 11.825514793395996, "rewards/rejected": -14.067178726196289, "step": 7008 }, { "epoch": 1.55, "learning_rate": 9.826522752436211e-06, "logits/chosen": -1.5868926048278809, "logits/rejected": -1.5582056045532227, "logps/chosen": -157.09112548828125, "logps/rejected": -280.5197448730469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.055056095123291, "rewards/margins": 8.633642196655273, "rewards/rejected": -14.688697814941406, "step": 7009 }, { "epoch": 1.55, "learning_rate": 9.826054417730156e-06, "logits/chosen": -1.928990364074707, "logits/rejected": -1.7887067794799805, "logps/chosen": -189.23439025878906, "logps/rejected": -260.9741516113281, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.1049515008926392, "rewards/margins": 6.518832683563232, "rewards/rejected": -7.623784065246582, "step": 7010 }, { "epoch": 1.55, "learning_rate": 9.825585462887709e-06, "logits/chosen": -1.7697763442993164, "logits/rejected": -1.719594120979309, "logps/chosen": -168.4557342529297, "logps/rejected": -239.15658569335938, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -7.061759948730469, "rewards/margins": 5.279510498046875, "rewards/rejected": -12.341270446777344, "step": 7011 }, { "epoch": 1.55, "learning_rate": 9.825115887969131e-06, "logits/chosen": -1.879220962524414, "logits/rejected": -1.6970499753952026, "logps/chosen": -140.41552734375, "logps/rejected": -261.510498046875, "loss": 2.5513, "rewards/accuracies": 0.0, "rewards/chosen": -5.360589027404785, "rewards/margins": -5.095989227294922, "rewards/rejected": -0.26459962129592896, "step": 7012 }, { "epoch": 1.55, "learning_rate": 9.82464569303476e-06, "logits/chosen": -1.6191858053207397, "logits/rejected": -1.5682722330093384, "logps/chosen": -107.690673828125, "logps/rejected": -166.3341522216797, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -2.1022675037384033, "rewards/margins": 3.267259359359741, "rewards/rejected": -5.3695268630981445, "step": 7013 }, { "epoch": 1.55, "learning_rate": 9.824174878145017e-06, "logits/chosen": -1.7974722385406494, "logits/rejected": -1.771643877029419, "logps/chosen": -164.89456176757812, "logps/rejected": -264.9356689453125, "loss": 0.788, "rewards/accuracies": 0.0, "rewards/chosen": -3.5364303588867188, "rewards/margins": -1.3443374633789062, "rewards/rejected": -2.1920928955078125, "step": 7014 }, { "epoch": 1.55, "learning_rate": 9.823703443360398e-06, "logits/chosen": -1.2682431936264038, "logits/rejected": -1.2699682712554932, "logps/chosen": -188.169677734375, "logps/rejected": -248.1049041748047, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6943771243095398, "rewards/margins": 9.451825141906738, "rewards/rejected": -10.146202087402344, "step": 7015 }, { "epoch": 1.55, "learning_rate": 9.823231388741483e-06, "logits/chosen": -1.795454502105713, "logits/rejected": -1.7125377655029297, "logps/chosen": -96.96477508544922, "logps/rejected": -205.64862060546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.070920705795288, "rewards/margins": 7.316667556762695, "rewards/rejected": -9.387588500976562, "step": 7016 }, { "epoch": 1.55, "learning_rate": 9.822758714348928e-06, "logits/chosen": -1.775547742843628, "logits/rejected": -1.779958724975586, "logps/chosen": -97.89918518066406, "logps/rejected": -188.1689453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.4788414239883423, "rewards/margins": 8.02336311340332, "rewards/rejected": -9.502204895019531, "step": 7017 }, { "epoch": 1.55, "learning_rate": 9.822285420243474e-06, "logits/chosen": -1.6673557758331299, "logits/rejected": -1.6793099641799927, "logps/chosen": -132.16543579101562, "logps/rejected": -174.42135620117188, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -3.044304609298706, "rewards/margins": 4.980368614196777, "rewards/rejected": -8.024673461914062, "step": 7018 }, { "epoch": 1.55, "learning_rate": 9.821811506485934e-06, "logits/chosen": -2.0265655517578125, "logits/rejected": -2.0957255363464355, "logps/chosen": -169.18807983398438, "logps/rejected": -191.09568786621094, "loss": 0.0292, "rewards/accuracies": 1.0, "rewards/chosen": -0.9484359622001648, "rewards/margins": 12.262718200683594, "rewards/rejected": -13.211153984069824, "step": 7019 }, { "epoch": 1.55, "learning_rate": 9.821336973137207e-06, "logits/chosen": -1.5230484008789062, "logits/rejected": -1.6737407445907593, "logps/chosen": -210.74224853515625, "logps/rejected": -131.82696533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.8355071544647217, "rewards/margins": 13.49777889251709, "rewards/rejected": -9.662271499633789, "step": 7020 }, { "epoch": 1.55, "learning_rate": 9.820861820258269e-06, "logits/chosen": -1.4091603755950928, "logits/rejected": -1.3428934812545776, "logps/chosen": -216.97747802734375, "logps/rejected": -333.17034912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.096621990203857, "rewards/margins": 13.778701782226562, "rewards/rejected": -17.875324249267578, "step": 7021 }, { "epoch": 1.55, "learning_rate": 9.820386047910177e-06, "logits/chosen": -1.3480594158172607, "logits/rejected": -1.2822023630142212, "logps/chosen": -161.1858367919922, "logps/rejected": -302.2200012207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.491110324859619, "rewards/margins": 10.630718231201172, "rewards/rejected": -8.139607429504395, "step": 7022 }, { "epoch": 1.55, "learning_rate": 9.819909656154066e-06, "logits/chosen": -1.4466723203659058, "logits/rejected": -1.5135374069213867, "logps/chosen": -256.97003173828125, "logps/rejected": -164.73033142089844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4597443342208862, "rewards/margins": 11.042854309082031, "rewards/rejected": -12.502598762512207, "step": 7023 }, { "epoch": 1.55, "learning_rate": 9.81943264505115e-06, "logits/chosen": -1.9575961828231812, "logits/rejected": -1.9751391410827637, "logps/chosen": -85.08021545410156, "logps/rejected": -139.13015747070312, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -0.2666564881801605, "rewards/margins": 3.7146456241607666, "rewards/rejected": -3.98130202293396, "step": 7024 }, { "epoch": 1.55, "learning_rate": 9.818955014662725e-06, "logits/chosen": -1.6965755224227905, "logits/rejected": -1.6870453357696533, "logps/chosen": -201.99754333496094, "logps/rejected": -259.4234619140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.956687927246094, "rewards/margins": 9.027711868286133, "rewards/rejected": -17.984399795532227, "step": 7025 }, { "epoch": 1.56, "learning_rate": 9.818476765050167e-06, "logits/chosen": -1.657996416091919, "logits/rejected": -1.590832233428955, "logps/chosen": -147.89614868164062, "logps/rejected": -266.3477478027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9483169317245483, "rewards/margins": 21.177705764770508, "rewards/rejected": -23.126022338867188, "step": 7026 }, { "epoch": 1.56, "learning_rate": 9.817997896274925e-06, "logits/chosen": -1.2859044075012207, "logits/rejected": -1.1375646591186523, "logps/chosen": -163.8619384765625, "logps/rejected": -431.8406982421875, "loss": 0.0625, "rewards/accuracies": 1.0, "rewards/chosen": 0.014436340890824795, "rewards/margins": 11.54464054107666, "rewards/rejected": -11.530203819274902, "step": 7027 }, { "epoch": 1.56, "learning_rate": 9.817518408398536e-06, "logits/chosen": -1.6415822505950928, "logits/rejected": -1.5850105285644531, "logps/chosen": -105.97895812988281, "logps/rejected": -183.8567657470703, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.2508515119552612, "rewards/margins": 5.7236199378967285, "rewards/rejected": -6.974471569061279, "step": 7028 }, { "epoch": 1.56, "learning_rate": 9.817038301482612e-06, "logits/chosen": -1.5769550800323486, "logits/rejected": -1.5769550800323486, "logps/chosen": -95.76873779296875, "logps/rejected": -95.76873779296875, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -3.8085334300994873, "rewards/margins": 0.0, "rewards/rejected": -3.8085334300994873, "step": 7029 }, { "epoch": 1.56, "learning_rate": 9.81655757558885e-06, "logits/chosen": -1.588238000869751, "logits/rejected": -1.670930027961731, "logps/chosen": -224.43136596679688, "logps/rejected": -183.55740356445312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.593835353851318, "rewards/margins": 5.997415065765381, "rewards/rejected": -10.5912504196167, "step": 7030 }, { "epoch": 1.56, "learning_rate": 9.816076230779014e-06, "logits/chosen": -1.474498987197876, "logits/rejected": -1.5923012495040894, "logps/chosen": -236.7535400390625, "logps/rejected": -223.3831787109375, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.7706069946289062, "rewards/margins": 17.253131866455078, "rewards/rejected": -19.023738861083984, "step": 7031 }, { "epoch": 1.56, "learning_rate": 9.815594267114962e-06, "logits/chosen": -1.4285465478897095, "logits/rejected": -1.4704831838607788, "logps/chosen": -214.5146484375, "logps/rejected": -105.56768798828125, "loss": 2.3238, "rewards/accuracies": 0.0, "rewards/chosen": -12.131396293640137, "rewards/margins": -4.636575222015381, "rewards/rejected": -7.494821071624756, "step": 7032 }, { "epoch": 1.56, "learning_rate": 9.815111684658622e-06, "logits/chosen": -1.596701979637146, "logits/rejected": -1.5870052576065063, "logps/chosen": -132.07833862304688, "logps/rejected": -237.94384765625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": 0.2758773863315582, "rewards/margins": 6.341624736785889, "rewards/rejected": -6.065747261047363, "step": 7033 }, { "epoch": 1.56, "learning_rate": 9.814628483472006e-06, "logits/chosen": -1.233299970626831, "logits/rejected": -1.144899606704712, "logps/chosen": -94.406982421875, "logps/rejected": -195.22779846191406, "loss": 0.5823, "rewards/accuracies": 0.0, "rewards/chosen": -0.2732856869697571, "rewards/margins": -0.7539939880371094, "rewards/rejected": 0.4807083308696747, "step": 7034 }, { "epoch": 1.56, "learning_rate": 9.814144663617204e-06, "logits/chosen": -1.6943435668945312, "logits/rejected": -1.1114721298217773, "logps/chosen": -172.2000274658203, "logps/rejected": -961.6685180664062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.964334011077881, "rewards/margins": 75.09282684326172, "rewards/rejected": -80.05715942382812, "step": 7035 }, { "epoch": 1.56, "learning_rate": 9.813660225156385e-06, "logits/chosen": -1.425537347793579, "logits/rejected": -1.3744316101074219, "logps/chosen": -77.96794891357422, "logps/rejected": -157.25869750976562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1516456604003906, "rewards/margins": 10.738884925842285, "rewards/rejected": -12.890530586242676, "step": 7036 }, { "epoch": 1.56, "learning_rate": 9.813175168151801e-06, "logits/chosen": -1.5621323585510254, "logits/rejected": -1.563962697982788, "logps/chosen": -127.66111755371094, "logps/rejected": -152.62753295898438, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -2.8392434120178223, "rewards/margins": 4.153528690338135, "rewards/rejected": -6.992772102355957, "step": 7037 }, { "epoch": 1.56, "learning_rate": 9.812689492665777e-06, "logits/chosen": -1.5930442810058594, "logits/rejected": -1.3759475946426392, "logps/chosen": -201.13499450683594, "logps/rejected": -432.13873291015625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -11.206245422363281, "rewards/margins": 5.820793151855469, "rewards/rejected": -17.02703857421875, "step": 7038 }, { "epoch": 1.56, "learning_rate": 9.812203198760722e-06, "logits/chosen": -1.3867838382720947, "logits/rejected": -1.3696171045303345, "logps/chosen": -113.15127563476562, "logps/rejected": -179.15850830078125, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -2.2203598022460938, "rewards/margins": 4.882485866546631, "rewards/rejected": -7.102845668792725, "step": 7039 }, { "epoch": 1.56, "learning_rate": 9.811716286499125e-06, "logits/chosen": -1.623559594154358, "logits/rejected": -1.6771233081817627, "logps/chosen": -103.46827697753906, "logps/rejected": -135.17156982421875, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": 2.005209445953369, "rewards/margins": 12.073724746704102, "rewards/rejected": -10.06851577758789, "step": 7040 }, { "epoch": 1.56, "learning_rate": 9.811228755943551e-06, "logits/chosen": -1.5777839422225952, "logits/rejected": -1.6214559078216553, "logps/chosen": -104.55015563964844, "logps/rejected": -154.29551696777344, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.641220808029175, "rewards/margins": 5.610098838806152, "rewards/rejected": -8.251319885253906, "step": 7041 }, { "epoch": 1.56, "learning_rate": 9.810740607156647e-06, "logits/chosen": -1.5492898225784302, "logits/rejected": -1.5492898225784302, "logps/chosen": -211.2724151611328, "logps/rejected": -211.2724151611328, "loss": 0.3534, "rewards/accuracies": 0.0, "rewards/chosen": -11.797484397888184, "rewards/margins": 0.0, "rewards/rejected": -11.797484397888184, "step": 7042 }, { "epoch": 1.56, "learning_rate": 9.810251840201143e-06, "logits/chosen": -1.5789512395858765, "logits/rejected": -1.5288043022155762, "logps/chosen": -184.7110595703125, "logps/rejected": -266.5319519042969, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.580529689788818, "rewards/margins": 6.828896999359131, "rewards/rejected": -12.40942668914795, "step": 7043 }, { "epoch": 1.56, "learning_rate": 9.80976245513984e-06, "logits/chosen": -1.5338232517242432, "logits/rejected": -1.3556644916534424, "logps/chosen": -184.29937744140625, "logps/rejected": -358.2014465332031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.3539612293243408, "rewards/margins": 12.552558898925781, "rewards/rejected": -11.19859790802002, "step": 7044 }, { "epoch": 1.56, "learning_rate": 9.809272452035622e-06, "logits/chosen": -1.7153865098953247, "logits/rejected": -1.63082754611969, "logps/chosen": -106.91870880126953, "logps/rejected": -307.75048828125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.14033579826355, "rewards/margins": 12.970213890075684, "rewards/rejected": -16.110549926757812, "step": 7045 }, { "epoch": 1.56, "learning_rate": 9.808781830951457e-06, "logits/chosen": -1.5689762830734253, "logits/rejected": -1.5375691652297974, "logps/chosen": -131.22061157226562, "logps/rejected": -121.28528594970703, "loss": 0.3439, "rewards/accuracies": 1.0, "rewards/chosen": -2.903795003890991, "rewards/margins": 0.010600090026855469, "rewards/rejected": -2.9143950939178467, "step": 7046 }, { "epoch": 1.56, "learning_rate": 9.808290591950386e-06, "logits/chosen": -1.3972777128219604, "logits/rejected": -1.3608174324035645, "logps/chosen": -170.04014587402344, "logps/rejected": -245.98390197753906, "loss": 0.1823, "rewards/accuracies": 1.0, "rewards/chosen": -7.1991119384765625, "rewards/margins": 0.8303327560424805, "rewards/rejected": -8.029444694519043, "step": 7047 }, { "epoch": 1.56, "learning_rate": 9.807798735095533e-06, "logits/chosen": -1.5167224407196045, "logits/rejected": -1.5219573974609375, "logps/chosen": -198.28208923339844, "logps/rejected": -103.61225891113281, "loss": 1.0139, "rewards/accuracies": 0.0, "rewards/chosen": -4.406224250793457, "rewards/margins": -1.8038606643676758, "rewards/rejected": -2.6023635864257812, "step": 7048 }, { "epoch": 1.56, "learning_rate": 9.807306260450098e-06, "logits/chosen": -1.4094151258468628, "logits/rejected": -1.4288946390151978, "logps/chosen": -191.53634643554688, "logps/rejected": -230.1622314453125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.999189853668213, "rewards/margins": 5.766722202301025, "rewards/rejected": -11.765912055969238, "step": 7049 }, { "epoch": 1.56, "learning_rate": 9.806813168077367e-06, "logits/chosen": -1.6911245584487915, "logits/rejected": -1.4104772806167603, "logps/chosen": -163.1063232421875, "logps/rejected": -434.80792236328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 4.41693115234375, "rewards/margins": 11.410146713256836, "rewards/rejected": -6.993216037750244, "step": 7050 }, { "epoch": 1.56, "learning_rate": 9.806319458040701e-06, "logits/chosen": -1.2651666402816772, "logits/rejected": -1.240140676498413, "logps/chosen": -130.6318359375, "logps/rejected": -263.7362060546875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.571887195110321, "rewards/margins": 8.449759483337402, "rewards/rejected": -9.021646499633789, "step": 7051 }, { "epoch": 1.56, "learning_rate": 9.805825130403536e-06, "logits/chosen": -1.286758303642273, "logits/rejected": -1.29822838306427, "logps/chosen": -221.5196533203125, "logps/rejected": -252.1783905029297, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 1.0214217901229858, "rewards/margins": 6.899286270141602, "rewards/rejected": -5.877864360809326, "step": 7052 }, { "epoch": 1.56, "learning_rate": 9.805330185229397e-06, "logits/chosen": -1.3925737142562866, "logits/rejected": -1.5666298866271973, "logps/chosen": -261.91162109375, "logps/rejected": -162.62669372558594, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -6.491462707519531, "rewards/margins": 4.055795669555664, "rewards/rejected": -10.547258377075195, "step": 7053 }, { "epoch": 1.56, "learning_rate": 9.804834622581879e-06, "logits/chosen": -1.373254418373108, "logits/rejected": -1.1185990571975708, "logps/chosen": -301.40106201171875, "logps/rejected": -544.05908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.599865913391113, "rewards/margins": 33.04174041748047, "rewards/rejected": -38.641605377197266, "step": 7054 }, { "epoch": 1.56, "learning_rate": 9.804338442524661e-06, "logits/chosen": -1.259507417678833, "logits/rejected": -1.2640771865844727, "logps/chosen": -65.19523620605469, "logps/rejected": -88.81683349609375, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": 1.316192626953125, "rewards/margins": 2.3417954444885254, "rewards/rejected": -1.0256026983261108, "step": 7055 }, { "epoch": 1.56, "learning_rate": 9.803841645121505e-06, "logits/chosen": -1.1075587272644043, "logits/rejected": -1.1358193159103394, "logps/chosen": -219.2017059326172, "logps/rejected": -250.7388916015625, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -2.2574844360351562, "rewards/margins": 3.8905625343322754, "rewards/rejected": -6.148046970367432, "step": 7056 }, { "epoch": 1.56, "learning_rate": 9.803344230436245e-06, "logits/chosen": -1.3831793069839478, "logits/rejected": -1.1606757640838623, "logps/chosen": -136.02423095703125, "logps/rejected": -389.88494873046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.3864083290100098, "rewards/margins": 11.290031433105469, "rewards/rejected": -13.676440238952637, "step": 7057 }, { "epoch": 1.56, "learning_rate": 9.802846198532798e-06, "logits/chosen": -1.537781834602356, "logits/rejected": -1.537781834602356, "logps/chosen": -131.59535217285156, "logps/rejected": -131.59535217285156, "loss": 0.3499, "rewards/accuracies": 0.0, "rewards/chosen": -5.590972900390625, "rewards/margins": 0.0, "rewards/rejected": -5.590972900390625, "step": 7058 }, { "epoch": 1.56, "learning_rate": 9.80234754947516e-06, "logits/chosen": -1.3622468709945679, "logits/rejected": -1.4283398389816284, "logps/chosen": -212.48284912109375, "logps/rejected": -177.203125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.8968446254730225, "rewards/margins": 4.988641738891602, "rewards/rejected": -7.885486602783203, "step": 7059 }, { "epoch": 1.56, "learning_rate": 9.801848283327406e-06, "logits/chosen": -1.4183590412139893, "logits/rejected": -1.4183590412139893, "logps/chosen": -155.47671508789062, "logps/rejected": -155.47671508789062, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -11.10811996459961, "rewards/margins": 0.0, "rewards/rejected": -11.10811996459961, "step": 7060 }, { "epoch": 1.56, "learning_rate": 9.801348400153692e-06, "logits/chosen": -1.4258475303649902, "logits/rejected": -1.2655359506607056, "logps/chosen": -190.1238555908203, "logps/rejected": -356.647705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.583906650543213, "rewards/margins": 9.372005462646484, "rewards/rejected": -6.788098335266113, "step": 7061 }, { "epoch": 1.56, "learning_rate": 9.800847900018251e-06, "logits/chosen": -1.2647227048873901, "logits/rejected": -1.2279157638549805, "logps/chosen": -165.0115966796875, "logps/rejected": -203.49603271484375, "loss": 0.4141, "rewards/accuracies": 1.0, "rewards/chosen": -7.9534478187561035, "rewards/margins": 0.06929826736450195, "rewards/rejected": -8.022746086120605, "step": 7062 }, { "epoch": 1.56, "learning_rate": 9.800346782985395e-06, "logits/chosen": -1.3342610597610474, "logits/rejected": -1.3342610597610474, "logps/chosen": -248.75579833984375, "logps/rejected": -248.75579833984375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -11.866320610046387, "rewards/margins": 0.0, "rewards/rejected": -11.866320610046387, "step": 7063 }, { "epoch": 1.56, "learning_rate": 9.799845049119517e-06, "logits/chosen": -1.72230863571167, "logits/rejected": -1.7022655010223389, "logps/chosen": -103.77702331542969, "logps/rejected": -171.5196075439453, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": -3.3053605556488037, "rewards/margins": 2.3586862087249756, "rewards/rejected": -5.664046764373779, "step": 7064 }, { "epoch": 1.56, "learning_rate": 9.79934269848509e-06, "logits/chosen": -1.7098231315612793, "logits/rejected": -1.5117946863174438, "logps/chosen": -171.75657653808594, "logps/rejected": -366.2446594238281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.19744873046875, "rewards/margins": 10.012100219726562, "rewards/rejected": -11.209548950195312, "step": 7065 }, { "epoch": 1.56, "learning_rate": 9.798839731146662e-06, "logits/chosen": -1.285627841949463, "logits/rejected": -1.3510658740997314, "logps/chosen": -75.18755340576172, "logps/rejected": -61.987762451171875, "loss": 0.1467, "rewards/accuracies": 1.0, "rewards/chosen": -3.0802040100097656, "rewards/margins": 1.0779390335083008, "rewards/rejected": -4.158143043518066, "step": 7066 }, { "epoch": 1.56, "learning_rate": 9.798336147168865e-06, "logits/chosen": -1.1205551624298096, "logits/rejected": -1.148667812347412, "logps/chosen": -79.08183288574219, "logps/rejected": -108.67538452148438, "loss": 0.1591, "rewards/accuracies": 1.0, "rewards/chosen": -1.3335860967636108, "rewards/margins": 0.9825509786605835, "rewards/rejected": -2.3161370754241943, "step": 7067 }, { "epoch": 1.56, "learning_rate": 9.797831946616408e-06, "logits/chosen": -1.4260329008102417, "logits/rejected": -1.372215986251831, "logps/chosen": -107.88931274414062, "logps/rejected": -143.41175842285156, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.6310333013534546, "rewards/margins": 4.621946811676025, "rewards/rejected": -6.2529802322387695, "step": 7068 }, { "epoch": 1.56, "learning_rate": 9.797327129554081e-06, "logits/chosen": -1.6009693145751953, "logits/rejected": -1.6807615756988525, "logps/chosen": -240.22988891601562, "logps/rejected": -313.2632751464844, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 2.0307953357696533, "rewards/margins": 20.49149513244629, "rewards/rejected": -18.4606990814209, "step": 7069 }, { "epoch": 1.56, "learning_rate": 9.796821696046748e-06, "logits/chosen": -1.1207364797592163, "logits/rejected": -1.1207364797592163, "logps/chosen": -236.30441284179688, "logps/rejected": -236.30441284179688, "loss": 0.3484, "rewards/accuracies": 0.0, "rewards/chosen": -11.401165962219238, "rewards/margins": 0.0, "rewards/rejected": -11.401165962219238, "step": 7070 }, { "epoch": 1.57, "learning_rate": 9.79631564615936e-06, "logits/chosen": -1.7187819480895996, "logits/rejected": -1.6308342218399048, "logps/chosen": -99.42544555664062, "logps/rejected": -240.0388946533203, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.2064208984375, "rewards/margins": 12.167834281921387, "rewards/rejected": -13.374255180358887, "step": 7071 }, { "epoch": 1.57, "learning_rate": 9.79580897995694e-06, "logits/chosen": -1.8024567365646362, "logits/rejected": -1.8049439191818237, "logps/chosen": -118.0888671875, "logps/rejected": -150.9678192138672, "loss": 0.0435, "rewards/accuracies": 1.0, "rewards/chosen": -2.7269234657287598, "rewards/margins": 2.3979673385620117, "rewards/rejected": -5.1248908042907715, "step": 7072 }, { "epoch": 1.57, "learning_rate": 9.795301697504595e-06, "logits/chosen": -1.426138997077942, "logits/rejected": -1.4014328718185425, "logps/chosen": -92.99287414550781, "logps/rejected": -146.6171417236328, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": -1.9840805530548096, "rewards/margins": 2.5370562076568604, "rewards/rejected": -4.52113676071167, "step": 7073 }, { "epoch": 1.57, "learning_rate": 9.794793798867512e-06, "logits/chosen": -1.5919052362442017, "logits/rejected": -1.581038236618042, "logps/chosen": -117.4105224609375, "logps/rejected": -179.21563720703125, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -0.41116562485694885, "rewards/margins": 9.798222541809082, "rewards/rejected": -10.20938777923584, "step": 7074 }, { "epoch": 1.57, "learning_rate": 9.794285284110949e-06, "logits/chosen": -1.3119828701019287, "logits/rejected": -1.192950963973999, "logps/chosen": -95.70513153076172, "logps/rejected": -173.7911376953125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.6234657764434814, "rewards/margins": 5.841594696044922, "rewards/rejected": -8.465060234069824, "step": 7075 }, { "epoch": 1.57, "learning_rate": 9.793776153300253e-06, "logits/chosen": -1.3894152641296387, "logits/rejected": -1.40340256690979, "logps/chosen": -285.798095703125, "logps/rejected": -165.6963348388672, "loss": 0.1537, "rewards/accuracies": 1.0, "rewards/chosen": -2.520263671875, "rewards/margins": 1.0243072509765625, "rewards/rejected": -3.5445709228515625, "step": 7076 }, { "epoch": 1.57, "learning_rate": 9.793266406500847e-06, "logits/chosen": -1.0801085233688354, "logits/rejected": -0.8972711563110352, "logps/chosen": -96.37255859375, "logps/rejected": -258.3408508300781, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.10714035481214523, "rewards/margins": 7.697376728057861, "rewards/rejected": -7.590236186981201, "step": 7077 }, { "epoch": 1.57, "learning_rate": 9.792756043778229e-06, "logits/chosen": -1.323042869567871, "logits/rejected": -1.2994598150253296, "logps/chosen": -119.3333740234375, "logps/rejected": -254.406005859375, "loss": 0.1366, "rewards/accuracies": 1.0, "rewards/chosen": -9.370237350463867, "rewards/margins": 1.1582727432250977, "rewards/rejected": -10.528510093688965, "step": 7078 }, { "epoch": 1.57, "learning_rate": 9.79224506519798e-06, "logits/chosen": -1.4857521057128906, "logits/rejected": -1.2736293077468872, "logps/chosen": -189.17715454101562, "logps/rejected": -380.70904541015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 4.304873943328857, "rewards/margins": 22.950849533081055, "rewards/rejected": -18.64597511291504, "step": 7079 }, { "epoch": 1.57, "learning_rate": 9.791733470825763e-06, "logits/chosen": -1.21564781665802, "logits/rejected": -1.1797434091567993, "logps/chosen": -227.13302612304688, "logps/rejected": -298.651123046875, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -6.185070991516113, "rewards/margins": 7.111978530883789, "rewards/rejected": -13.297049522399902, "step": 7080 }, { "epoch": 1.57, "learning_rate": 9.791221260727313e-06, "logits/chosen": -1.5517375469207764, "logits/rejected": -1.7223522663116455, "logps/chosen": -218.43252563476562, "logps/rejected": -230.5341796875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.1602020263671875, "rewards/margins": 6.891326904296875, "rewards/rejected": -8.051528930664062, "step": 7081 }, { "epoch": 1.57, "learning_rate": 9.790708434968448e-06, "logits/chosen": -1.546787977218628, "logits/rejected": -1.5699806213378906, "logps/chosen": -86.5909423828125, "logps/rejected": -109.15972900390625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2710113525390625, "rewards/margins": 6.529684543609619, "rewards/rejected": -6.258673191070557, "step": 7082 }, { "epoch": 1.57, "learning_rate": 9.790194993615065e-06, "logits/chosen": -1.65703547000885, "logits/rejected": -1.6028929948806763, "logps/chosen": -217.47457885742188, "logps/rejected": -346.98052978515625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.8382843136787415, "rewards/margins": 8.444511413574219, "rewards/rejected": -9.282795906066895, "step": 7083 }, { "epoch": 1.57, "learning_rate": 9.78968093673314e-06, "logits/chosen": -1.155407428741455, "logits/rejected": -1.2503368854522705, "logps/chosen": -178.59014892578125, "logps/rejected": -246.21456909179688, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -5.210028171539307, "rewards/margins": 7.063726902008057, "rewards/rejected": -12.273755073547363, "step": 7084 }, { "epoch": 1.57, "learning_rate": 9.789166264388732e-06, "logits/chosen": -1.4255157709121704, "logits/rejected": -1.4522429704666138, "logps/chosen": -107.67733001708984, "logps/rejected": -151.56796264648438, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.3516883850097656, "rewards/margins": 5.500882625579834, "rewards/rejected": -6.8525710105896, "step": 7085 }, { "epoch": 1.57, "learning_rate": 9.78865097664797e-06, "logits/chosen": -1.3512561321258545, "logits/rejected": -1.208285927772522, "logps/chosen": -130.8850555419922, "logps/rejected": -258.3855895996094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.6925629377365112, "rewards/margins": 11.244199752807617, "rewards/rejected": -12.936762809753418, "step": 7086 }, { "epoch": 1.57, "learning_rate": 9.788135073577069e-06, "logits/chosen": -1.6548569202423096, "logits/rejected": -1.5803216695785522, "logps/chosen": -195.8245849609375, "logps/rejected": -321.53009033203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.3650710582733154, "rewards/margins": 12.611894607543945, "rewards/rejected": -10.24682331085205, "step": 7087 }, { "epoch": 1.57, "learning_rate": 9.787618555242321e-06, "logits/chosen": -1.7171318531036377, "logits/rejected": -1.7621862888336182, "logps/chosen": -135.555419921875, "logps/rejected": -168.46194458007812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.104452610015869, "rewards/margins": 9.554407119750977, "rewards/rejected": -12.658859252929688, "step": 7088 }, { "epoch": 1.57, "learning_rate": 9.787101421710099e-06, "logits/chosen": -1.3333159685134888, "logits/rejected": -1.4584144353866577, "logps/chosen": -270.1455078125, "logps/rejected": -262.3762512207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.949554443359375, "rewards/margins": 13.534337997436523, "rewards/rejected": -19.4838924407959, "step": 7089 }, { "epoch": 1.57, "learning_rate": 9.786583673046851e-06, "logits/chosen": -1.5765279531478882, "logits/rejected": -1.5893076658248901, "logps/chosen": -176.81219482421875, "logps/rejected": -150.63827514648438, "loss": 0.0842, "rewards/accuracies": 1.0, "rewards/chosen": -3.458282470703125, "rewards/margins": 1.696704387664795, "rewards/rejected": -5.15498685836792, "step": 7090 }, { "epoch": 1.57, "learning_rate": 9.786065309319107e-06, "logits/chosen": -1.5363656282424927, "logits/rejected": -1.4390901327133179, "logps/chosen": -110.52043151855469, "logps/rejected": -304.9251708984375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.024916172027588, "rewards/margins": 12.58970832824707, "rewards/rejected": -14.6146240234375, "step": 7091 }, { "epoch": 1.57, "learning_rate": 9.785546330593479e-06, "logits/chosen": -1.3782495260238647, "logits/rejected": -1.3845293521881104, "logps/chosen": -142.04965209960938, "logps/rejected": -161.50665283203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7454010248184204, "rewards/margins": 7.175334453582764, "rewards/rejected": -8.920735359191895, "step": 7092 }, { "epoch": 1.57, "learning_rate": 9.78502673693665e-06, "logits/chosen": -1.4246121644973755, "logits/rejected": -1.322707176208496, "logps/chosen": -100.867919921875, "logps/rejected": -172.72198486328125, "loss": 0.2267, "rewards/accuracies": 1.0, "rewards/chosen": -2.9329726696014404, "rewards/margins": 4.51129150390625, "rewards/rejected": -7.4442644119262695, "step": 7093 }, { "epoch": 1.57, "learning_rate": 9.784506528415388e-06, "logits/chosen": -1.4060133695602417, "logits/rejected": -1.3965262174606323, "logps/chosen": -101.44827270507812, "logps/rejected": -82.77348327636719, "loss": 0.1504, "rewards/accuracies": 1.0, "rewards/chosen": -2.9714722633361816, "rewards/margins": 3.6658310890197754, "rewards/rejected": -6.637303352355957, "step": 7094 }, { "epoch": 1.57, "learning_rate": 9.78398570509654e-06, "logits/chosen": -2.1013734340667725, "logits/rejected": -2.058030128479004, "logps/chosen": -145.30519104003906, "logps/rejected": -217.48504638671875, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -3.03118896484375, "rewards/margins": 3.5320863723754883, "rewards/rejected": -6.563275337219238, "step": 7095 }, { "epoch": 1.57, "learning_rate": 9.783464267047027e-06, "logits/chosen": -1.2479254007339478, "logits/rejected": -1.3976390361785889, "logps/chosen": -302.6328125, "logps/rejected": -199.8318328857422, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.8927154541015625, "rewards/margins": 9.919148445129395, "rewards/rejected": -13.811863899230957, "step": 7096 }, { "epoch": 1.57, "learning_rate": 9.782942214333855e-06, "logits/chosen": -1.7604775428771973, "logits/rejected": -1.7653077840805054, "logps/chosen": -103.59282684326172, "logps/rejected": -121.36660766601562, "loss": 0.066, "rewards/accuracies": 1.0, "rewards/chosen": -2.8483314514160156, "rewards/margins": 2.694983959197998, "rewards/rejected": -5.543315410614014, "step": 7097 }, { "epoch": 1.57, "learning_rate": 9.782419547024108e-06, "logits/chosen": -1.5236469507217407, "logits/rejected": -1.5236469507217407, "logps/chosen": -191.5575714111328, "logps/rejected": -191.5575714111328, "loss": 0.3505, "rewards/accuracies": 0.0, "rewards/chosen": -7.180461406707764, "rewards/margins": 0.0, "rewards/rejected": -7.180461406707764, "step": 7098 }, { "epoch": 1.57, "learning_rate": 9.781896265184944e-06, "logits/chosen": -1.764357566833496, "logits/rejected": -1.8215746879577637, "logps/chosen": -76.35420227050781, "logps/rejected": -95.57384490966797, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.6693008542060852, "rewards/margins": 7.014980316162109, "rewards/rejected": -7.684281349182129, "step": 7099 }, { "epoch": 1.57, "learning_rate": 9.781372368883607e-06, "logits/chosen": -1.2887072563171387, "logits/rejected": -1.2829028367996216, "logps/chosen": -163.65220642089844, "logps/rejected": -193.57965087890625, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": -6.162745952606201, "rewards/margins": 2.141932964324951, "rewards/rejected": -8.304678916931152, "step": 7100 }, { "epoch": 1.57, "learning_rate": 9.780847858187414e-06, "logits/chosen": -1.4901320934295654, "logits/rejected": -1.4878733158111572, "logps/chosen": -99.80802154541016, "logps/rejected": -188.6749267578125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.2564948797225952, "rewards/margins": 7.585022449493408, "rewards/rejected": -8.841517448425293, "step": 7101 }, { "epoch": 1.57, "learning_rate": 9.780322733163766e-06, "logits/chosen": -1.797394037246704, "logits/rejected": -1.6153290271759033, "logps/chosen": -97.16810607910156, "logps/rejected": -308.54095458984375, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.6259918212890625, "rewards/margins": 4.2214674949646, "rewards/rejected": -5.847459316253662, "step": 7102 }, { "epoch": 1.57, "learning_rate": 9.779796993880135e-06, "logits/chosen": -1.3614883422851562, "logits/rejected": -1.3826146125793457, "logps/chosen": -243.8075714111328, "logps/rejected": -216.5508270263672, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 1.08171546459198, "rewards/margins": 9.809839248657227, "rewards/rejected": -8.728123664855957, "step": 7103 }, { "epoch": 1.57, "learning_rate": 9.779270640404082e-06, "logits/chosen": -2.0833871364593506, "logits/rejected": -2.0343830585479736, "logps/chosen": -131.33499145507812, "logps/rejected": -146.92123413085938, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -2.5645782947540283, "rewards/margins": 3.60066294670105, "rewards/rejected": -6.165241241455078, "step": 7104 }, { "epoch": 1.57, "learning_rate": 9.778743672803241e-06, "logits/chosen": -1.5182805061340332, "logits/rejected": -1.130176067352295, "logps/chosen": -162.2830810546875, "logps/rejected": -1020.5802001953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.1196441650390625, "rewards/margins": 72.32038116455078, "rewards/rejected": -72.20073699951172, "step": 7105 }, { "epoch": 1.57, "learning_rate": 9.778216091145325e-06, "logits/chosen": -1.418229341506958, "logits/rejected": -1.4476622343063354, "logps/chosen": -198.95541381835938, "logps/rejected": -213.33050537109375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 0.15812377631664276, "rewards/margins": 4.958624362945557, "rewards/rejected": -4.800500392913818, "step": 7106 }, { "epoch": 1.57, "learning_rate": 9.777687895498128e-06, "logits/chosen": -1.6904199123382568, "logits/rejected": -1.5256268978118896, "logps/chosen": -62.20527648925781, "logps/rejected": -178.74288940429688, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": 0.6355674862861633, "rewards/margins": 4.117063045501709, "rewards/rejected": -3.4814956188201904, "step": 7107 }, { "epoch": 1.57, "learning_rate": 9.777159085929524e-06, "logits/chosen": -1.3463735580444336, "logits/rejected": -1.2243467569351196, "logps/chosen": -106.8293685913086, "logps/rejected": -220.546630859375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.713080644607544, "rewards/margins": 8.054229736328125, "rewards/rejected": -11.76731014251709, "step": 7108 }, { "epoch": 1.57, "learning_rate": 9.776629662507458e-06, "logits/chosen": -1.363047480583191, "logits/rejected": -1.3855260610580444, "logps/chosen": -157.99481201171875, "logps/rejected": -141.42347717285156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.23395691812038422, "rewards/margins": 7.63347864151001, "rewards/rejected": -7.399521827697754, "step": 7109 }, { "epoch": 1.57, "learning_rate": 9.776099625299966e-06, "logits/chosen": -1.517856478691101, "logits/rejected": -1.5178518295288086, "logps/chosen": -82.90657043457031, "logps/rejected": -127.55970764160156, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -0.8544303774833679, "rewards/margins": 3.6883676052093506, "rewards/rejected": -4.542798042297363, "step": 7110 }, { "epoch": 1.57, "learning_rate": 9.775568974375151e-06, "logits/chosen": -1.24394690990448, "logits/rejected": -1.2864092588424683, "logps/chosen": -261.1250915527344, "logps/rejected": -260.11572265625, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -9.384467124938965, "rewards/margins": 2.890043258666992, "rewards/rejected": -12.274510383605957, "step": 7111 }, { "epoch": 1.57, "learning_rate": 9.775037709801206e-06, "logits/chosen": -1.5341660976409912, "logits/rejected": -1.5836735963821411, "logps/chosen": -154.88800048828125, "logps/rejected": -189.92604064941406, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": -4.741661071777344, "rewards/margins": 2.5092759132385254, "rewards/rejected": -7.250936985015869, "step": 7112 }, { "epoch": 1.57, "learning_rate": 9.774505831646392e-06, "logits/chosen": -1.5566043853759766, "logits/rejected": -1.389586091041565, "logps/chosen": -200.48223876953125, "logps/rejected": -304.468505859375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.13640137016773224, "rewards/margins": 5.15643310546875, "rewards/rejected": -5.020031929016113, "step": 7113 }, { "epoch": 1.57, "learning_rate": 9.773973339979056e-06, "logits/chosen": -1.5114610195159912, "logits/rejected": -1.340500831604004, "logps/chosen": -78.51873779296875, "logps/rejected": -286.43829345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.705736517906189, "rewards/margins": 11.565030097961426, "rewards/rejected": -13.270766258239746, "step": 7114 }, { "epoch": 1.57, "learning_rate": 9.773440234867623e-06, "logits/chosen": -1.3302576541900635, "logits/rejected": -1.3400306701660156, "logps/chosen": -165.9495849609375, "logps/rejected": -195.864013671875, "loss": 0.4326, "rewards/accuracies": 0.0, "rewards/chosen": -7.741654872894287, "rewards/margins": -0.3189725875854492, "rewards/rejected": -7.422682285308838, "step": 7115 }, { "epoch": 1.58, "learning_rate": 9.772906516380594e-06, "logits/chosen": -1.1405583620071411, "logits/rejected": -1.1603416204452515, "logps/chosen": -106.93761444091797, "logps/rejected": -129.83798217773438, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": -3.8368241786956787, "rewards/margins": 2.1493546962738037, "rewards/rejected": -5.986178874969482, "step": 7116 }, { "epoch": 1.58, "learning_rate": 9.772372184586551e-06, "logits/chosen": -1.8328535556793213, "logits/rejected": -1.7674534320831299, "logps/chosen": -109.0983657836914, "logps/rejected": -234.84619140625, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -2.7831153869628906, "rewards/margins": 7.53426456451416, "rewards/rejected": -10.31737995147705, "step": 7117 }, { "epoch": 1.58, "learning_rate": 9.771837239554156e-06, "logits/chosen": -1.3978594541549683, "logits/rejected": -1.3791838884353638, "logps/chosen": -82.24718475341797, "logps/rejected": -174.41990661621094, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.5165001153945923, "rewards/margins": 4.952147006988525, "rewards/rejected": -6.468647003173828, "step": 7118 }, { "epoch": 1.58, "learning_rate": 9.771301681352148e-06, "logits/chosen": -1.6780292987823486, "logits/rejected": -1.6780292987823486, "logps/chosen": -216.1622314453125, "logps/rejected": -216.1622314453125, "loss": 0.4065, "rewards/accuracies": 0.0, "rewards/chosen": -11.135931015014648, "rewards/margins": 0.0, "rewards/rejected": -11.135931015014648, "step": 7119 }, { "epoch": 1.58, "learning_rate": 9.770765510049342e-06, "logits/chosen": -1.5480128526687622, "logits/rejected": -1.4558074474334717, "logps/chosen": -135.30953979492188, "logps/rejected": -178.19224548339844, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.3070305585861206, "rewards/margins": 6.0483856201171875, "rewards/rejected": -7.355416297912598, "step": 7120 }, { "epoch": 1.58, "learning_rate": 9.770228725714637e-06, "logits/chosen": -1.6613951921463013, "logits/rejected": -1.6470177173614502, "logps/chosen": -117.32354736328125, "logps/rejected": -149.92848205566406, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -3.0255584716796875, "rewards/margins": 3.1792826652526855, "rewards/rejected": -6.204841136932373, "step": 7121 }, { "epoch": 1.58, "learning_rate": 9.769691328417008e-06, "logits/chosen": -1.5770293474197388, "logits/rejected": -0.9011025428771973, "logps/chosen": -208.11721801757812, "logps/rejected": -710.1910400390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.847698986530304, "rewards/margins": 42.89809799194336, "rewards/rejected": -43.74579620361328, "step": 7122 }, { "epoch": 1.58, "learning_rate": 9.769153318225509e-06, "logits/chosen": -1.6757389307022095, "logits/rejected": -1.7052379846572876, "logps/chosen": -117.6912841796875, "logps/rejected": -110.68495178222656, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -3.351876974105835, "rewards/margins": 4.098684310913086, "rewards/rejected": -7.4505615234375, "step": 7123 }, { "epoch": 1.58, "learning_rate": 9.768614695209273e-06, "logits/chosen": -1.341792345046997, "logits/rejected": -1.341792345046997, "logps/chosen": -126.34760284423828, "logps/rejected": -126.34760284423828, "loss": 0.3703, "rewards/accuracies": 0.0, "rewards/chosen": -2.196221113204956, "rewards/margins": 0.0, "rewards/rejected": -2.196221113204956, "step": 7124 }, { "epoch": 1.58, "learning_rate": 9.768075459437513e-06, "logits/chosen": -1.3708831071853638, "logits/rejected": -1.4005619287490845, "logps/chosen": -112.35220336914062, "logps/rejected": -135.83029174804688, "loss": 0.0798, "rewards/accuracies": 1.0, "rewards/chosen": -0.5118881464004517, "rewards/margins": 1.7563446760177612, "rewards/rejected": -2.268232822418213, "step": 7125 }, { "epoch": 1.58, "learning_rate": 9.76753561097952e-06, "logits/chosen": -1.6878896951675415, "logits/rejected": -1.6089316606521606, "logps/chosen": -62.120567321777344, "logps/rejected": -210.5635986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8062717914581299, "rewards/margins": 10.557430267333984, "rewards/rejected": -12.363701820373535, "step": 7126 }, { "epoch": 1.58, "learning_rate": 9.766995149904658e-06, "logits/chosen": -1.2724506855010986, "logits/rejected": -1.2472912073135376, "logps/chosen": -131.23422241210938, "logps/rejected": -148.92398071289062, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": -2.6473548412323, "rewards/margins": 2.6183106899261475, "rewards/rejected": -5.265665531158447, "step": 7127 }, { "epoch": 1.58, "learning_rate": 9.766454076282382e-06, "logits/chosen": -1.7224868535995483, "logits/rejected": -1.6972841024398804, "logps/chosen": -108.4178466796875, "logps/rejected": -195.54029846191406, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.3008904457092285, "rewards/margins": 5.802527904510498, "rewards/rejected": -10.103418350219727, "step": 7128 }, { "epoch": 1.58, "learning_rate": 9.765912390182216e-06, "logits/chosen": -1.468040943145752, "logits/rejected": -1.3951780796051025, "logps/chosen": -82.51332092285156, "logps/rejected": -198.19741821289062, "loss": 0.0667, "rewards/accuracies": 1.0, "rewards/chosen": 0.9247817993164062, "rewards/margins": 7.843025207519531, "rewards/rejected": -6.918243408203125, "step": 7129 }, { "epoch": 1.58, "learning_rate": 9.765370091673762e-06, "logits/chosen": -1.5103145837783813, "logits/rejected": -1.5385072231292725, "logps/chosen": -214.39266967773438, "logps/rejected": -173.57232666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.319244384765625, "rewards/margins": 12.79555606842041, "rewards/rejected": -10.476311683654785, "step": 7130 }, { "epoch": 1.58, "learning_rate": 9.764827180826708e-06, "logits/chosen": -1.5074514150619507, "logits/rejected": -1.570304036140442, "logps/chosen": -276.9471740722656, "logps/rejected": -209.4988555908203, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 3.764312744140625, "rewards/margins": 7.883175849914551, "rewards/rejected": -4.118863105773926, "step": 7131 }, { "epoch": 1.58, "learning_rate": 9.764283657710815e-06, "logits/chosen": -1.5741099119186401, "logits/rejected": -1.6461974382400513, "logps/chosen": -114.03880310058594, "logps/rejected": -88.31649017333984, "loss": 0.0739, "rewards/accuracies": 1.0, "rewards/chosen": 0.03852386400103569, "rewards/margins": 7.12066125869751, "rewards/rejected": -7.082137584686279, "step": 7132 }, { "epoch": 1.58, "learning_rate": 9.763739522395926e-06, "logits/chosen": -1.6262630224227905, "logits/rejected": -1.5144202709197998, "logps/chosen": -157.93817138671875, "logps/rejected": -227.16952514648438, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -4.944701671600342, "rewards/margins": 3.719543933868408, "rewards/rejected": -8.66424560546875, "step": 7133 }, { "epoch": 1.58, "learning_rate": 9.76319477495196e-06, "logits/chosen": -1.4190270900726318, "logits/rejected": -1.3721287250518799, "logps/chosen": -242.28445434570312, "logps/rejected": -392.11383056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0379912853240967, "rewards/margins": 13.456100463867188, "rewards/rejected": -15.494091987609863, "step": 7134 }, { "epoch": 1.58, "learning_rate": 9.762649415448916e-06, "logits/chosen": -1.5042024850845337, "logits/rejected": -1.6394567489624023, "logps/chosen": -274.67462158203125, "logps/rejected": -172.7783966064453, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.692071557044983, "rewards/margins": 6.944456577301025, "rewards/rejected": -8.636528015136719, "step": 7135 }, { "epoch": 1.58, "learning_rate": 9.76210344395687e-06, "logits/chosen": -1.3396421670913696, "logits/rejected": -1.3602391481399536, "logps/chosen": -78.1121597290039, "logps/rejected": -69.88797760009766, "loss": 0.4733, "rewards/accuracies": 0.0, "rewards/chosen": -1.8552368879318237, "rewards/margins": -0.4555363655090332, "rewards/rejected": -1.3997005224227905, "step": 7136 }, { "epoch": 1.58, "learning_rate": 9.76155686054598e-06, "logits/chosen": -1.3135087490081787, "logits/rejected": -1.2693547010421753, "logps/chosen": -139.73236083984375, "logps/rejected": -140.43914794921875, "loss": 0.1368, "rewards/accuracies": 1.0, "rewards/chosen": -3.8820817470550537, "rewards/margins": 1.1561095714569092, "rewards/rejected": -5.038191318511963, "step": 7137 }, { "epoch": 1.58, "learning_rate": 9.76100966528648e-06, "logits/chosen": -1.7415032386779785, "logits/rejected": -1.7154699563980103, "logps/chosen": -143.55528259277344, "logps/rejected": -162.30154418945312, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": -4.4129533767700195, "rewards/margins": 2.9449338912963867, "rewards/rejected": -7.357887268066406, "step": 7138 }, { "epoch": 1.58, "learning_rate": 9.760461858248684e-06, "logits/chosen": -1.3491535186767578, "logits/rejected": -1.4329562187194824, "logps/chosen": -254.71575927734375, "logps/rejected": -181.28123474121094, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/chosen": -3.3191590309143066, "rewards/margins": 2.6377549171447754, "rewards/rejected": -5.956913948059082, "step": 7139 }, { "epoch": 1.58, "learning_rate": 9.759913439502982e-06, "logits/chosen": -1.4093672037124634, "logits/rejected": -1.3962292671203613, "logps/chosen": -180.6833953857422, "logps/rejected": -159.80169677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7226715087890625, "rewards/margins": 13.71874713897705, "rewards/rejected": -12.996075630187988, "step": 7140 }, { "epoch": 1.58, "learning_rate": 9.759364409119844e-06, "logits/chosen": -1.5018587112426758, "logits/rejected": -1.5077292919158936, "logps/chosen": -180.9423370361328, "logps/rejected": -240.36175537109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.4356613159179688, "rewards/margins": 7.64630126953125, "rewards/rejected": -5.210639953613281, "step": 7141 }, { "epoch": 1.58, "learning_rate": 9.758814767169825e-06, "logits/chosen": -1.5357409715652466, "logits/rejected": -1.5436639785766602, "logps/chosen": -139.77381896972656, "logps/rejected": -177.8740997314453, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -1.118525743484497, "rewards/margins": 3.690294027328491, "rewards/rejected": -4.808819770812988, "step": 7142 }, { "epoch": 1.58, "learning_rate": 9.758264513723544e-06, "logits/chosen": -1.6639117002487183, "logits/rejected": -1.6039584875106812, "logps/chosen": -185.9560089111328, "logps/rejected": -448.6794128417969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.6698639392852783, "rewards/margins": 24.248367309570312, "rewards/rejected": -25.918231964111328, "step": 7143 }, { "epoch": 1.58, "learning_rate": 9.757713648851714e-06, "logits/chosen": -1.135016679763794, "logits/rejected": -1.1541767120361328, "logps/chosen": -238.12188720703125, "logps/rejected": -335.4223327636719, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.5189971923828125, "rewards/margins": 8.034896850585938, "rewards/rejected": -8.55389404296875, "step": 7144 }, { "epoch": 1.58, "learning_rate": 9.757162172625116e-06, "logits/chosen": -1.7709647417068481, "logits/rejected": -1.74626624584198, "logps/chosen": -129.27410888671875, "logps/rejected": -208.7460174560547, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -3.377138614654541, "rewards/margins": 4.631771564483643, "rewards/rejected": -8.008910179138184, "step": 7145 }, { "epoch": 1.58, "learning_rate": 9.756610085114615e-06, "logits/chosen": -1.765195369720459, "logits/rejected": -1.8298239707946777, "logps/chosen": -233.7776641845703, "logps/rejected": -175.40261840820312, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.5784652829170227, "rewards/margins": 7.287789821624756, "rewards/rejected": -7.866255283355713, "step": 7146 }, { "epoch": 1.58, "learning_rate": 9.756057386391154e-06, "logits/chosen": -1.9037892818450928, "logits/rejected": -1.8221328258514404, "logps/chosen": -108.36817932128906, "logps/rejected": -260.1534423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5466240048408508, "rewards/margins": 11.514909744262695, "rewards/rejected": -12.06153392791748, "step": 7147 }, { "epoch": 1.58, "learning_rate": 9.75550407652575e-06, "logits/chosen": -1.6036407947540283, "logits/rejected": -1.119578242301941, "logps/chosen": -255.4708251953125, "logps/rejected": -1031.638427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.333205223083496, "rewards/margins": 83.38152313232422, "rewards/rejected": -92.71472930908203, "step": 7148 }, { "epoch": 1.58, "learning_rate": 9.754950155589504e-06, "logits/chosen": -1.9744527339935303, "logits/rejected": -2.0022027492523193, "logps/chosen": -120.99510955810547, "logps/rejected": -109.22164916992188, "loss": 0.1085, "rewards/accuracies": 1.0, "rewards/chosen": -0.39127883315086365, "rewards/margins": 1.4176483154296875, "rewards/rejected": -1.8089271783828735, "step": 7149 }, { "epoch": 1.58, "learning_rate": 9.754395623653595e-06, "logits/chosen": -1.3516275882720947, "logits/rejected": -1.3288532495498657, "logps/chosen": -210.19326782226562, "logps/rejected": -259.069580078125, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -11.999974250793457, "rewards/margins": 3.704533576965332, "rewards/rejected": -15.704507827758789, "step": 7150 }, { "epoch": 1.58, "learning_rate": 9.753840480789278e-06, "logits/chosen": -1.647858738899231, "logits/rejected": -1.6306343078613281, "logps/chosen": -216.26100158691406, "logps/rejected": -277.46746826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5056106448173523, "rewards/margins": 14.018155097961426, "rewards/rejected": -14.523765563964844, "step": 7151 }, { "epoch": 1.58, "learning_rate": 9.753284727067886e-06, "logits/chosen": -1.6934149265289307, "logits/rejected": -1.7017602920532227, "logps/chosen": -80.19989013671875, "logps/rejected": -195.829833984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.0128570795059204, "rewards/margins": 8.072758674621582, "rewards/rejected": -9.085616111755371, "step": 7152 }, { "epoch": 1.58, "learning_rate": 9.752728362560834e-06, "logits/chosen": -1.8392043113708496, "logits/rejected": -1.7522928714752197, "logps/chosen": -145.60174560546875, "logps/rejected": -331.45977783203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 3.644064426422119, "rewards/margins": 12.727493286132812, "rewards/rejected": -9.083429336547852, "step": 7153 }, { "epoch": 1.58, "learning_rate": 9.752171387339612e-06, "logits/chosen": -1.5479248762130737, "logits/rejected": -1.511396050453186, "logps/chosen": -86.51951599121094, "logps/rejected": -226.80645751953125, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -0.5125457644462585, "rewards/margins": 10.140707015991211, "rewards/rejected": -10.653252601623535, "step": 7154 }, { "epoch": 1.58, "learning_rate": 9.75161380147579e-06, "logits/chosen": -1.4912033081054688, "logits/rejected": -1.4912033081054688, "logps/chosen": -322.3583984375, "logps/rejected": -322.3583984375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.662295818328857, "rewards/margins": 0.0, "rewards/rejected": -5.662295818328857, "step": 7155 }, { "epoch": 1.58, "learning_rate": 9.751055605041017e-06, "logits/chosen": -1.5614076852798462, "logits/rejected": -1.586614966392517, "logps/chosen": -156.02256774902344, "logps/rejected": -162.51571655273438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.806133985519409, "rewards/margins": 9.834982872009277, "rewards/rejected": -7.028849124908447, "step": 7156 }, { "epoch": 1.58, "learning_rate": 9.750496798107021e-06, "logits/chosen": -1.593674659729004, "logits/rejected": -1.5588148832321167, "logps/chosen": -155.29861450195312, "logps/rejected": -188.30160522460938, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -2.485577344894409, "rewards/margins": 4.038158416748047, "rewards/rejected": -6.523736000061035, "step": 7157 }, { "epoch": 1.58, "learning_rate": 9.749937380745607e-06, "logits/chosen": -1.3434975147247314, "logits/rejected": -1.3661773204803467, "logps/chosen": -186.22291564941406, "logps/rejected": -212.0179443359375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -4.871913433074951, "rewards/margins": 4.976789951324463, "rewards/rejected": -9.848703384399414, "step": 7158 }, { "epoch": 1.58, "learning_rate": 9.749377353028657e-06, "logits/chosen": -1.3004260063171387, "logits/rejected": -1.3277794122695923, "logps/chosen": -268.78076171875, "logps/rejected": -305.32659912109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.575030505657196, "rewards/margins": 17.91060447692871, "rewards/rejected": -18.48563575744629, "step": 7159 }, { "epoch": 1.58, "learning_rate": 9.748816715028135e-06, "logits/chosen": -1.441741704940796, "logits/rejected": -1.4252952337265015, "logps/chosen": -170.77468872070312, "logps/rejected": -151.09954833984375, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -2.9922378063201904, "rewards/margins": 3.3178622722625732, "rewards/rejected": -6.310100078582764, "step": 7160 }, { "epoch": 1.58, "learning_rate": 9.748255466816081e-06, "logits/chosen": -1.5397567749023438, "logits/rejected": -1.5397567749023438, "logps/chosen": -128.87646484375, "logps/rejected": -128.87646484375, "loss": 0.3486, "rewards/accuracies": 0.0, "rewards/chosen": -5.110486030578613, "rewards/margins": 0.0, "rewards/rejected": -5.110486030578613, "step": 7161 }, { "epoch": 1.59, "learning_rate": 9.747693608464614e-06, "logits/chosen": -2.0184719562530518, "logits/rejected": -2.0058019161224365, "logps/chosen": -115.77537536621094, "logps/rejected": -113.95448303222656, "loss": 0.1596, "rewards/accuracies": 1.0, "rewards/chosen": -1.234326958656311, "rewards/margins": 0.9851714372634888, "rewards/rejected": -2.2194983959198, "step": 7162 }, { "epoch": 1.59, "learning_rate": 9.74713114004593e-06, "logits/chosen": -1.574389934539795, "logits/rejected": -1.5587290525436401, "logps/chosen": -102.10785675048828, "logps/rejected": -218.1426544189453, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.6099274158477783, "rewards/margins": 9.4331636428833, "rewards/rejected": -12.0430908203125, "step": 7163 }, { "epoch": 1.59, "learning_rate": 9.746568061632308e-06, "logits/chosen": -1.3281240463256836, "logits/rejected": -1.3503931760787964, "logps/chosen": -139.12985229492188, "logps/rejected": -196.5989227294922, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.198933482170105, "rewards/margins": 10.183741569519043, "rewards/rejected": -11.382675170898438, "step": 7164 }, { "epoch": 1.59, "learning_rate": 9.746004373296099e-06, "logits/chosen": -1.6134860515594482, "logits/rejected": -1.6188809871673584, "logps/chosen": -192.95626831054688, "logps/rejected": -298.01531982421875, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -6.83458948135376, "rewards/margins": 5.466623783111572, "rewards/rejected": -12.301213264465332, "step": 7165 }, { "epoch": 1.59, "learning_rate": 9.745440075109738e-06, "logits/chosen": -1.9537787437438965, "logits/rejected": -1.9385244846343994, "logps/chosen": -118.16120910644531, "logps/rejected": -173.42889404296875, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -4.456973552703857, "rewards/margins": 2.871976852416992, "rewards/rejected": -7.32895040512085, "step": 7166 }, { "epoch": 1.59, "learning_rate": 9.744875167145735e-06, "logits/chosen": -1.8849585056304932, "logits/rejected": -1.853041172027588, "logps/chosen": -187.64300537109375, "logps/rejected": -282.71514892578125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.4912598133087158, "rewards/margins": 6.585507392883301, "rewards/rejected": -8.076766967773438, "step": 7167 }, { "epoch": 1.59, "learning_rate": 9.74430964947668e-06, "logits/chosen": -1.5724679231643677, "logits/rejected": -1.5113269090652466, "logps/chosen": -151.56736755371094, "logps/rejected": -288.41851806640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3809280395507812, "rewards/margins": 6.266091823577881, "rewards/rejected": -7.647019863128662, "step": 7168 }, { "epoch": 1.59, "learning_rate": 9.74374352217524e-06, "logits/chosen": -1.2144958972930908, "logits/rejected": -0.7825767397880554, "logps/chosen": -174.107666015625, "logps/rejected": -1063.8875732421875, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -6.286418437957764, "rewards/margins": 85.69461822509766, "rewards/rejected": -91.98103332519531, "step": 7169 }, { "epoch": 1.59, "learning_rate": 9.743176785314159e-06, "logits/chosen": -1.7622313499450684, "logits/rejected": -1.7622313499450684, "logps/chosen": -95.73046875, "logps/rejected": -95.73046875, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.969823360443115, "rewards/margins": 0.0, "rewards/rejected": -4.969823360443115, "step": 7170 }, { "epoch": 1.59, "learning_rate": 9.742609438966265e-06, "logits/chosen": -1.9418175220489502, "logits/rejected": -1.9599629640579224, "logps/chosen": -195.0576629638672, "logps/rejected": -104.2542724609375, "loss": 3.2993, "rewards/accuracies": 0.0, "rewards/chosen": -8.901328086853027, "rewards/margins": -6.596981048583984, "rewards/rejected": -2.304347276687622, "step": 7171 }, { "epoch": 1.59, "learning_rate": 9.74204148320446e-06, "logits/chosen": -1.6937905550003052, "logits/rejected": -1.7479095458984375, "logps/chosen": -145.17684936523438, "logps/rejected": -157.50411987304688, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.786424398422241, "rewards/margins": 7.743207931518555, "rewards/rejected": -11.529632568359375, "step": 7172 }, { "epoch": 1.59, "learning_rate": 9.741472918101722e-06, "logits/chosen": -1.3596937656402588, "logits/rejected": -1.3929487466812134, "logps/chosen": -163.19500732421875, "logps/rejected": -227.62374877929688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.1337220668792725, "rewards/margins": 6.633175849914551, "rewards/rejected": -9.766898155212402, "step": 7173 }, { "epoch": 1.59, "learning_rate": 9.740903743731113e-06, "logits/chosen": -1.2415357828140259, "logits/rejected": -1.1185771226882935, "logps/chosen": -131.22084045410156, "logps/rejected": -177.37167358398438, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 0.627825915813446, "rewards/margins": 3.7119812965393066, "rewards/rejected": -3.084155321121216, "step": 7174 }, { "epoch": 1.59, "learning_rate": 9.74033396016577e-06, "logits/chosen": -1.4134702682495117, "logits/rejected": -1.332352876663208, "logps/chosen": -61.53932571411133, "logps/rejected": -198.190673828125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.5730671286582947, "rewards/margins": 6.465271472930908, "rewards/rejected": -7.038338661193848, "step": 7175 }, { "epoch": 1.59, "learning_rate": 9.739763567478908e-06, "logits/chosen": -1.4933441877365112, "logits/rejected": -1.4754090309143066, "logps/chosen": -81.25288391113281, "logps/rejected": -88.71448516845703, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.0854660272598267, "rewards/margins": 5.448675632476807, "rewards/rejected": -6.534141540527344, "step": 7176 }, { "epoch": 1.59, "learning_rate": 9.739192565743822e-06, "logits/chosen": -1.7976453304290771, "logits/rejected": -1.7976453304290771, "logps/chosen": -112.37965393066406, "logps/rejected": -112.37965393066406, "loss": 0.3611, "rewards/accuracies": 0.0, "rewards/chosen": -5.871967315673828, "rewards/margins": 0.0, "rewards/rejected": -5.871967315673828, "step": 7177 }, { "epoch": 1.59, "learning_rate": 9.738620955033883e-06, "logits/chosen": -1.1710830926895142, "logits/rejected": -1.1710830926895142, "logps/chosen": -219.77474975585938, "logps/rejected": -219.77474975585938, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -9.82113265991211, "rewards/margins": 0.0, "rewards/rejected": -9.82113265991211, "step": 7178 }, { "epoch": 1.59, "learning_rate": 9.738048735422545e-06, "logits/chosen": -1.558397889137268, "logits/rejected": -1.548050880432129, "logps/chosen": -162.7323760986328, "logps/rejected": -248.54739379882812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2662689685821533, "rewards/margins": 10.866475105285645, "rewards/rejected": -13.132743835449219, "step": 7179 }, { "epoch": 1.59, "learning_rate": 9.737475906983333e-06, "logits/chosen": -1.3741199970245361, "logits/rejected": -1.3690651655197144, "logps/chosen": -89.79046630859375, "logps/rejected": -176.27906799316406, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.7014482021331787, "rewards/margins": 4.516237258911133, "rewards/rejected": -7.217685222625732, "step": 7180 }, { "epoch": 1.59, "learning_rate": 9.736902469789855e-06, "logits/chosen": -1.0072365999221802, "logits/rejected": -0.9219515323638916, "logps/chosen": -156.78334045410156, "logps/rejected": -184.16561889648438, "loss": 0.1422, "rewards/accuracies": 1.0, "rewards/chosen": 1.061238169670105, "rewards/margins": 1.5668015480041504, "rewards/rejected": -0.5055633783340454, "step": 7181 }, { "epoch": 1.59, "learning_rate": 9.736328423915797e-06, "logits/chosen": -1.5468050241470337, "logits/rejected": -1.5468050241470337, "logps/chosen": -116.43577575683594, "logps/rejected": -116.43577575683594, "loss": 0.3637, "rewards/accuracies": 0.0, "rewards/chosen": -5.200135231018066, "rewards/margins": 0.0, "rewards/rejected": -5.200135231018066, "step": 7182 }, { "epoch": 1.59, "learning_rate": 9.735753769434923e-06, "logits/chosen": -1.6797912120819092, "logits/rejected": -1.626328945159912, "logps/chosen": -135.40731811523438, "logps/rejected": -253.70628356933594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.767465353012085, "rewards/margins": 10.209671974182129, "rewards/rejected": -12.977137565612793, "step": 7183 }, { "epoch": 1.59, "learning_rate": 9.735178506421075e-06, "logits/chosen": -1.6239776611328125, "logits/rejected": -1.6773191690444946, "logps/chosen": -231.69189453125, "logps/rejected": -187.50823974609375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -9.331369400024414, "rewards/margins": 5.034553527832031, "rewards/rejected": -14.365922927856445, "step": 7184 }, { "epoch": 1.59, "learning_rate": 9.73460263494817e-06, "logits/chosen": -1.3011655807495117, "logits/rejected": -1.358964204788208, "logps/chosen": -185.09881591796875, "logps/rejected": -149.17697143554688, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -2.2718474864959717, "rewards/margins": 3.302748918533325, "rewards/rejected": -5.574596405029297, "step": 7185 }, { "epoch": 1.59, "learning_rate": 9.734026155090208e-06, "logits/chosen": -1.3328866958618164, "logits/rejected": -1.16713285446167, "logps/chosen": -139.9384002685547, "logps/rejected": -272.22802734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.22035065293312073, "rewards/margins": 7.303797721862793, "rewards/rejected": -7.524148464202881, "step": 7186 }, { "epoch": 1.59, "learning_rate": 9.733449066921268e-06, "logits/chosen": -1.5296558141708374, "logits/rejected": -1.4368054866790771, "logps/chosen": -227.638916015625, "logps/rejected": -405.8413391113281, "loss": 0.0895, "rewards/accuracies": 1.0, "rewards/chosen": -2.011789083480835, "rewards/margins": 11.185198783874512, "rewards/rejected": -13.196988105773926, "step": 7187 }, { "epoch": 1.59, "learning_rate": 9.7328713705155e-06, "logits/chosen": -1.2240490913391113, "logits/rejected": -1.2073544263839722, "logps/chosen": -147.19769287109375, "logps/rejected": -208.04318237304688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7990921139717102, "rewards/margins": 8.855359077453613, "rewards/rejected": -9.654451370239258, "step": 7188 }, { "epoch": 1.59, "learning_rate": 9.732293065947138e-06, "logits/chosen": -1.3477810621261597, "logits/rejected": -1.3657361268997192, "logps/chosen": -129.25701904296875, "logps/rejected": -91.3060073852539, "loss": 0.1316, "rewards/accuracies": 1.0, "rewards/chosen": -0.8286522030830383, "rewards/margins": 1.2005860805511475, "rewards/rejected": -2.029238224029541, "step": 7189 }, { "epoch": 1.59, "learning_rate": 9.731714153290492e-06, "logits/chosen": -1.3108803033828735, "logits/rejected": -1.3939082622528076, "logps/chosen": -251.4493865966797, "logps/rejected": -271.07672119140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.7380263805389404, "rewards/margins": 8.215431213378906, "rewards/rejected": -10.953457832336426, "step": 7190 }, { "epoch": 1.59, "learning_rate": 9.731134632619954e-06, "logits/chosen": -1.7024309635162354, "logits/rejected": -1.7056009769439697, "logps/chosen": -145.55552673339844, "logps/rejected": -97.6524887084961, "loss": 1.128, "rewards/accuracies": 0.0, "rewards/chosen": -4.648708343505859, "rewards/margins": -2.145085096359253, "rewards/rejected": -2.5036232471466064, "step": 7191 }, { "epoch": 1.59, "learning_rate": 9.73055450400999e-06, "logits/chosen": -1.7909824848175049, "logits/rejected": -1.7884280681610107, "logps/chosen": -104.11531829833984, "logps/rejected": -170.36795043945312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.4576927423477173, "rewards/margins": 7.557616710662842, "rewards/rejected": -9.01530933380127, "step": 7192 }, { "epoch": 1.59, "learning_rate": 9.729973767535142e-06, "logits/chosen": -1.4598010778427124, "logits/rejected": -1.3682430982589722, "logps/chosen": -148.8775634765625, "logps/rejected": -237.8443145751953, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": -10.82231330871582, "rewards/margins": 2.419846534729004, "rewards/rejected": -13.242159843444824, "step": 7193 }, { "epoch": 1.59, "learning_rate": 9.729392423270036e-06, "logits/chosen": -1.5392427444458008, "logits/rejected": -1.443290114402771, "logps/chosen": -176.95916748046875, "logps/rejected": -328.7254333496094, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.639172375202179, "rewards/margins": 14.01080322265625, "rewards/rejected": -14.649975776672363, "step": 7194 }, { "epoch": 1.59, "learning_rate": 9.728810471289374e-06, "logits/chosen": -1.3032587766647339, "logits/rejected": -1.3602193593978882, "logps/chosen": -173.4756622314453, "logps/rejected": -156.5319366455078, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2422378063201904, "rewards/margins": 8.257314682006836, "rewards/rejected": -10.499552726745605, "step": 7195 }, { "epoch": 1.59, "learning_rate": 9.728227911667934e-06, "logits/chosen": -1.6376750469207764, "logits/rejected": -1.7557717561721802, "logps/chosen": -170.51168823242188, "logps/rejected": -198.28115844726562, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.125461101531982, "rewards/margins": 8.576810836791992, "rewards/rejected": -15.702272415161133, "step": 7196 }, { "epoch": 1.59, "learning_rate": 9.727644744480571e-06, "logits/chosen": -1.4267008304595947, "logits/rejected": -1.2986091375350952, "logps/chosen": -98.39775085449219, "logps/rejected": -228.34246826171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.9842621088027954, "rewards/margins": 12.66086196899414, "rewards/rejected": -11.676599502563477, "step": 7197 }, { "epoch": 1.59, "learning_rate": 9.727060969802226e-06, "logits/chosen": -1.837723970413208, "logits/rejected": -1.0668034553527832, "logps/chosen": -219.21084594726562, "logps/rejected": -1138.5943603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.562521457672119, "rewards/margins": 98.71481323242188, "rewards/rejected": -101.27733612060547, "step": 7198 }, { "epoch": 1.59, "learning_rate": 9.726476587707908e-06, "logits/chosen": -1.4971832036972046, "logits/rejected": -1.7054804563522339, "logps/chosen": -329.331298828125, "logps/rejected": -227.00563049316406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.796322822570801, "rewards/margins": 7.813512802124023, "rewards/rejected": -15.609835624694824, "step": 7199 }, { "epoch": 1.59, "learning_rate": 9.725891598272711e-06, "logits/chosen": -1.846504807472229, "logits/rejected": -1.0331346988677979, "logps/chosen": -178.93356323242188, "logps/rejected": -755.4224853515625, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": 1.4322906732559204, "rewards/margins": 48.69259262084961, "rewards/rejected": -47.26030349731445, "step": 7200 }, { "epoch": 1.59, "learning_rate": 9.725306001571806e-06, "logits/chosen": -1.504392147064209, "logits/rejected": -1.0935338735580444, "logps/chosen": -153.7740478515625, "logps/rejected": -395.48272705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.193368673324585, "rewards/margins": 12.264307022094727, "rewards/rejected": -9.070938110351562, "step": 7201 }, { "epoch": 1.59, "learning_rate": 9.72471979768044e-06, "logits/chosen": -1.2676916122436523, "logits/rejected": -1.344587802886963, "logps/chosen": -238.64761352539062, "logps/rejected": -159.67921447753906, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.9219651222229004, "rewards/margins": 4.804023742675781, "rewards/rejected": -7.725988864898682, "step": 7202 }, { "epoch": 1.59, "learning_rate": 9.724132986673935e-06, "logits/chosen": -1.3770618438720703, "logits/rejected": -1.3146666288375854, "logps/chosen": -127.9462890625, "logps/rejected": -330.79852294921875, "loss": 0.0618, "rewards/accuracies": 1.0, "rewards/chosen": 0.05784912034869194, "rewards/margins": 9.522836685180664, "rewards/rejected": -9.464987754821777, "step": 7203 }, { "epoch": 1.59, "learning_rate": 9.723545568627699e-06, "logits/chosen": -1.3104866743087769, "logits/rejected": -1.3142486810684204, "logps/chosen": -131.4361572265625, "logps/rejected": -153.474853515625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.502432346343994, "rewards/margins": 5.801846981048584, "rewards/rejected": -8.304279327392578, "step": 7204 }, { "epoch": 1.59, "learning_rate": 9.722957543617211e-06, "logits/chosen": -1.682657241821289, "logits/rejected": -1.557572364807129, "logps/chosen": -165.64532470703125, "logps/rejected": -266.9561767578125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -4.048587322235107, "rewards/margins": 5.033413410186768, "rewards/rejected": -9.082000732421875, "step": 7205 }, { "epoch": 1.59, "learning_rate": 9.722368911718034e-06, "logits/chosen": -1.1672661304473877, "logits/rejected": -1.1761109828948975, "logps/chosen": -23.422544479370117, "logps/rejected": -39.00568389892578, "loss": 0.0491, "rewards/accuracies": 1.0, "rewards/chosen": -0.9017720222473145, "rewards/margins": 2.2952682971954346, "rewards/rejected": -3.197040319442749, "step": 7206 }, { "epoch": 1.6, "learning_rate": 9.721779673005805e-06, "logits/chosen": -1.4356532096862793, "logits/rejected": -1.3873542547225952, "logps/chosen": -177.04335021972656, "logps/rejected": -319.79083251953125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.7543838620185852, "rewards/margins": 4.749149799346924, "rewards/rejected": -5.503533840179443, "step": 7207 }, { "epoch": 1.6, "learning_rate": 9.721189827556237e-06, "logits/chosen": -1.2083700895309448, "logits/rejected": -1.1412169933319092, "logps/chosen": -227.1905975341797, "logps/rejected": -295.6150817871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8842849731445312, "rewards/margins": 13.878752708435059, "rewards/rejected": -15.76303768157959, "step": 7208 }, { "epoch": 1.6, "learning_rate": 9.720599375445125e-06, "logits/chosen": -1.6604931354522705, "logits/rejected": -1.6773346662521362, "logps/chosen": -135.34622192382812, "logps/rejected": -151.46780395507812, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.4802413880825043, "rewards/margins": 3.5079591274261475, "rewards/rejected": -3.9882004261016846, "step": 7209 }, { "epoch": 1.6, "learning_rate": 9.720008316748344e-06, "logits/chosen": -1.4772238731384277, "logits/rejected": -1.457903265953064, "logps/chosen": -75.65008544921875, "logps/rejected": -131.42108154296875, "loss": 1.756, "rewards/accuracies": 1.0, "rewards/chosen": -0.7263687252998352, "rewards/margins": 5.915194511413574, "rewards/rejected": -6.641563415527344, "step": 7210 }, { "epoch": 1.6, "learning_rate": 9.719416651541839e-06, "logits/chosen": -1.7095043659210205, "logits/rejected": -1.6742935180664062, "logps/chosen": -141.55186462402344, "logps/rejected": -203.77978515625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.909266710281372, "rewards/margins": 5.654248237609863, "rewards/rejected": -7.563514709472656, "step": 7211 }, { "epoch": 1.6, "learning_rate": 9.718824379901639e-06, "logits/chosen": -1.5244545936584473, "logits/rejected": -1.1946247816085815, "logps/chosen": -127.12605285644531, "logps/rejected": -512.94580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3075592517852783, "rewards/margins": 11.99478530883789, "rewards/rejected": -14.30234432220459, "step": 7212 }, { "epoch": 1.6, "learning_rate": 9.718231501903851e-06, "logits/chosen": -1.4829097986221313, "logits/rejected": -1.4306656122207642, "logps/chosen": -218.33892822265625, "logps/rejected": -275.90826416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6283416748046875, "rewards/margins": 9.91356086730957, "rewards/rejected": -12.541902542114258, "step": 7213 }, { "epoch": 1.6, "learning_rate": 9.717638017624657e-06, "logits/chosen": -1.4339176416397095, "logits/rejected": -1.4661420583724976, "logps/chosen": -183.9859619140625, "logps/rejected": -181.30271911621094, "loss": 1.166, "rewards/accuracies": 0.0, "rewards/chosen": -6.852700233459473, "rewards/margins": -1.0854449272155762, "rewards/rejected": -5.7672553062438965, "step": 7214 }, { "epoch": 1.6, "learning_rate": 9.717043927140319e-06, "logits/chosen": -1.6146827936172485, "logits/rejected": -1.4063433408737183, "logps/chosen": -133.87948608398438, "logps/rejected": -263.390625, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -4.430454254150391, "rewards/margins": 2.9506492614746094, "rewards/rejected": -7.381103515625, "step": 7215 }, { "epoch": 1.6, "learning_rate": 9.716449230527175e-06, "logits/chosen": -1.1889798641204834, "logits/rejected": -1.1889798641204834, "logps/chosen": -89.92981719970703, "logps/rejected": -89.92981719970703, "loss": 0.356, "rewards/accuracies": 0.0, "rewards/chosen": -6.037003993988037, "rewards/margins": 0.0, "rewards/rejected": -6.037003993988037, "step": 7216 }, { "epoch": 1.6, "learning_rate": 9.715853927861643e-06, "logits/chosen": -1.4911783933639526, "logits/rejected": -1.5705698728561401, "logps/chosen": -244.9736328125, "logps/rejected": -160.06784057617188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.3247741758823395, "rewards/margins": 5.7531633377075195, "rewards/rejected": -6.077937602996826, "step": 7217 }, { "epoch": 1.6, "learning_rate": 9.71525801922022e-06, "logits/chosen": -1.3231028318405151, "logits/rejected": -1.214328408241272, "logps/chosen": -104.37757110595703, "logps/rejected": -219.3454132080078, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.00321888923645, "rewards/margins": 6.922686576843262, "rewards/rejected": -8.925905227661133, "step": 7218 }, { "epoch": 1.6, "learning_rate": 9.714661504679474e-06, "logits/chosen": -1.4949047565460205, "logits/rejected": -1.3504267930984497, "logps/chosen": -128.2598114013672, "logps/rejected": -264.19384765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.830859422683716, "rewards/margins": 6.722345352172852, "rewards/rejected": -9.553204536437988, "step": 7219 }, { "epoch": 1.6, "learning_rate": 9.71406438431606e-06, "logits/chosen": -1.6241015195846558, "logits/rejected": -1.586000919342041, "logps/chosen": -176.06320190429688, "logps/rejected": -208.7090606689453, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -6.629425048828125, "rewards/margins": 3.0805740356445312, "rewards/rejected": -9.709999084472656, "step": 7220 }, { "epoch": 1.6, "learning_rate": 9.713466658206703e-06, "logits/chosen": -1.5870225429534912, "logits/rejected": -1.589836597442627, "logps/chosen": -112.91511535644531, "logps/rejected": -126.30268859863281, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.9287155866622925, "rewards/margins": 3.756371021270752, "rewards/rejected": -5.685086727142334, "step": 7221 }, { "epoch": 1.6, "learning_rate": 9.712868326428213e-06, "logits/chosen": -1.4925235509872437, "logits/rejected": -1.5200220346450806, "logps/chosen": -168.23849487304688, "logps/rejected": -164.14085388183594, "loss": 0.5985, "rewards/accuracies": 0.0, "rewards/chosen": -7.480278015136719, "rewards/margins": -0.8286895751953125, "rewards/rejected": -6.651588439941406, "step": 7222 }, { "epoch": 1.6, "learning_rate": 9.712269389057471e-06, "logits/chosen": -1.8525344133377075, "logits/rejected": -1.8492573499679565, "logps/chosen": -127.37004089355469, "logps/rejected": -100.60307312011719, "loss": 0.4117, "rewards/accuracies": 0.0, "rewards/chosen": -3.9168288707733154, "rewards/margins": -0.2446894645690918, "rewards/rejected": -3.6721394062042236, "step": 7223 }, { "epoch": 1.6, "learning_rate": 9.711669846171443e-06, "logits/chosen": -1.5900208950042725, "logits/rejected": -1.6066770553588867, "logps/chosen": -73.92588806152344, "logps/rejected": -128.05551147460938, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 1.1140228509902954, "rewards/margins": 8.868901252746582, "rewards/rejected": -7.754878520965576, "step": 7224 }, { "epoch": 1.6, "learning_rate": 9.711069697847165e-06, "logits/chosen": -1.6317646503448486, "logits/rejected": -1.630942940711975, "logps/chosen": -170.82122802734375, "logps/rejected": -197.13449096679688, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": -2.215344190597534, "rewards/margins": 6.449180603027344, "rewards/rejected": -8.664525032043457, "step": 7225 }, { "epoch": 1.6, "learning_rate": 9.710468944161755e-06, "logits/chosen": -1.802825927734375, "logits/rejected": -1.7791372537612915, "logps/chosen": -134.17349243164062, "logps/rejected": -170.37713623046875, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -2.5362777709960938, "rewards/margins": 3.041189670562744, "rewards/rejected": -5.577467441558838, "step": 7226 }, { "epoch": 1.6, "learning_rate": 9.70986758519241e-06, "logits/chosen": -1.6347486972808838, "logits/rejected": -1.5100772380828857, "logps/chosen": -107.24203491210938, "logps/rejected": -206.4038848876953, "loss": 0.3494, "rewards/accuracies": 1.0, "rewards/chosen": -3.0869667530059814, "rewards/margins": 5.168407440185547, "rewards/rejected": -8.25537395477295, "step": 7227 }, { "epoch": 1.6, "learning_rate": 9.709265621016401e-06, "logits/chosen": -1.5407079458236694, "logits/rejected": -1.6443840265274048, "logps/chosen": -206.18470764160156, "logps/rejected": -148.1751708984375, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -4.658515930175781, "rewards/margins": 6.36380672454834, "rewards/rejected": -11.022322654724121, "step": 7228 }, { "epoch": 1.6, "learning_rate": 9.708663051711083e-06, "logits/chosen": -1.9208636283874512, "logits/rejected": -1.9208636283874512, "logps/chosen": -165.75149536132812, "logps/rejected": -165.75149536132812, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": 0.06656189262866974, "rewards/margins": 0.0, "rewards/rejected": 0.06656189262866974, "step": 7229 }, { "epoch": 1.6, "learning_rate": 9.708059877353881e-06, "logits/chosen": -1.2137563228607178, "logits/rejected": -1.2137563228607178, "logps/chosen": -173.36102294921875, "logps/rejected": -173.36102294921875, "loss": 0.3611, "rewards/accuracies": 0.0, "rewards/chosen": -7.068971157073975, "rewards/margins": 0.0, "rewards/rejected": -7.068971157073975, "step": 7230 }, { "epoch": 1.6, "learning_rate": 9.707456098022303e-06, "logits/chosen": -1.7940962314605713, "logits/rejected": -1.889299988746643, "logps/chosen": -108.53269958496094, "logps/rejected": -89.04281616210938, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -2.8168869018554688, "rewards/margins": 3.886788845062256, "rewards/rejected": -6.703675746917725, "step": 7231 }, { "epoch": 1.6, "learning_rate": 9.706851713793932e-06, "logits/chosen": -1.8845272064208984, "logits/rejected": -1.7446177005767822, "logps/chosen": -217.79481506347656, "logps/rejected": -259.2756652832031, "loss": 0.0674, "rewards/accuracies": 1.0, "rewards/chosen": -4.117961406707764, "rewards/margins": 2.1583709716796875, "rewards/rejected": -6.276332378387451, "step": 7232 }, { "epoch": 1.6, "learning_rate": 9.706246724746433e-06, "logits/chosen": -1.547701120376587, "logits/rejected": -1.5236804485321045, "logps/chosen": -64.32784271240234, "logps/rejected": -104.86192321777344, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.9759136438369751, "rewards/margins": 4.843885898590088, "rewards/rejected": -5.819799423217773, "step": 7233 }, { "epoch": 1.6, "learning_rate": 9.705641130957541e-06, "logits/chosen": -1.3470529317855835, "logits/rejected": -1.1161596775054932, "logps/chosen": -251.21688842773438, "logps/rejected": -511.2457275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3767945766448975, "rewards/margins": 14.263225555419922, "rewards/rejected": -16.6400203704834, "step": 7234 }, { "epoch": 1.6, "learning_rate": 9.705034932505076e-06, "logits/chosen": -1.2484685182571411, "logits/rejected": -1.2885433435440063, "logps/chosen": -106.70091247558594, "logps/rejected": -100.71170043945312, "loss": 0.5073, "rewards/accuracies": 1.0, "rewards/chosen": -1.0454505681991577, "rewards/margins": 0.9698175191879272, "rewards/rejected": -2.015268087387085, "step": 7235 }, { "epoch": 1.6, "learning_rate": 9.704428129466934e-06, "logits/chosen": -1.5725845098495483, "logits/rejected": -1.5784480571746826, "logps/chosen": -108.99095153808594, "logps/rejected": -147.322265625, "loss": 0.232, "rewards/accuracies": 1.0, "rewards/chosen": -2.3320770263671875, "rewards/margins": 0.5273277759552002, "rewards/rejected": -2.8594048023223877, "step": 7236 }, { "epoch": 1.6, "learning_rate": 9.703820721921085e-06, "logits/chosen": -1.4161547422409058, "logits/rejected": -1.5947929620742798, "logps/chosen": -225.0872039794922, "logps/rejected": -200.78131103515625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.5942764282226562, "rewards/margins": 6.809486389160156, "rewards/rejected": -7.4037628173828125, "step": 7237 }, { "epoch": 1.6, "learning_rate": 9.703212709945583e-06, "logits/chosen": -1.5982698202133179, "logits/rejected": -1.6153533458709717, "logps/chosen": -129.51300048828125, "logps/rejected": -172.3699493408203, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -2.055591583251953, "rewards/margins": 3.4514641761779785, "rewards/rejected": -5.507055759429932, "step": 7238 }, { "epoch": 1.6, "learning_rate": 9.70260409361855e-06, "logits/chosen": -1.5709333419799805, "logits/rejected": -1.50198233127594, "logps/chosen": -121.24429321289062, "logps/rejected": -202.16465759277344, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.5107803344726562, "rewards/margins": 6.055814266204834, "rewards/rejected": -7.56659460067749, "step": 7239 }, { "epoch": 1.6, "learning_rate": 9.701994873018198e-06, "logits/chosen": -1.1753778457641602, "logits/rejected": -1.213842749595642, "logps/chosen": -215.97015380859375, "logps/rejected": -233.114501953125, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -3.9265410900115967, "rewards/margins": 3.5956971645355225, "rewards/rejected": -7.522238254547119, "step": 7240 }, { "epoch": 1.6, "learning_rate": 9.70138504822281e-06, "logits/chosen": -1.5141428709030151, "logits/rejected": -1.5878489017486572, "logps/chosen": -118.02925109863281, "logps/rejected": -82.72755432128906, "loss": 0.0787, "rewards/accuracies": 1.0, "rewards/chosen": -2.081308126449585, "rewards/margins": 1.7689313888549805, "rewards/rejected": -3.8502395153045654, "step": 7241 }, { "epoch": 1.6, "learning_rate": 9.700774619310744e-06, "logits/chosen": -1.4234867095947266, "logits/rejected": -1.4582571983337402, "logps/chosen": -231.8022003173828, "logps/rejected": -184.85975646972656, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.25265809893608093, "rewards/margins": 5.94109582901001, "rewards/rejected": -5.6884379386901855, "step": 7242 }, { "epoch": 1.6, "learning_rate": 9.700163586360438e-06, "logits/chosen": -1.6678297519683838, "logits/rejected": -1.6678297519683838, "logps/chosen": -166.08192443847656, "logps/rejected": -166.08192443847656, "loss": 0.3487, "rewards/accuracies": 0.0, "rewards/chosen": -8.270008087158203, "rewards/margins": 0.0, "rewards/rejected": -8.270008087158203, "step": 7243 }, { "epoch": 1.6, "learning_rate": 9.699551949450412e-06, "logits/chosen": -1.6226446628570557, "logits/rejected": -1.6148359775543213, "logps/chosen": -89.7964859008789, "logps/rejected": -112.27033996582031, "loss": 0.0799, "rewards/accuracies": 1.0, "rewards/chosen": -1.6960182189941406, "rewards/margins": 6.526033401489258, "rewards/rejected": -8.222051620483398, "step": 7244 }, { "epoch": 1.6, "learning_rate": 9.698939708659258e-06, "logits/chosen": -1.2179855108261108, "logits/rejected": -1.161881446838379, "logps/chosen": -120.14236450195312, "logps/rejected": -181.3836212158203, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -5.479257106781006, "rewards/margins": 3.714646816253662, "rewards/rejected": -9.193903923034668, "step": 7245 }, { "epoch": 1.6, "learning_rate": 9.698326864065646e-06, "logits/chosen": -1.6293889284133911, "logits/rejected": -1.7566145658493042, "logps/chosen": -233.5858154296875, "logps/rejected": -141.82398986816406, "loss": 0.1475, "rewards/accuracies": 1.0, "rewards/chosen": -4.332104682922363, "rewards/margins": 1.0991597175598145, "rewards/rejected": -5.431264400482178, "step": 7246 }, { "epoch": 1.6, "learning_rate": 9.697713415748327e-06, "logits/chosen": -1.5569483041763306, "logits/rejected": -1.6397931575775146, "logps/chosen": -200.14730834960938, "logps/rejected": -185.4576416015625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.1751251220703125, "rewards/margins": 6.923845291137695, "rewards/rejected": -9.098970413208008, "step": 7247 }, { "epoch": 1.6, "learning_rate": 9.697099363786127e-06, "logits/chosen": -1.059332013130188, "logits/rejected": -1.0791946649551392, "logps/chosen": -173.56517028808594, "logps/rejected": -124.41363525390625, "loss": 0.0676, "rewards/accuracies": 1.0, "rewards/chosen": -4.935908794403076, "rewards/margins": 1.9325895309448242, "rewards/rejected": -6.8684983253479, "step": 7248 }, { "epoch": 1.6, "learning_rate": 9.69648470825795e-06, "logits/chosen": -1.237768530845642, "logits/rejected": -1.272264838218689, "logps/chosen": -85.56261444091797, "logps/rejected": -124.07758331298828, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -0.5498436093330383, "rewards/margins": 2.8725671768188477, "rewards/rejected": -3.422410726547241, "step": 7249 }, { "epoch": 1.6, "learning_rate": 9.695869449242779e-06, "logits/chosen": -1.6892499923706055, "logits/rejected": -1.6268596649169922, "logps/chosen": -117.07965850830078, "logps/rejected": -191.76803588867188, "loss": 0.5505, "rewards/accuracies": 1.0, "rewards/chosen": -3.909391164779663, "rewards/margins": 0.6861960887908936, "rewards/rejected": -4.595587253570557, "step": 7250 }, { "epoch": 1.6, "learning_rate": 9.695253586819672e-06, "logits/chosen": -1.3217052221298218, "logits/rejected": -1.044492483139038, "logps/chosen": -150.6412353515625, "logps/rejected": -348.0464172363281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.8101747035980225, "rewards/margins": 10.183249473571777, "rewards/rejected": -7.373074531555176, "step": 7251 }, { "epoch": 1.61, "learning_rate": 9.694637121067764e-06, "logits/chosen": -1.3036526441574097, "logits/rejected": -1.471584677696228, "logps/chosen": -347.4933166503906, "logps/rejected": -212.11734008789062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.2186737060546875, "rewards/margins": 8.322196006774902, "rewards/rejected": -9.54086971282959, "step": 7252 }, { "epoch": 1.61, "learning_rate": 9.694020052066275e-06, "logits/chosen": -1.1722809076309204, "logits/rejected": -1.1504275798797607, "logps/chosen": -117.73974609375, "logps/rejected": -161.55947875976562, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.9270217418670654, "rewards/margins": 5.363894462585449, "rewards/rejected": -8.290916442871094, "step": 7253 }, { "epoch": 1.61, "learning_rate": 9.693402379894492e-06, "logits/chosen": -1.2620004415512085, "logits/rejected": -1.3694546222686768, "logps/chosen": -265.2352294921875, "logps/rejected": -191.72366333007812, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 1.000421166419983, "rewards/margins": 5.237329006195068, "rewards/rejected": -4.236907958984375, "step": 7254 }, { "epoch": 1.61, "learning_rate": 9.692784104631785e-06, "logits/chosen": -1.787466049194336, "logits/rejected": -1.7814741134643555, "logps/chosen": -112.36581420898438, "logps/rejected": -183.11419677734375, "loss": 0.3469, "rewards/accuracies": 1.0, "rewards/chosen": -1.0418792963027954, "rewards/margins": 7.374851703643799, "rewards/rejected": -8.416730880737305, "step": 7255 }, { "epoch": 1.61, "learning_rate": 9.692165226357603e-06, "logits/chosen": -1.847659945487976, "logits/rejected": -1.6736582517623901, "logps/chosen": -86.37322235107422, "logps/rejected": -195.5209197998047, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.15356674790382385, "rewards/margins": 6.152883052825928, "rewards/rejected": -6.306449890136719, "step": 7256 }, { "epoch": 1.61, "learning_rate": 9.691545745151469e-06, "logits/chosen": -1.4663159847259521, "logits/rejected": -1.3791303634643555, "logps/chosen": -114.2646484375, "logps/rejected": -217.95257568359375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.2566719055175781, "rewards/margins": 8.818246841430664, "rewards/rejected": -9.074918746948242, "step": 7257 }, { "epoch": 1.61, "learning_rate": 9.690925661092984e-06, "logits/chosen": -1.7528959512710571, "logits/rejected": -1.7778456211090088, "logps/chosen": -118.8506851196289, "logps/rejected": -180.2049560546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.11137318611145, "rewards/margins": 6.116029739379883, "rewards/rejected": -9.227402687072754, "step": 7258 }, { "epoch": 1.61, "learning_rate": 9.690304974261828e-06, "logits/chosen": -1.6002583503723145, "logits/rejected": -1.5412490367889404, "logps/chosen": -127.31108093261719, "logps/rejected": -267.92559814453125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.4841552674770355, "rewards/margins": 11.7033052444458, "rewards/rejected": -11.219149589538574, "step": 7259 }, { "epoch": 1.61, "learning_rate": 9.689683684737758e-06, "logits/chosen": -1.6363229751586914, "logits/rejected": -1.592782735824585, "logps/chosen": -106.22331237792969, "logps/rejected": -198.00289916992188, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -2.436473846435547, "rewards/margins": 4.502094268798828, "rewards/rejected": -6.938568115234375, "step": 7260 }, { "epoch": 1.61, "learning_rate": 9.68906179260061e-06, "logits/chosen": -1.6405503749847412, "logits/rejected": -1.6395846605300903, "logps/chosen": -184.03756713867188, "logps/rejected": -199.95753479003906, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -2.791757345199585, "rewards/margins": 3.603461980819702, "rewards/rejected": -6.395219326019287, "step": 7261 }, { "epoch": 1.61, "learning_rate": 9.688439297930292e-06, "logits/chosen": -1.6579474210739136, "logits/rejected": -1.6186161041259766, "logps/chosen": -101.15055847167969, "logps/rejected": -185.63946533203125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.9747536182403564, "rewards/margins": 4.600865364074707, "rewards/rejected": -6.575618743896484, "step": 7262 }, { "epoch": 1.61, "learning_rate": 9.687816200806795e-06, "logits/chosen": -1.4308351278305054, "logits/rejected": -1.4257419109344482, "logps/chosen": -129.0670623779297, "logps/rejected": -166.24472045898438, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -4.2794976234436035, "rewards/margins": 3.0040740966796875, "rewards/rejected": -7.283571720123291, "step": 7263 }, { "epoch": 1.61, "learning_rate": 9.687192501310186e-06, "logits/chosen": -1.3095896244049072, "logits/rejected": -1.2652393579483032, "logps/chosen": -221.59799194335938, "logps/rejected": -345.83636474609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.7170990109443665, "rewards/margins": 18.772781372070312, "rewards/rejected": -18.055683135986328, "step": 7264 }, { "epoch": 1.61, "learning_rate": 9.68656819952061e-06, "logits/chosen": -1.2883363962173462, "logits/rejected": -1.3568367958068848, "logps/chosen": -191.94583129882812, "logps/rejected": -200.0293731689453, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -6.358944892883301, "rewards/margins": 4.437822341918945, "rewards/rejected": -10.796767234802246, "step": 7265 }, { "epoch": 1.61, "learning_rate": 9.685943295518283e-06, "logits/chosen": -1.436982274055481, "logits/rejected": -1.3338663578033447, "logps/chosen": -234.05470275878906, "logps/rejected": -339.5492858886719, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": 0.7318130731582642, "rewards/margins": 3.259812831878662, "rewards/rejected": -2.5279998779296875, "step": 7266 }, { "epoch": 1.61, "learning_rate": 9.685317789383509e-06, "logits/chosen": -1.4220918416976929, "logits/rejected": -1.3384265899658203, "logps/chosen": -94.29788208007812, "logps/rejected": -281.4433898925781, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": 0.3422348201274872, "rewards/margins": 8.667141914367676, "rewards/rejected": -8.324907302856445, "step": 7267 }, { "epoch": 1.61, "learning_rate": 9.684691681196664e-06, "logits/chosen": -1.9249999523162842, "logits/rejected": -1.864101767539978, "logps/chosen": -161.40414428710938, "logps/rejected": -349.08392333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6700379848480225, "rewards/margins": 12.466981887817383, "rewards/rejected": -16.137020111083984, "step": 7268 }, { "epoch": 1.61, "learning_rate": 9.684064971038196e-06, "logits/chosen": -1.394837498664856, "logits/rejected": -1.3643754720687866, "logps/chosen": -104.58401489257812, "logps/rejected": -304.8695068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0059425830841064, "rewards/margins": 19.14583969116211, "rewards/rejected": -20.151782989501953, "step": 7269 }, { "epoch": 1.61, "learning_rate": 9.683437658988642e-06, "logits/chosen": -1.178571105003357, "logits/rejected": -1.24058198928833, "logps/chosen": -115.81474304199219, "logps/rejected": -138.88922119140625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.2002594470977783, "rewards/margins": 4.69931697845459, "rewards/rejected": -6.899576663970947, "step": 7270 }, { "epoch": 1.61, "learning_rate": 9.682809745128607e-06, "logits/chosen": -1.2554973363876343, "logits/rejected": -1.203055739402771, "logps/chosen": -119.04005432128906, "logps/rejected": -148.10476684570312, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.3753654956817627, "rewards/margins": 6.505223274230957, "rewards/rejected": -8.88058853149414, "step": 7271 }, { "epoch": 1.61, "learning_rate": 9.682181229538776e-06, "logits/chosen": -1.1072136163711548, "logits/rejected": -2.36993408203125, "logps/chosen": -218.72866821289062, "logps/rejected": -242.11477661132812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.6466217041015625, "rewards/margins": 21.998226165771484, "rewards/rejected": -18.351604461669922, "step": 7272 }, { "epoch": 1.61, "learning_rate": 9.681552112299914e-06, "logits/chosen": -1.5840134620666504, "logits/rejected": -1.5375467538833618, "logps/chosen": -141.06350708007812, "logps/rejected": -318.7617492675781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.328070044517517, "rewards/margins": 10.045108795166016, "rewards/rejected": -11.373178482055664, "step": 7273 }, { "epoch": 1.61, "learning_rate": 9.680922393492858e-06, "logits/chosen": -1.8318606615066528, "logits/rejected": -1.8362282514572144, "logps/chosen": -100.29000091552734, "logps/rejected": -126.42699432373047, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": -1.9939979314804077, "rewards/margins": 7.896503925323486, "rewards/rejected": -9.890501976013184, "step": 7274 }, { "epoch": 1.61, "learning_rate": 9.68029207319853e-06, "logits/chosen": -1.1803737878799438, "logits/rejected": -1.179782509803772, "logps/chosen": -242.67440795898438, "logps/rejected": -245.28160095214844, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": -1.0199798345565796, "rewards/margins": 9.57316780090332, "rewards/rejected": -10.593147277832031, "step": 7275 }, { "epoch": 1.61, "learning_rate": 9.679661151497919e-06, "logits/chosen": -1.6967194080352783, "logits/rejected": -1.6977726221084595, "logps/chosen": -121.64649200439453, "logps/rejected": -143.99453735351562, "loss": 0.1263, "rewards/accuracies": 1.0, "rewards/chosen": -3.01413893699646, "rewards/margins": 3.6332128047943115, "rewards/rejected": -6.6473517417907715, "step": 7276 }, { "epoch": 1.61, "learning_rate": 9.6790296284721e-06, "logits/chosen": -1.4304884672164917, "logits/rejected": -1.3762249946594238, "logps/chosen": -137.8617401123047, "logps/rejected": -186.50607299804688, "loss": 0.053, "rewards/accuracies": 1.0, "rewards/chosen": -2.360264539718628, "rewards/margins": 2.1922900676727295, "rewards/rejected": -4.552554607391357, "step": 7277 }, { "epoch": 1.61, "learning_rate": 9.678397504202222e-06, "logits/chosen": -1.344224214553833, "logits/rejected": -1.5532753467559814, "logps/chosen": -208.59503173828125, "logps/rejected": -149.73106384277344, "loss": 0.1666, "rewards/accuracies": 1.0, "rewards/chosen": -4.349048137664795, "rewards/margins": 0.9278793334960938, "rewards/rejected": -5.276927471160889, "step": 7278 }, { "epoch": 1.61, "learning_rate": 9.677764778769512e-06, "logits/chosen": -1.5600792169570923, "logits/rejected": -1.4697396755218506, "logps/chosen": -164.37081909179688, "logps/rejected": -311.4349060058594, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": -3.5406875610351562, "rewards/margins": 2.8853564262390137, "rewards/rejected": -6.42604398727417, "step": 7279 }, { "epoch": 1.61, "learning_rate": 9.677131452255272e-06, "logits/chosen": -1.1234893798828125, "logits/rejected": -1.0672625303268433, "logps/chosen": -164.7671661376953, "logps/rejected": -240.64431762695312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.7678422927856445, "rewards/margins": 5.846231460571289, "rewards/rejected": -12.614073753356934, "step": 7280 }, { "epoch": 1.61, "learning_rate": 9.676497524740885e-06, "logits/chosen": -1.6019697189331055, "logits/rejected": -1.6019697189331055, "logps/chosen": -240.07086181640625, "logps/rejected": -240.07086181640625, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -7.697902202606201, "rewards/margins": 0.0, "rewards/rejected": -7.697902202606201, "step": 7281 }, { "epoch": 1.61, "learning_rate": 9.675862996307808e-06, "logits/chosen": -1.5017644166946411, "logits/rejected": -1.5350728034973145, "logps/chosen": -80.60514831542969, "logps/rejected": -85.2940902709961, "loss": 0.2753, "rewards/accuracies": 1.0, "rewards/chosen": -2.648092031478882, "rewards/margins": 0.3244752883911133, "rewards/rejected": -2.972567319869995, "step": 7282 }, { "epoch": 1.61, "learning_rate": 9.675227867037576e-06, "logits/chosen": -1.6805378198623657, "logits/rejected": -1.6805378198623657, "logps/chosen": -202.14613342285156, "logps/rejected": -202.14613342285156, "loss": 0.3782, "rewards/accuracies": 0.0, "rewards/chosen": -3.7390382289886475, "rewards/margins": 0.0, "rewards/rejected": -3.7390382289886475, "step": 7283 }, { "epoch": 1.61, "learning_rate": 9.674592137011801e-06, "logits/chosen": -1.2335842847824097, "logits/rejected": -1.1898044347763062, "logps/chosen": -114.54450988769531, "logps/rejected": -205.99761962890625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.2779908180236816, "rewards/margins": 5.903666973114014, "rewards/rejected": -8.181657791137695, "step": 7284 }, { "epoch": 1.61, "learning_rate": 9.673955806312175e-06, "logits/chosen": -1.7555241584777832, "logits/rejected": -1.736004114151001, "logps/chosen": -154.26112365722656, "logps/rejected": -198.94766235351562, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.5706375241279602, "rewards/margins": 8.254209518432617, "rewards/rejected": -8.824847221374512, "step": 7285 }, { "epoch": 1.61, "learning_rate": 9.673318875020463e-06, "logits/chosen": -1.0752300024032593, "logits/rejected": -1.0625115633010864, "logps/chosen": -185.85702514648438, "logps/rejected": -254.0926055908203, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.5498520135879517, "rewards/margins": 8.862327575683594, "rewards/rejected": -9.412179946899414, "step": 7286 }, { "epoch": 1.61, "learning_rate": 9.67268134321851e-06, "logits/chosen": -1.453490972518921, "logits/rejected": -1.4562437534332275, "logps/chosen": -144.27455139160156, "logps/rejected": -153.58334350585938, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -1.54570472240448, "rewards/margins": 3.3472890853881836, "rewards/rejected": -4.892993927001953, "step": 7287 }, { "epoch": 1.61, "learning_rate": 9.672043210988237e-06, "logits/chosen": -1.6328344345092773, "logits/rejected": -1.6436206102371216, "logps/chosen": -140.4722900390625, "logps/rejected": -173.54220581054688, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -3.379870653152466, "rewards/margins": 3.1023590564727783, "rewards/rejected": -6.482229709625244, "step": 7288 }, { "epoch": 1.61, "learning_rate": 9.671404478411645e-06, "logits/chosen": -1.1062829494476318, "logits/rejected": -1.1546391248703003, "logps/chosen": -105.76515197753906, "logps/rejected": -105.03040313720703, "loss": 1.8972, "rewards/accuracies": 1.0, "rewards/chosen": -2.9561004638671875, "rewards/margins": 2.904475212097168, "rewards/rejected": -5.8605756759643555, "step": 7289 }, { "epoch": 1.61, "learning_rate": 9.670765145570804e-06, "logits/chosen": -1.6555501222610474, "logits/rejected": -1.708670735359192, "logps/chosen": -125.13850402832031, "logps/rejected": -148.75387573242188, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.2584640979766846, "rewards/margins": 4.810708999633789, "rewards/rejected": -6.069173336029053, "step": 7290 }, { "epoch": 1.61, "learning_rate": 9.670125212547872e-06, "logits/chosen": -1.4336037635803223, "logits/rejected": -1.3797777891159058, "logps/chosen": -74.84664154052734, "logps/rejected": -117.49922180175781, "loss": 0.1574, "rewards/accuracies": 1.0, "rewards/chosen": 0.5722709894180298, "rewards/margins": 5.369921684265137, "rewards/rejected": -4.7976508140563965, "step": 7291 }, { "epoch": 1.61, "learning_rate": 9.669484679425077e-06, "logits/chosen": -1.7320469617843628, "logits/rejected": -1.1563193798065186, "logps/chosen": -154.2410430908203, "logps/rejected": -1261.357177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5518600940704346, "rewards/margins": 109.5653076171875, "rewards/rejected": -112.1171646118164, "step": 7292 }, { "epoch": 1.61, "learning_rate": 9.668843546284725e-06, "logits/chosen": -1.888538122177124, "logits/rejected": -1.4997769594192505, "logps/chosen": -80.44590759277344, "logps/rejected": -310.2173156738281, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -1.0076531171798706, "rewards/margins": 3.136655330657959, "rewards/rejected": -4.144308567047119, "step": 7293 }, { "epoch": 1.61, "learning_rate": 9.668201813209202e-06, "logits/chosen": -1.8982261419296265, "logits/rejected": -1.8766934871673584, "logps/chosen": -127.2581558227539, "logps/rejected": -185.38272094726562, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.8886361122131348, "rewards/margins": 5.415598392486572, "rewards/rejected": -8.304234504699707, "step": 7294 }, { "epoch": 1.61, "learning_rate": 9.667559480280968e-06, "logits/chosen": -1.4549466371536255, "logits/rejected": -1.3728151321411133, "logps/chosen": -215.3368377685547, "logps/rejected": -399.82421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.694696068763733, "rewards/margins": 11.498394966125488, "rewards/rejected": -13.19309139251709, "step": 7295 }, { "epoch": 1.61, "learning_rate": 9.66691654758256e-06, "logits/chosen": -1.537473440170288, "logits/rejected": -1.496596097946167, "logps/chosen": -114.79991149902344, "logps/rejected": -173.07293701171875, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.9629074335098267, "rewards/margins": 3.9030275344848633, "rewards/rejected": -5.8659348487854, "step": 7296 }, { "epoch": 1.62, "learning_rate": 9.666273015196595e-06, "logits/chosen": -1.38302481174469, "logits/rejected": -0.8621290326118469, "logps/chosen": -218.349365234375, "logps/rejected": -649.1397094726562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.747589111328125, "rewards/margins": 54.13648223876953, "rewards/rejected": -51.388893127441406, "step": 7297 }, { "epoch": 1.62, "learning_rate": 9.665628883205765e-06, "logits/chosen": -1.9828263521194458, "logits/rejected": -1.9591333866119385, "logps/chosen": -141.25265502929688, "logps/rejected": -132.00575256347656, "loss": 1.0869, "rewards/accuracies": 0.0, "rewards/chosen": -6.536698341369629, "rewards/margins": -2.0530967712402344, "rewards/rejected": -4.4836015701293945, "step": 7298 }, { "epoch": 1.62, "learning_rate": 9.66498415169284e-06, "logits/chosen": -1.6643155813217163, "logits/rejected": -1.6971027851104736, "logps/chosen": -139.78553771972656, "logps/rejected": -161.6653594970703, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -4.838554382324219, "rewards/margins": 3.636540412902832, "rewards/rejected": -8.47509479522705, "step": 7299 }, { "epoch": 1.62, "learning_rate": 9.664338820740664e-06, "logits/chosen": -1.5093770027160645, "logits/rejected": -1.4036482572555542, "logps/chosen": -120.53665161132812, "logps/rejected": -211.12081909179688, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.5260368585586548, "rewards/margins": 4.864258766174316, "rewards/rejected": -6.390295505523682, "step": 7300 }, { "epoch": 1.62, "learning_rate": 9.663692890432164e-06, "logits/chosen": -1.7693023681640625, "logits/rejected": -1.7281622886657715, "logps/chosen": -194.08155822753906, "logps/rejected": -215.11083984375, "loss": 0.5382, "rewards/accuracies": 0.0, "rewards/chosen": -7.044841766357422, "rewards/margins": -0.6550588607788086, "rewards/rejected": -6.389782905578613, "step": 7301 }, { "epoch": 1.62, "learning_rate": 9.663046360850338e-06, "logits/chosen": -1.80073881149292, "logits/rejected": -1.4886741638183594, "logps/chosen": -118.3360824584961, "logps/rejected": -402.26458740234375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.4036705493927002, "rewards/margins": 6.388199806213379, "rewards/rejected": -7.7918701171875, "step": 7302 }, { "epoch": 1.62, "learning_rate": 9.662399232078264e-06, "logits/chosen": -1.313010811805725, "logits/rejected": -1.2991644144058228, "logps/chosen": -162.86279296875, "logps/rejected": -180.95436096191406, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 1.3296951055526733, "rewards/margins": 7.873456001281738, "rewards/rejected": -6.543760776519775, "step": 7303 }, { "epoch": 1.62, "learning_rate": 9.661751504199097e-06, "logits/chosen": -1.5126850605010986, "logits/rejected": -1.5766613483428955, "logps/chosen": -173.13479614257812, "logps/rejected": -202.79774475097656, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.7643158435821533, "rewards/margins": 5.7326765060424805, "rewards/rejected": -7.496992588043213, "step": 7304 }, { "epoch": 1.62, "learning_rate": 9.661103177296069e-06, "logits/chosen": -1.288779377937317, "logits/rejected": -1.0917179584503174, "logps/chosen": -75.78972625732422, "logps/rejected": -318.2720031738281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.9293079376220703, "rewards/margins": 7.962522506713867, "rewards/rejected": -9.891830444335938, "step": 7305 }, { "epoch": 1.62, "learning_rate": 9.660454251452487e-06, "logits/chosen": -1.7815351486206055, "logits/rejected": -1.5909020900726318, "logps/chosen": -126.58223724365234, "logps/rejected": -416.18475341796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.915937900543213, "rewards/margins": 22.782203674316406, "rewards/rejected": -26.69814109802246, "step": 7306 }, { "epoch": 1.62, "learning_rate": 9.659804726751737e-06, "logits/chosen": -1.9348182678222656, "logits/rejected": -1.8405613899230957, "logps/chosen": -62.75871276855469, "logps/rejected": -276.81707763671875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 2.7521049976348877, "rewards/margins": 7.373274803161621, "rewards/rejected": -4.6211700439453125, "step": 7307 }, { "epoch": 1.62, "learning_rate": 9.659154603277283e-06, "logits/chosen": -1.7818272113800049, "logits/rejected": -2.388207197189331, "logps/chosen": -114.5955581665039, "logps/rejected": -322.8564453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.08898773044347763, "rewards/margins": 21.2412109375, "rewards/rejected": -21.330198287963867, "step": 7308 }, { "epoch": 1.62, "learning_rate": 9.658503881112661e-06, "logits/chosen": -1.5010061264038086, "logits/rejected": -1.5010061264038086, "logps/chosen": -185.19915771484375, "logps/rejected": -185.19915771484375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.358187198638916, "rewards/margins": 0.0, "rewards/rejected": -6.358187198638916, "step": 7309 }, { "epoch": 1.62, "learning_rate": 9.65785256034149e-06, "logits/chosen": -1.6128946542739868, "logits/rejected": -1.6128946542739868, "logps/chosen": -109.98480224609375, "logps/rejected": -109.98480224609375, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -5.023429870605469, "rewards/margins": 0.0, "rewards/rejected": -5.023429870605469, "step": 7310 }, { "epoch": 1.62, "learning_rate": 9.657200641047462e-06, "logits/chosen": -2.1212170124053955, "logits/rejected": -2.0258827209472656, "logps/chosen": -119.78837585449219, "logps/rejected": -194.747802734375, "loss": 0.1932, "rewards/accuracies": 1.0, "rewards/chosen": -0.7974792718887329, "rewards/margins": 1.0448760986328125, "rewards/rejected": -1.8423553705215454, "step": 7311 }, { "epoch": 1.62, "learning_rate": 9.656548123314346e-06, "logits/chosen": -1.9079630374908447, "logits/rejected": -1.895268440246582, "logps/chosen": -159.22250366210938, "logps/rejected": -187.98297119140625, "loss": 0.0491, "rewards/accuracies": 1.0, "rewards/chosen": -0.8968856930732727, "rewards/margins": 5.338996887207031, "rewards/rejected": -6.235882759094238, "step": 7312 }, { "epoch": 1.62, "learning_rate": 9.655895007225992e-06, "logits/chosen": -1.582384705543518, "logits/rejected": -1.5462461709976196, "logps/chosen": -218.23959350585938, "logps/rejected": -179.66793823242188, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.8305450677871704, "rewards/margins": 6.703624248504639, "rewards/rejected": -7.5341691970825195, "step": 7313 }, { "epoch": 1.62, "learning_rate": 9.655241292866321e-06, "logits/chosen": -1.4794195890426636, "logits/rejected": -1.4219307899475098, "logps/chosen": -123.78413391113281, "logps/rejected": -168.04530334472656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.27044373750686646, "rewards/margins": 7.502852916717529, "rewards/rejected": -7.2324090003967285, "step": 7314 }, { "epoch": 1.62, "learning_rate": 9.654586980319335e-06, "logits/chosen": -1.7706282138824463, "logits/rejected": -1.7746769189834595, "logps/chosen": -94.31810760498047, "logps/rejected": -228.72840881347656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6678978204727173, "rewards/margins": 10.173516273498535, "rewards/rejected": -10.841414451599121, "step": 7315 }, { "epoch": 1.62, "learning_rate": 9.653932069669112e-06, "logits/chosen": -1.7396072149276733, "logits/rejected": -1.76711905002594, "logps/chosen": -177.28521728515625, "logps/rejected": -104.45585632324219, "loss": 1.2716, "rewards/accuracies": 0.0, "rewards/chosen": -6.092401027679443, "rewards/margins": -2.461355447769165, "rewards/rejected": -3.6310455799102783, "step": 7316 }, { "epoch": 1.62, "learning_rate": 9.653276560999805e-06, "logits/chosen": -1.403617024421692, "logits/rejected": -1.384384274482727, "logps/chosen": -91.39115905761719, "logps/rejected": -168.4334716796875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.2450302839279175, "rewards/margins": 6.5242695808410645, "rewards/rejected": -7.7692999839782715, "step": 7317 }, { "epoch": 1.62, "learning_rate": 9.652620454395647e-06, "logits/chosen": -1.322403073310852, "logits/rejected": -1.0956560373306274, "logps/chosen": -230.5696563720703, "logps/rejected": -401.3518371582031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.937910556793213, "rewards/margins": 13.507926940917969, "rewards/rejected": -10.570016860961914, "step": 7318 }, { "epoch": 1.62, "learning_rate": 9.651963749940944e-06, "logits/chosen": -1.561766266822815, "logits/rejected": -1.561766266822815, "logps/chosen": -259.7043151855469, "logps/rejected": -259.7043151855469, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -13.943575859069824, "rewards/margins": 0.0, "rewards/rejected": -13.943575859069824, "step": 7319 }, { "epoch": 1.62, "learning_rate": 9.651306447720083e-06, "logits/chosen": -1.8162165880203247, "logits/rejected": -1.8180190324783325, "logps/chosen": -84.677734375, "logps/rejected": -139.08441162109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.2236404418945312, "rewards/margins": 9.27740478515625, "rewards/rejected": -10.501045227050781, "step": 7320 }, { "epoch": 1.62, "learning_rate": 9.650648547817524e-06, "logits/chosen": -1.4717767238616943, "logits/rejected": -1.47426438331604, "logps/chosen": -148.9015655517578, "logps/rejected": -163.54104614257812, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -5.661670684814453, "rewards/margins": 3.5133676528930664, "rewards/rejected": -9.17503833770752, "step": 7321 }, { "epoch": 1.62, "learning_rate": 9.649990050317806e-06, "logits/chosen": -1.499448537826538, "logits/rejected": -1.5890356302261353, "logps/chosen": -193.08901977539062, "logps/rejected": -251.853515625, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9457977414131165, "rewards/margins": 13.363901138305664, "rewards/rejected": -14.309699058532715, "step": 7322 }, { "epoch": 1.62, "learning_rate": 9.649330955305547e-06, "logits/chosen": -1.556917428970337, "logits/rejected": -1.531486988067627, "logps/chosen": -111.15390014648438, "logps/rejected": -222.37283325195312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.807855248451233, "rewards/margins": 10.458731651306152, "rewards/rejected": -12.266587257385254, "step": 7323 }, { "epoch": 1.62, "learning_rate": 9.648671262865434e-06, "logits/chosen": -1.9745526313781738, "logits/rejected": -1.9063159227371216, "logps/chosen": -105.32821655273438, "logps/rejected": -253.19625854492188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5088233947753906, "rewards/margins": 9.378920555114746, "rewards/rejected": -10.887743949890137, "step": 7324 }, { "epoch": 1.62, "learning_rate": 9.648010973082243e-06, "logits/chosen": -1.6116193532943726, "logits/rejected": -1.6420964002609253, "logps/chosen": -144.06044006347656, "logps/rejected": -96.79940795898438, "loss": 0.5415, "rewards/accuracies": 1.0, "rewards/chosen": -1.7323166131973267, "rewards/margins": 0.7407304048538208, "rewards/rejected": -2.4730470180511475, "step": 7325 }, { "epoch": 1.62, "learning_rate": 9.647350086040812e-06, "logits/chosen": -1.6470746994018555, "logits/rejected": -1.6296179294586182, "logps/chosen": -151.41159057617188, "logps/rejected": -134.41494750976562, "loss": 0.0664, "rewards/accuracies": 1.0, "rewards/chosen": -4.500268459320068, "rewards/margins": 1.955674171447754, "rewards/rejected": -6.455942630767822, "step": 7326 }, { "epoch": 1.62, "learning_rate": 9.646688601826068e-06, "logits/chosen": -1.4032217264175415, "logits/rejected": -0.9655948877334595, "logps/chosen": -104.25940704345703, "logps/rejected": -1032.001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6647591590881348, "rewards/margins": 90.22982025146484, "rewards/rejected": -92.89457702636719, "step": 7327 }, { "epoch": 1.62, "learning_rate": 9.646026520523008e-06, "logits/chosen": -1.3726387023925781, "logits/rejected": -1.362342357635498, "logps/chosen": -119.28657531738281, "logps/rejected": -84.37718963623047, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.02915344201028347, "rewards/margins": 6.346899509429932, "rewards/rejected": -6.317746162414551, "step": 7328 }, { "epoch": 1.62, "learning_rate": 9.64536384221671e-06, "logits/chosen": -1.5893898010253906, "logits/rejected": -1.4733272790908813, "logps/chosen": -94.80230712890625, "logps/rejected": -286.6973876953125, "loss": 0.1833, "rewards/accuracies": 1.0, "rewards/chosen": 0.7214736938476562, "rewards/margins": 12.880187034606934, "rewards/rejected": -12.158713340759277, "step": 7329 }, { "epoch": 1.62, "learning_rate": 9.644700566992324e-06, "logits/chosen": -1.8765043020248413, "logits/rejected": -1.6409494876861572, "logps/chosen": -129.5570068359375, "logps/rejected": -359.88018798828125, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": 5.364514350891113, "rewards/margins": 18.277076721191406, "rewards/rejected": -12.912562370300293, "step": 7330 }, { "epoch": 1.62, "learning_rate": 9.644036694935083e-06, "logits/chosen": -1.5007424354553223, "logits/rejected": -1.4734892845153809, "logps/chosen": -135.91738891601562, "logps/rejected": -340.1467590332031, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.4203430116176605, "rewards/margins": 18.175865173339844, "rewards/rejected": -17.755521774291992, "step": 7331 }, { "epoch": 1.62, "learning_rate": 9.64337222613029e-06, "logits/chosen": -1.6123850345611572, "logits/rejected": -1.5674210786819458, "logps/chosen": -94.97622680664062, "logps/rejected": -122.71055603027344, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.259164422750473, "rewards/margins": 5.296804904937744, "rewards/rejected": -5.037640571594238, "step": 7332 }, { "epoch": 1.62, "learning_rate": 9.642707160663326e-06, "logits/chosen": -1.4298067092895508, "logits/rejected": -1.3987236022949219, "logps/chosen": -182.94976806640625, "logps/rejected": -301.3013000488281, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": 0.6312164664268494, "rewards/margins": 15.745388984680176, "rewards/rejected": -15.11417293548584, "step": 7333 }, { "epoch": 1.62, "learning_rate": 9.642041498619655e-06, "logits/chosen": -1.580467700958252, "logits/rejected": -1.5521421432495117, "logps/chosen": -144.07260131835938, "logps/rejected": -240.4197235107422, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.272110939025879, "rewards/margins": 6.68992805480957, "rewards/rejected": -12.96203899383545, "step": 7334 }, { "epoch": 1.62, "learning_rate": 9.64137524008481e-06, "logits/chosen": -1.3958181142807007, "logits/rejected": -1.409321665763855, "logps/chosen": -216.32640075683594, "logps/rejected": -162.8498077392578, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -2.6358354091644287, "rewards/margins": 4.454329490661621, "rewards/rejected": -7.090165138244629, "step": 7335 }, { "epoch": 1.62, "learning_rate": 9.640708385144403e-06, "logits/chosen": -1.7130060195922852, "logits/rejected": -1.657369613647461, "logps/chosen": -74.25408935546875, "logps/rejected": -198.9627227783203, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.4449424743652344, "rewards/margins": 5.202311038970947, "rewards/rejected": -7.647253513336182, "step": 7336 }, { "epoch": 1.62, "learning_rate": 9.640040933884126e-06, "logits/chosen": -1.6627295017242432, "logits/rejected": -1.6627295017242432, "logps/chosen": -78.29763793945312, "logps/rejected": -78.29763793945312, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -1.1895760297775269, "rewards/margins": 0.0, "rewards/rejected": -1.1895760297775269, "step": 7337 }, { "epoch": 1.62, "learning_rate": 9.639372886389743e-06, "logits/chosen": -1.8173208236694336, "logits/rejected": -1.5181916952133179, "logps/chosen": -117.9530029296875, "logps/rejected": -312.2051086425781, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.824589490890503, "rewards/margins": 5.078442573547363, "rewards/rejected": -7.903031826019287, "step": 7338 }, { "epoch": 1.62, "learning_rate": 9.638704242747097e-06, "logits/chosen": -1.4954400062561035, "logits/rejected": -1.4954400062561035, "logps/chosen": -286.8571472167969, "logps/rejected": -286.8571472167969, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -14.058270454406738, "rewards/margins": 0.0, "rewards/rejected": -14.058270454406738, "step": 7339 }, { "epoch": 1.62, "learning_rate": 9.638035003042108e-06, "logits/chosen": -1.991243600845337, "logits/rejected": -1.9624872207641602, "logps/chosen": -178.50827026367188, "logps/rejected": -212.35797119140625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.7938781976699829, "rewards/margins": 4.782034397125244, "rewards/rejected": -5.5759124755859375, "step": 7340 }, { "epoch": 1.62, "learning_rate": 9.637365167360769e-06, "logits/chosen": -1.4696961641311646, "logits/rejected": -1.4300544261932373, "logps/chosen": -173.22802734375, "logps/rejected": -217.29046630859375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 2.923065185546875, "rewards/margins": 14.159080505371094, "rewards/rejected": -11.236015319824219, "step": 7341 }, { "epoch": 1.63, "learning_rate": 9.636694735789153e-06, "logits/chosen": -1.5503216981887817, "logits/rejected": -1.5503216981887817, "logps/chosen": -164.21994018554688, "logps/rejected": -164.21994018554688, "loss": 0.3488, "rewards/accuracies": 0.0, "rewards/chosen": -1.8191956281661987, "rewards/margins": 0.0, "rewards/rejected": -1.8191956281661987, "step": 7342 }, { "epoch": 1.63, "learning_rate": 9.636023708413412e-06, "logits/chosen": -1.46968412399292, "logits/rejected": -1.5013262033462524, "logps/chosen": -192.96571350097656, "logps/rejected": -231.9774169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9463912844657898, "rewards/margins": 9.853594779968262, "rewards/rejected": -10.799985885620117, "step": 7343 }, { "epoch": 1.63, "learning_rate": 9.635352085319768e-06, "logits/chosen": -1.3416264057159424, "logits/rejected": -1.5241252183914185, "logps/chosen": -350.30291748046875, "logps/rejected": -223.13189697265625, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -9.252695083618164, "rewards/margins": 3.13238525390625, "rewards/rejected": -12.385080337524414, "step": 7344 }, { "epoch": 1.63, "learning_rate": 9.634679866594525e-06, "logits/chosen": -1.949282169342041, "logits/rejected": -1.9517515897750854, "logps/chosen": -132.0863800048828, "logps/rejected": -87.45320129394531, "loss": 1.4559, "rewards/accuracies": 0.0, "rewards/chosen": -9.713248252868652, "rewards/margins": -2.8557934761047363, "rewards/rejected": -6.857454776763916, "step": 7345 }, { "epoch": 1.63, "learning_rate": 9.63400705232406e-06, "logits/chosen": -1.5608270168304443, "logits/rejected": -1.5567526817321777, "logps/chosen": -181.1753387451172, "logps/rejected": -302.8162841796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7667007446289062, "rewards/margins": 18.040529251098633, "rewards/rejected": -19.80722999572754, "step": 7346 }, { "epoch": 1.63, "learning_rate": 9.633333642594828e-06, "logits/chosen": -1.7115358114242554, "logits/rejected": -1.7115358114242554, "logps/chosen": -258.70501708984375, "logps/rejected": -258.70501708984375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -9.88793659210205, "rewards/margins": 0.0, "rewards/rejected": -9.88793659210205, "step": 7347 }, { "epoch": 1.63, "learning_rate": 9.632659637493362e-06, "logits/chosen": -1.7754533290863037, "logits/rejected": -1.7139352560043335, "logps/chosen": -170.8411865234375, "logps/rejected": -130.3162384033203, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.05409088358283043, "rewards/margins": 8.78352165222168, "rewards/rejected": -8.83761215209961, "step": 7348 }, { "epoch": 1.63, "learning_rate": 9.631985037106268e-06, "logits/chosen": -1.3863046169281006, "logits/rejected": -1.4317933320999146, "logps/chosen": -225.48236083984375, "logps/rejected": -190.01473999023438, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.26453858613967896, "rewards/margins": 5.841874599456787, "rewards/rejected": -6.1064133644104, "step": 7349 }, { "epoch": 1.63, "learning_rate": 9.631309841520233e-06, "logits/chosen": -1.7486032247543335, "logits/rejected": -1.6282910108566284, "logps/chosen": -198.91122436523438, "logps/rejected": -295.2237243652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 6.3807525634765625, "rewards/margins": 14.258308410644531, "rewards/rejected": -7.877555847167969, "step": 7350 }, { "epoch": 1.63, "learning_rate": 9.630634050822016e-06, "logits/chosen": -1.9197081327438354, "logits/rejected": -1.906274437904358, "logps/chosen": -83.50205993652344, "logps/rejected": -169.90695190429688, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -1.5631935596466064, "rewards/margins": 3.5636727809906006, "rewards/rejected": -5.126866340637207, "step": 7351 }, { "epoch": 1.63, "learning_rate": 9.629957665098458e-06, "logits/chosen": -1.5341705083847046, "logits/rejected": -1.4873381853103638, "logps/chosen": -126.83154296875, "logps/rejected": -136.58592224121094, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": 1.2204986810684204, "rewards/margins": 3.7555899620056152, "rewards/rejected": -2.5350914001464844, "step": 7352 }, { "epoch": 1.63, "learning_rate": 9.629280684436467e-06, "logits/chosen": -1.704460620880127, "logits/rejected": -1.6906018257141113, "logps/chosen": -199.29226684570312, "logps/rejected": -211.1112823486328, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": 1.3265961408615112, "rewards/margins": 3.344853401184082, "rewards/rejected": -2.0182571411132812, "step": 7353 }, { "epoch": 1.63, "learning_rate": 9.628603108923037e-06, "logits/chosen": -1.6911906003952026, "logits/rejected": -1.720664143562317, "logps/chosen": -112.98165893554688, "logps/rejected": -219.49136352539062, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.169718936085701, "rewards/margins": 9.966596603393555, "rewards/rejected": -9.79687786102295, "step": 7354 }, { "epoch": 1.63, "learning_rate": 9.627924938645234e-06, "logits/chosen": -1.5156595706939697, "logits/rejected": -1.538546085357666, "logps/chosen": -131.80148315429688, "logps/rejected": -120.08312225341797, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -4.314647197723389, "rewards/margins": 3.41684627532959, "rewards/rejected": -7.7314934730529785, "step": 7355 }, { "epoch": 1.63, "learning_rate": 9.627246173690202e-06, "logits/chosen": -1.6633609533309937, "logits/rejected": -1.5414749383926392, "logps/chosen": -135.21182250976562, "logps/rejected": -210.84600830078125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 4.955163478851318, "rewards/margins": 5.146829128265381, "rewards/rejected": -0.1916656494140625, "step": 7356 }, { "epoch": 1.63, "learning_rate": 9.62656681414516e-06, "logits/chosen": -1.505348801612854, "logits/rejected": -1.4962913990020752, "logps/chosen": -84.09016418457031, "logps/rejected": -110.92462921142578, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -0.13521118462085724, "rewards/margins": 3.7266128063201904, "rewards/rejected": -3.8618240356445312, "step": 7357 }, { "epoch": 1.63, "learning_rate": 9.625886860097406e-06, "logits/chosen": -1.4485646486282349, "logits/rejected": -1.5882879495620728, "logps/chosen": -222.46495056152344, "logps/rejected": -193.0913848876953, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.7320022583007812, "rewards/margins": 13.36491584777832, "rewards/rejected": -11.632913589477539, "step": 7358 }, { "epoch": 1.63, "learning_rate": 9.62520631163431e-06, "logits/chosen": -1.6278607845306396, "logits/rejected": -1.6736501455307007, "logps/chosen": -185.98468017578125, "logps/rejected": -145.51121520996094, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031890869140625, "rewards/margins": 4.463120460510254, "rewards/rejected": -4.466309547424316, "step": 7359 }, { "epoch": 1.63, "learning_rate": 9.62452516884332e-06, "logits/chosen": -1.8979169130325317, "logits/rejected": -1.8795307874679565, "logps/chosen": -130.72918701171875, "logps/rejected": -191.9159393310547, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.4061431884765625, "rewards/margins": 5.342747688293457, "rewards/rejected": -6.7488908767700195, "step": 7360 }, { "epoch": 1.63, "learning_rate": 9.623843431811964e-06, "logits/chosen": -1.361977458000183, "logits/rejected": -1.32529878616333, "logps/chosen": -159.15774536132812, "logps/rejected": -276.2721862792969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7647552490234375, "rewards/margins": 15.80155086517334, "rewards/rejected": -15.036795616149902, "step": 7361 }, { "epoch": 1.63, "learning_rate": 9.623161100627842e-06, "logits/chosen": -1.533022165298462, "logits/rejected": -1.5648059844970703, "logps/chosen": -132.78343200683594, "logps/rejected": -116.61502838134766, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.9499679803848267, "rewards/margins": 4.29299783706665, "rewards/rejected": -5.2429656982421875, "step": 7362 }, { "epoch": 1.63, "learning_rate": 9.622478175378634e-06, "logits/chosen": -1.5888220071792603, "logits/rejected": -1.665801763534546, "logps/chosen": -149.0104217529297, "logps/rejected": -150.77828979492188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 3.462999105453491, "rewards/margins": 12.664981842041016, "rewards/rejected": -9.201982498168945, "step": 7363 }, { "epoch": 1.63, "learning_rate": 9.62179465615209e-06, "logits/chosen": -1.4468772411346436, "logits/rejected": -1.4625637531280518, "logps/chosen": -202.98204040527344, "logps/rejected": -225.03480529785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5706161260604858, "rewards/margins": 10.690167427062988, "rewards/rejected": -12.260783195495605, "step": 7364 }, { "epoch": 1.63, "learning_rate": 9.621110543036047e-06, "logits/chosen": -1.7112706899642944, "logits/rejected": -1.5656509399414062, "logps/chosen": -189.00503540039062, "logps/rejected": -369.30792236328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.746636986732483, "rewards/margins": 7.6807074546813965, "rewards/rejected": -9.42734432220459, "step": 7365 }, { "epoch": 1.63, "learning_rate": 9.620425836118406e-06, "logits/chosen": -1.7919725179672241, "logits/rejected": -1.740177869796753, "logps/chosen": -108.3818130493164, "logps/rejected": -190.68734741210938, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6145774722099304, "rewards/margins": 6.157725811004639, "rewards/rejected": -6.772303104400635, "step": 7366 }, { "epoch": 1.63, "learning_rate": 9.619740535487151e-06, "logits/chosen": -1.422167420387268, "logits/rejected": -1.422167420387268, "logps/chosen": -209.91510009765625, "logps/rejected": -209.91510009765625, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -8.260859489440918, "rewards/margins": 0.0, "rewards/rejected": -8.260859489440918, "step": 7367 }, { "epoch": 1.63, "learning_rate": 9.619054641230343e-06, "logits/chosen": -1.547537088394165, "logits/rejected": -1.4028502702713013, "logps/chosen": -181.49063110351562, "logps/rejected": -412.18212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.965869188308716, "rewards/margins": 19.400239944458008, "rewards/rejected": -22.36610984802246, "step": 7368 }, { "epoch": 1.63, "learning_rate": 9.618368153436119e-06, "logits/chosen": -2.0201950073242188, "logits/rejected": -2.2036828994750977, "logps/chosen": -249.62567138671875, "logps/rejected": -384.4639892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.40781569480896, "rewards/margins": 27.191104888916016, "rewards/rejected": -24.783288955688477, "step": 7369 }, { "epoch": 1.63, "learning_rate": 9.617681072192688e-06, "logits/chosen": -1.6052247285842896, "logits/rejected": -1.6257622241973877, "logps/chosen": -129.58737182617188, "logps/rejected": -159.870849609375, "loss": 0.3469, "rewards/accuracies": 1.0, "rewards/chosen": -1.1360915899276733, "rewards/margins": 7.335909366607666, "rewards/rejected": -8.472001075744629, "step": 7370 }, { "epoch": 1.63, "learning_rate": 9.616993397588342e-06, "logits/chosen": -1.4177231788635254, "logits/rejected": -1.4449849128723145, "logps/chosen": -238.50637817382812, "logps/rejected": -241.90798950195312, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.7656219005584717, "rewards/margins": 6.011869430541992, "rewards/rejected": -8.777491569519043, "step": 7371 }, { "epoch": 1.63, "learning_rate": 9.61630512971144e-06, "logits/chosen": -1.7872068881988525, "logits/rejected": -1.8457268476486206, "logps/chosen": -198.2820281982422, "logps/rejected": -175.64308166503906, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": 0.9139358401298523, "rewards/margins": 12.798626899719238, "rewards/rejected": -11.88469123840332, "step": 7372 }, { "epoch": 1.63, "learning_rate": 9.61561626865043e-06, "logits/chosen": -1.5335743427276611, "logits/rejected": -1.492173433303833, "logps/chosen": -76.42845916748047, "logps/rejected": -123.09884643554688, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -0.9482048153877258, "rewards/margins": 5.067562580108643, "rewards/rejected": -6.015767574310303, "step": 7373 }, { "epoch": 1.63, "learning_rate": 9.614926814493822e-06, "logits/chosen": -1.1824721097946167, "logits/rejected": -1.117470622062683, "logps/chosen": -148.74417114257812, "logps/rejected": -242.15769958496094, "loss": 0.1262, "rewards/accuracies": 1.0, "rewards/chosen": -8.023019790649414, "rewards/margins": 1.2492504119873047, "rewards/rejected": -9.272270202636719, "step": 7374 }, { "epoch": 1.63, "learning_rate": 9.614236767330214e-06, "logits/chosen": -1.5607349872589111, "logits/rejected": -1.4870599508285522, "logps/chosen": -214.99781799316406, "logps/rejected": -311.0430603027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1981492042541504, "rewards/margins": 10.998384475708008, "rewards/rejected": -13.196533203125, "step": 7375 }, { "epoch": 1.63, "learning_rate": 9.613546127248272e-06, "logits/chosen": -1.7000888586044312, "logits/rejected": -1.7395756244659424, "logps/chosen": -134.41497802734375, "logps/rejected": -105.7989273071289, "loss": 0.1261, "rewards/accuracies": 1.0, "rewards/chosen": -6.582398891448975, "rewards/margins": 1.2563767433166504, "rewards/rejected": -7.838775634765625, "step": 7376 }, { "epoch": 1.63, "learning_rate": 9.612854894336746e-06, "logits/chosen": -1.5274053812026978, "logits/rejected": -1.5018399953842163, "logps/chosen": -123.64842224121094, "logps/rejected": -199.0508270263672, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.7595360279083252, "rewards/margins": 13.32326889038086, "rewards/rejected": -15.082804679870605, "step": 7377 }, { "epoch": 1.63, "learning_rate": 9.612163068684453e-06, "logits/chosen": -1.7327324151992798, "logits/rejected": -1.657568335533142, "logps/chosen": -105.49993896484375, "logps/rejected": -202.6048583984375, "loss": 0.13, "rewards/accuracies": 1.0, "rewards/chosen": -1.046453833580017, "rewards/margins": 1.2194350957870483, "rewards/rejected": -2.2658889293670654, "step": 7378 }, { "epoch": 1.63, "learning_rate": 9.611470650380293e-06, "logits/chosen": -1.6044652462005615, "logits/rejected": -1.6619914770126343, "logps/chosen": -326.7288513183594, "logps/rejected": -292.43939208984375, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -7.271910190582275, "rewards/margins": 4.049510478973389, "rewards/rejected": -11.321420669555664, "step": 7379 }, { "epoch": 1.63, "learning_rate": 9.61077763951324e-06, "logits/chosen": -1.6987144947052002, "logits/rejected": -1.6543821096420288, "logps/chosen": -117.32395935058594, "logps/rejected": -256.7735900878906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.48779296875, "rewards/margins": 7.793710708618164, "rewards/rejected": -12.281503677368164, "step": 7380 }, { "epoch": 1.63, "learning_rate": 9.610084036172346e-06, "logits/chosen": -1.631164312362671, "logits/rejected": -1.5525579452514648, "logps/chosen": -151.68356323242188, "logps/rejected": -304.90704345703125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.9404388666152954, "rewards/margins": 18.350868225097656, "rewards/rejected": -17.410429000854492, "step": 7381 }, { "epoch": 1.63, "learning_rate": 9.609389840446734e-06, "logits/chosen": -1.6508352756500244, "logits/rejected": -1.6339647769927979, "logps/chosen": -174.61033630371094, "logps/rejected": -267.7779235839844, "loss": 0.1978, "rewards/accuracies": 1.0, "rewards/chosen": -1.08824622631073, "rewards/margins": 1.9613739252090454, "rewards/rejected": -3.0496201515197754, "step": 7382 }, { "epoch": 1.63, "learning_rate": 9.60869505242561e-06, "logits/chosen": -1.3402067422866821, "logits/rejected": -1.1011499166488647, "logps/chosen": -131.77716064453125, "logps/rejected": -340.436279296875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.3724923133850098, "rewards/margins": 8.719755172729492, "rewards/rejected": -12.092247009277344, "step": 7383 }, { "epoch": 1.63, "learning_rate": 9.60799967219825e-06, "logits/chosen": -1.6009036302566528, "logits/rejected": -1.576826810836792, "logps/chosen": -122.05535888671875, "logps/rejected": -132.94288635253906, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -0.09561081230640411, "rewards/margins": 4.483835220336914, "rewards/rejected": -4.579445838928223, "step": 7384 }, { "epoch": 1.63, "learning_rate": 9.607303699854009e-06, "logits/chosen": -1.8303873538970947, "logits/rejected": -1.779326319694519, "logps/chosen": -84.84551239013672, "logps/rejected": -198.7784881591797, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 1.0632346868515015, "rewards/margins": 5.808333873748779, "rewards/rejected": -4.745099067687988, "step": 7385 }, { "epoch": 1.63, "learning_rate": 9.606607135482318e-06, "logits/chosen": -1.687989354133606, "logits/rejected": -1.6143920421600342, "logps/chosen": -172.60984802246094, "logps/rejected": -245.0330352783203, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.3447860479354858, "rewards/margins": 15.557530403137207, "rewards/rejected": -14.21274471282959, "step": 7386 }, { "epoch": 1.64, "learning_rate": 9.605909979172683e-06, "logits/chosen": -1.9369958639144897, "logits/rejected": -1.8568761348724365, "logps/chosen": -92.05408477783203, "logps/rejected": -165.2876434326172, "loss": 0.2696, "rewards/accuracies": 1.0, "rewards/chosen": -3.152853012084961, "rewards/margins": 0.35381197929382324, "rewards/rejected": -3.506664991378784, "step": 7387 }, { "epoch": 1.64, "learning_rate": 9.60521223101469e-06, "logits/chosen": -1.19404935836792, "logits/rejected": -1.1655606031417847, "logps/chosen": -127.86363220214844, "logps/rejected": -221.31423950195312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.341989278793335, "rewards/margins": 6.375759124755859, "rewards/rejected": -9.717748641967773, "step": 7388 }, { "epoch": 1.64, "learning_rate": 9.604513891097995e-06, "logits/chosen": -1.7575637102127075, "logits/rejected": -1.72371506690979, "logps/chosen": -81.37825012207031, "logps/rejected": -169.93209838867188, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.935130000114441, "rewards/margins": 5.718450546264648, "rewards/rejected": -7.653580665588379, "step": 7389 }, { "epoch": 1.64, "learning_rate": 9.603814959512334e-06, "logits/chosen": -1.5297093391418457, "logits/rejected": -1.619990587234497, "logps/chosen": -94.41168975830078, "logps/rejected": -66.62120056152344, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": -0.3132209777832031, "rewards/margins": 2.7064411640167236, "rewards/rejected": -3.0196621417999268, "step": 7390 }, { "epoch": 1.64, "learning_rate": 9.603115436347519e-06, "logits/chosen": -1.6929807662963867, "logits/rejected": -1.6495076417922974, "logps/chosen": -115.93594360351562, "logps/rejected": -241.49099731445312, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -2.241790771484375, "rewards/margins": 4.937086582183838, "rewards/rejected": -7.178877353668213, "step": 7391 }, { "epoch": 1.64, "learning_rate": 9.602415321693434e-06, "logits/chosen": -1.7332276105880737, "logits/rejected": -1.7030315399169922, "logps/chosen": -98.80272674560547, "logps/rejected": -176.29791259765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0013748407363892, "rewards/margins": 8.312067985534668, "rewards/rejected": -9.313443183898926, "step": 7392 }, { "epoch": 1.64, "learning_rate": 9.601714615640046e-06, "logits/chosen": -1.2907963991165161, "logits/rejected": -1.3032889366149902, "logps/chosen": -80.37222290039062, "logps/rejected": -184.45425415039062, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.750342607498169, "rewards/margins": 4.933745384216309, "rewards/rejected": -6.684088230133057, "step": 7393 }, { "epoch": 1.64, "learning_rate": 9.601013318277391e-06, "logits/chosen": -1.9393720626831055, "logits/rejected": -1.967915415763855, "logps/chosen": -124.40309143066406, "logps/rejected": -144.88790893554688, "loss": 0.0522, "rewards/accuracies": 1.0, "rewards/chosen": -3.026599168777466, "rewards/margins": 2.217743158340454, "rewards/rejected": -5.24434232711792, "step": 7394 }, { "epoch": 1.64, "learning_rate": 9.600311429695586e-06, "logits/chosen": -1.615206003189087, "logits/rejected": -1.6100891828536987, "logps/chosen": -130.00421142578125, "logps/rejected": -183.62319946289062, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.1894928216934204, "rewards/margins": 6.276408672332764, "rewards/rejected": -7.4659013748168945, "step": 7395 }, { "epoch": 1.64, "learning_rate": 9.59960894998482e-06, "logits/chosen": -1.6129112243652344, "logits/rejected": -1.6879732608795166, "logps/chosen": -194.5781707763672, "logps/rejected": -225.1976318359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.1264771223068237, "rewards/margins": 8.251673698425293, "rewards/rejected": -7.125196933746338, "step": 7396 }, { "epoch": 1.64, "learning_rate": 9.598905879235362e-06, "logits/chosen": -1.3713011741638184, "logits/rejected": -1.4046868085861206, "logps/chosen": -140.53219604492188, "logps/rejected": -147.1024169921875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 1.2911804914474487, "rewards/margins": 5.899623394012451, "rewards/rejected": -4.608442783355713, "step": 7397 }, { "epoch": 1.64, "learning_rate": 9.598202217537554e-06, "logits/chosen": -1.5294650793075562, "logits/rejected": -1.5434101819992065, "logps/chosen": -200.93283081054688, "logps/rejected": -195.64859008789062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.9834945797920227, "rewards/margins": 9.157244682312012, "rewards/rejected": -10.140739440917969, "step": 7398 }, { "epoch": 1.64, "learning_rate": 9.597497964981815e-06, "logits/chosen": -1.6014939546585083, "logits/rejected": -1.5073421001434326, "logps/chosen": -191.38418579101562, "logps/rejected": -282.462646484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 2.0315582752227783, "rewards/margins": 15.147577285766602, "rewards/rejected": -13.116019248962402, "step": 7399 }, { "epoch": 1.64, "learning_rate": 9.59679312165864e-06, "logits/chosen": -1.6367838382720947, "logits/rejected": -1.7156853675842285, "logps/chosen": -164.34230041503906, "logps/rejected": -156.08526611328125, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": -4.665535926818848, "rewards/margins": 2.490570068359375, "rewards/rejected": -7.156105995178223, "step": 7400 }, { "epoch": 1.64, "learning_rate": 9.596087687658598e-06, "logits/chosen": -1.7715572118759155, "logits/rejected": -1.7607141733169556, "logps/chosen": -158.92735290527344, "logps/rejected": -207.2126007080078, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 0.144093319773674, "rewards/margins": 6.127423286437988, "rewards/rejected": -5.983329772949219, "step": 7401 }, { "epoch": 1.64, "learning_rate": 9.595381663072335e-06, "logits/chosen": -1.4941089153289795, "logits/rejected": -1.4941089153289795, "logps/chosen": -151.45155334472656, "logps/rejected": -151.45155334472656, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -10.193404197692871, "rewards/margins": 0.0, "rewards/rejected": -10.193404197692871, "step": 7402 }, { "epoch": 1.64, "learning_rate": 9.594675047990578e-06, "logits/chosen": -1.7012765407562256, "logits/rejected": -1.7012765407562256, "logps/chosen": -140.33590698242188, "logps/rejected": -140.33590698242188, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -8.479789733886719, "rewards/margins": 0.0, "rewards/rejected": -8.479789733886719, "step": 7403 }, { "epoch": 1.64, "learning_rate": 9.593967842504121e-06, "logits/chosen": -2.0477051734924316, "logits/rejected": -2.107363700866699, "logps/chosen": -97.09727478027344, "logps/rejected": -94.4478759765625, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.1957473754882812, "rewards/margins": 4.818984508514404, "rewards/rejected": -7.0147318840026855, "step": 7404 }, { "epoch": 1.64, "learning_rate": 9.593260046703842e-06, "logits/chosen": -1.8396395444869995, "logits/rejected": -1.8396395444869995, "logps/chosen": -212.90936279296875, "logps/rejected": -212.90936279296875, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.078234910964966, "rewards/margins": 0.0, "rewards/rejected": -3.078234910964966, "step": 7405 }, { "epoch": 1.64, "learning_rate": 9.592551660680687e-06, "logits/chosen": -1.427821397781372, "logits/rejected": -1.4344977140426636, "logps/chosen": -158.630126953125, "logps/rejected": -148.27867126464844, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5155258178710938, "rewards/margins": 5.9011993408203125, "rewards/rejected": -6.416725158691406, "step": 7406 }, { "epoch": 1.64, "learning_rate": 9.591842684525685e-06, "logits/chosen": -1.6987175941467285, "logits/rejected": -1.7420130968093872, "logps/chosen": -259.11993408203125, "logps/rejected": -242.11087036132812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0403259992599487, "rewards/margins": 8.991689682006836, "rewards/rejected": -10.032015800476074, "step": 7407 }, { "epoch": 1.64, "learning_rate": 9.591133118329936e-06, "logits/chosen": -1.8522073030471802, "logits/rejected": -1.8617621660232544, "logps/chosen": -76.82245635986328, "logps/rejected": -154.53530883789062, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.8203186392784119, "rewards/margins": 5.7759294509887695, "rewards/rejected": -6.596248149871826, "step": 7408 }, { "epoch": 1.64, "learning_rate": 9.590422962184619e-06, "logits/chosen": -1.6133537292480469, "logits/rejected": -1.6133537292480469, "logps/chosen": -205.5187225341797, "logps/rejected": -205.5187225341797, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -12.224069595336914, "rewards/margins": 0.0, "rewards/rejected": -12.224069595336914, "step": 7409 }, { "epoch": 1.64, "learning_rate": 9.589712216180986e-06, "logits/chosen": -1.9963923692703247, "logits/rejected": -1.8219372034072876, "logps/chosen": -105.180419921875, "logps/rejected": -260.67083740234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.26282262802124, "rewards/margins": 7.286633014678955, "rewards/rejected": -11.549455642700195, "step": 7410 }, { "epoch": 1.64, "learning_rate": 9.589000880410366e-06, "logits/chosen": -1.5144195556640625, "logits/rejected": -1.4348200559616089, "logps/chosen": -131.84234619140625, "logps/rejected": -229.09408569335938, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": 0.070648193359375, "rewards/margins": 9.352167129516602, "rewards/rejected": -9.281518936157227, "step": 7411 }, { "epoch": 1.64, "learning_rate": 9.588288954964164e-06, "logits/chosen": -1.510953426361084, "logits/rejected": -1.6411755084991455, "logps/chosen": -229.68130493164062, "logps/rejected": -179.12234497070312, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -3.170945882797241, "rewards/margins": 4.11372184753418, "rewards/rejected": -7.28466796875, "step": 7412 }, { "epoch": 1.64, "learning_rate": 9.587576439933862e-06, "logits/chosen": -1.4488801956176758, "logits/rejected": -1.3777837753295898, "logps/chosen": -269.516357421875, "logps/rejected": -222.35621643066406, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 4.076489448547363, "rewards/margins": 14.819589614868164, "rewards/rejected": -10.7431001663208, "step": 7413 }, { "epoch": 1.64, "learning_rate": 9.586863335411017e-06, "logits/chosen": -1.8359993696212769, "logits/rejected": -1.7278331518173218, "logps/chosen": -197.80101013183594, "logps/rejected": -249.9287109375, "loss": 1.4155, "rewards/accuracies": 0.0, "rewards/chosen": -6.465709209442139, "rewards/margins": -2.753300666809082, "rewards/rejected": -3.7124085426330566, "step": 7414 }, { "epoch": 1.64, "learning_rate": 9.586149641487257e-06, "logits/chosen": -1.4568111896514893, "logits/rejected": -1.387277603149414, "logps/chosen": -79.55648803710938, "logps/rejected": -274.96807861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2239288091659546, "rewards/margins": 14.031949043273926, "rewards/rejected": -15.255877494812012, "step": 7415 }, { "epoch": 1.64, "learning_rate": 9.585435358254295e-06, "logits/chosen": -1.4581626653671265, "logits/rejected": -1.3710434436798096, "logps/chosen": -73.99524688720703, "logps/rejected": -239.3585968017578, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.4840980768203735, "rewards/margins": 7.3205180168151855, "rewards/rejected": -8.80461597442627, "step": 7416 }, { "epoch": 1.64, "learning_rate": 9.584720485803912e-06, "logits/chosen": -1.619685411453247, "logits/rejected": -1.7108336687088013, "logps/chosen": -162.2825927734375, "logps/rejected": -115.81218719482422, "loss": 0.0546, "rewards/accuracies": 1.0, "rewards/chosen": -1.0614197254180908, "rewards/margins": 2.1589972972869873, "rewards/rejected": -3.220417022705078, "step": 7417 }, { "epoch": 1.64, "learning_rate": 9.584005024227967e-06, "logits/chosen": -1.6875406503677368, "logits/rejected": -1.7270092964172363, "logps/chosen": -220.78546142578125, "logps/rejected": -204.96102905273438, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.260592937469482, "rewards/margins": 5.5132365226745605, "rewards/rejected": -9.773829460144043, "step": 7418 }, { "epoch": 1.64, "learning_rate": 9.583288973618398e-06, "logits/chosen": -1.7659916877746582, "logits/rejected": -1.6328284740447998, "logps/chosen": -161.59503173828125, "logps/rejected": -201.38661193847656, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": -1.7042144536972046, "rewards/margins": 2.1522841453552246, "rewards/rejected": -3.8564987182617188, "step": 7419 }, { "epoch": 1.64, "learning_rate": 9.582572334067213e-06, "logits/chosen": -1.606062889099121, "logits/rejected": -1.4889328479766846, "logps/chosen": -189.14332580566406, "logps/rejected": -216.80758666992188, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": -1.429377794265747, "rewards/margins": 2.1266798973083496, "rewards/rejected": -3.5560576915740967, "step": 7420 }, { "epoch": 1.64, "learning_rate": 9.581855105666497e-06, "logits/chosen": -1.6450707912445068, "logits/rejected": -1.647731065750122, "logps/chosen": -97.77890014648438, "logps/rejected": -120.43047332763672, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": 0.7640472650527954, "rewards/margins": 2.954875946044922, "rewards/rejected": -2.190828800201416, "step": 7421 }, { "epoch": 1.64, "learning_rate": 9.581137288508417e-06, "logits/chosen": -1.8665257692337036, "logits/rejected": -1.8665257692337036, "logps/chosen": -91.06129455566406, "logps/rejected": -91.06129455566406, "loss": 0.7739, "rewards/accuracies": 0.0, "rewards/chosen": -6.726961612701416, "rewards/margins": 0.0, "rewards/rejected": -6.726961612701416, "step": 7422 }, { "epoch": 1.64, "learning_rate": 9.580418882685208e-06, "logits/chosen": -1.364283561706543, "logits/rejected": -1.237917184829712, "logps/chosen": -96.30071258544922, "logps/rejected": -253.72438049316406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0638245344161987, "rewards/margins": 10.616533279418945, "rewards/rejected": -11.680357933044434, "step": 7423 }, { "epoch": 1.64, "learning_rate": 9.579699888289184e-06, "logits/chosen": -1.5130505561828613, "logits/rejected": -1.6285932064056396, "logps/chosen": -197.78526306152344, "logps/rejected": -122.38970947265625, "loss": 0.0975, "rewards/accuracies": 1.0, "rewards/chosen": -1.3179810047149658, "rewards/margins": 1.5355515480041504, "rewards/rejected": -2.853532552719116, "step": 7424 }, { "epoch": 1.64, "learning_rate": 9.578980305412733e-06, "logits/chosen": -1.4741382598876953, "logits/rejected": -1.4277361631393433, "logps/chosen": -191.07566833496094, "logps/rejected": -201.8766326904297, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -1.321711778640747, "rewards/margins": 6.378421783447266, "rewards/rejected": -7.700133800506592, "step": 7425 }, { "epoch": 1.64, "learning_rate": 9.57826013414832e-06, "logits/chosen": -1.9246670007705688, "logits/rejected": -1.9214669466018677, "logps/chosen": -127.32173156738281, "logps/rejected": -156.06350708007812, "loss": 0.134, "rewards/accuracies": 1.0, "rewards/chosen": -4.14236307144165, "rewards/margins": 1.1849861145019531, "rewards/rejected": -5.3273491859436035, "step": 7426 }, { "epoch": 1.64, "learning_rate": 9.577539374588486e-06, "logits/chosen": -1.497222900390625, "logits/rejected": -1.3430750370025635, "logps/chosen": -161.2987060546875, "logps/rejected": -253.55413818359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 1.0684127807617188, "rewards/margins": 9.97057056427002, "rewards/rejected": -8.9021577835083, "step": 7427 }, { "epoch": 1.64, "learning_rate": 9.576818026825846e-06, "logits/chosen": -1.689002275466919, "logits/rejected": -1.7397016286849976, "logps/chosen": -82.50299072265625, "logps/rejected": -87.5415267944336, "loss": 0.0884, "rewards/accuracies": 1.0, "rewards/chosen": 0.6129593253135681, "rewards/margins": 1.6437842845916748, "rewards/rejected": -1.0308250188827515, "step": 7428 }, { "epoch": 1.64, "learning_rate": 9.57609609095309e-06, "logits/chosen": -1.4768730401992798, "logits/rejected": -1.4889018535614014, "logps/chosen": -203.49769592285156, "logps/rejected": -130.45587158203125, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -0.2551162838935852, "rewards/margins": 3.546952962875366, "rewards/rejected": -3.8020691871643066, "step": 7429 }, { "epoch": 1.64, "learning_rate": 9.57537356706299e-06, "logits/chosen": -1.7414737939834595, "logits/rejected": -1.8022844791412354, "logps/chosen": -190.6616973876953, "logps/rejected": -147.94375610351562, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -5.765515327453613, "rewards/margins": 5.596653938293457, "rewards/rejected": -11.36216926574707, "step": 7430 }, { "epoch": 1.64, "learning_rate": 9.574650455248384e-06, "logits/chosen": -1.8034510612487793, "logits/rejected": -1.7448126077651978, "logps/chosen": -145.1778106689453, "logps/rejected": -230.95675659179688, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.302001953125, "rewards/margins": 4.573208808898926, "rewards/rejected": -4.875210762023926, "step": 7431 }, { "epoch": 1.64, "learning_rate": 9.573926755602194e-06, "logits/chosen": -1.445074200630188, "logits/rejected": -1.4297397136688232, "logps/chosen": -115.49876403808594, "logps/rejected": -252.50453186035156, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.6699159145355225, "rewards/margins": 7.083761215209961, "rewards/rejected": -10.753677368164062, "step": 7432 }, { "epoch": 1.65, "learning_rate": 9.573202468217408e-06, "logits/chosen": -1.3303066492080688, "logits/rejected": -1.164355993270874, "logps/chosen": -228.88931274414062, "logps/rejected": -356.64093017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.1696197986602783, "rewards/margins": 13.02275562286377, "rewards/rejected": -10.85313606262207, "step": 7433 }, { "epoch": 1.65, "learning_rate": 9.572477593187101e-06, "logits/chosen": -1.875502586364746, "logits/rejected": -1.8428542613983154, "logps/chosen": -100.93380737304688, "logps/rejected": -188.4715576171875, "loss": 0.3474, "rewards/accuracies": 1.0, "rewards/chosen": -0.16643905639648438, "rewards/margins": 6.363048553466797, "rewards/rejected": -6.529487609863281, "step": 7434 }, { "epoch": 1.65, "learning_rate": 9.571752130604414e-06, "logits/chosen": -1.736473560333252, "logits/rejected": -1.7853472232818604, "logps/chosen": -221.58761596679688, "logps/rejected": -224.95233154296875, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -2.714764356613159, "rewards/margins": 3.677386522293091, "rewards/rejected": -6.39215087890625, "step": 7435 }, { "epoch": 1.65, "learning_rate": 9.571026080562569e-06, "logits/chosen": -1.7612427473068237, "logits/rejected": -1.6902306079864502, "logps/chosen": -119.34893035888672, "logps/rejected": -192.1479949951172, "loss": 0.0991, "rewards/accuracies": 1.0, "rewards/chosen": -1.8940620422363281, "rewards/margins": 4.6963114738464355, "rewards/rejected": -6.590373516082764, "step": 7436 }, { "epoch": 1.65, "learning_rate": 9.57029944315486e-06, "logits/chosen": -1.3550760746002197, "logits/rejected": -1.3243153095245361, "logps/chosen": -118.396240234375, "logps/rejected": -131.1068878173828, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.3645293712615967, "rewards/margins": 4.915980339050293, "rewards/rejected": -7.280509948730469, "step": 7437 }, { "epoch": 1.65, "learning_rate": 9.569572218474662e-06, "logits/chosen": -1.5518803596496582, "logits/rejected": -1.3458211421966553, "logps/chosen": -146.9995574951172, "logps/rejected": -418.84674072265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.2281112670898438, "rewards/margins": 8.485661506652832, "rewards/rejected": -6.257550239562988, "step": 7438 }, { "epoch": 1.65, "learning_rate": 9.568844406615416e-06, "logits/chosen": -1.7657331228256226, "logits/rejected": -1.7696237564086914, "logps/chosen": -158.87051391601562, "logps/rejected": -181.72132873535156, "loss": 0.3446, "rewards/accuracies": 1.0, "rewards/chosen": -2.5803468227386475, "rewards/margins": 0.00829315185546875, "rewards/rejected": -2.588639974594116, "step": 7439 }, { "epoch": 1.65, "learning_rate": 9.568116007670647e-06, "logits/chosen": -1.4066146612167358, "logits/rejected": -1.3661398887634277, "logps/chosen": -170.69876098632812, "logps/rejected": -133.58245849609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.581455230712891, "rewards/margins": 6.090868949890137, "rewards/rejected": -10.672324180603027, "step": 7440 }, { "epoch": 1.65, "learning_rate": 9.567387021733954e-06, "logits/chosen": -2.30596661567688, "logits/rejected": -2.5106048583984375, "logps/chosen": -243.04925537109375, "logps/rejected": -277.97088623046875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.9511536359786987, "rewards/margins": 8.033309936523438, "rewards/rejected": -9.984463691711426, "step": 7441 }, { "epoch": 1.65, "learning_rate": 9.566657448899009e-06, "logits/chosen": -1.6147750616073608, "logits/rejected": -1.5470725297927856, "logps/chosen": -64.78317260742188, "logps/rejected": -159.83804321289062, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.417999267578125, "rewards/margins": 7.932064056396484, "rewards/rejected": -8.35006332397461, "step": 7442 }, { "epoch": 1.65, "learning_rate": 9.565927289259558e-06, "logits/chosen": -1.5909894704818726, "logits/rejected": -1.6598447561264038, "logps/chosen": -271.9482421875, "logps/rejected": -203.95089721679688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.726782202720642, "rewards/margins": 6.077906608581543, "rewards/rejected": -7.804688930511475, "step": 7443 }, { "epoch": 1.65, "learning_rate": 9.565196542909425e-06, "logits/chosen": -1.558653712272644, "logits/rejected": -1.5236189365386963, "logps/chosen": -127.09606170654297, "logps/rejected": -165.8319091796875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 1.7385109663009644, "rewards/margins": 4.3030924797058105, "rewards/rejected": -2.5645813941955566, "step": 7444 }, { "epoch": 1.65, "learning_rate": 9.564465209942512e-06, "logits/chosen": -1.2476706504821777, "logits/rejected": -1.2476706504821777, "logps/chosen": -334.2859802246094, "logps/rejected": -334.2859802246094, "loss": 0.3473, "rewards/accuracies": 0.0, "rewards/chosen": -11.360603332519531, "rewards/margins": 0.0, "rewards/rejected": -11.360603332519531, "step": 7445 }, { "epoch": 1.65, "learning_rate": 9.563733290452795e-06, "logits/chosen": -1.4968072175979614, "logits/rejected": -1.4660556316375732, "logps/chosen": -114.28428649902344, "logps/rejected": -187.47817993164062, "loss": 0.0369, "rewards/accuracies": 1.0, "rewards/chosen": -1.514260172843933, "rewards/margins": 2.570262908935547, "rewards/rejected": -4.0845232009887695, "step": 7446 }, { "epoch": 1.65, "learning_rate": 9.56300078453432e-06, "logits/chosen": -1.579875111579895, "logits/rejected": -1.5825914144515991, "logps/chosen": -115.56417846679688, "logps/rejected": -260.95660400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.966334581375122, "rewards/margins": 13.87126636505127, "rewards/rejected": -15.837600708007812, "step": 7447 }, { "epoch": 1.65, "learning_rate": 9.562267692281212e-06, "logits/chosen": -1.5886006355285645, "logits/rejected": -1.5501549243927002, "logps/chosen": -75.92153930664062, "logps/rejected": -105.09400177001953, "loss": 0.7354, "rewards/accuracies": 0.0, "rewards/chosen": -2.900440216064453, "rewards/margins": -1.2098647356033325, "rewards/rejected": -1.6905754804611206, "step": 7448 }, { "epoch": 1.65, "learning_rate": 9.561534013787671e-06, "logits/chosen": -1.4016634225845337, "logits/rejected": -1.3517608642578125, "logps/chosen": -94.07632446289062, "logps/rejected": -263.3285827636719, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4791831970214844, "rewards/margins": 8.282283782958984, "rewards/rejected": -9.761466979980469, "step": 7449 }, { "epoch": 1.65, "learning_rate": 9.560799749147977e-06, "logits/chosen": -1.6795152425765991, "logits/rejected": -1.4819209575653076, "logps/chosen": -120.47792053222656, "logps/rejected": -291.98016357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2326805591583252, "rewards/margins": 9.386574745178223, "rewards/rejected": -10.619255065917969, "step": 7450 }, { "epoch": 1.65, "learning_rate": 9.56006489845648e-06, "logits/chosen": -1.4595727920532227, "logits/rejected": -1.4077016115188599, "logps/chosen": -177.12966918945312, "logps/rejected": -220.6425323486328, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 3.6682984828948975, "rewards/margins": 9.963685989379883, "rewards/rejected": -6.295387268066406, "step": 7451 }, { "epoch": 1.65, "learning_rate": 9.559329461807605e-06, "logits/chosen": -1.3388012647628784, "logits/rejected": -1.0535924434661865, "logps/chosen": -193.68862915039062, "logps/rejected": -469.7943115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.17407074570655823, "rewards/margins": 30.14600372314453, "rewards/rejected": -30.3200740814209, "step": 7452 }, { "epoch": 1.65, "learning_rate": 9.558593439295853e-06, "logits/chosen": -1.6250529289245605, "logits/rejected": -1.7273101806640625, "logps/chosen": -258.9921875, "logps/rejected": -195.0775146484375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.8739959597587585, "rewards/margins": 9.727340698242188, "rewards/rejected": -10.601336479187012, "step": 7453 }, { "epoch": 1.65, "learning_rate": 9.557856831015805e-06, "logits/chosen": -1.3304095268249512, "logits/rejected": -1.3473267555236816, "logps/chosen": -147.36798095703125, "logps/rejected": -123.5632095336914, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": -7.006761074066162, "rewards/margins": 3.3507113456726074, "rewards/rejected": -10.35747241973877, "step": 7454 }, { "epoch": 1.65, "learning_rate": 9.55711963706211e-06, "logits/chosen": -1.3386030197143555, "logits/rejected": -1.2381412982940674, "logps/chosen": -162.49295043945312, "logps/rejected": -173.19000244140625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.5278077125549316, "rewards/margins": 6.580394268035889, "rewards/rejected": -9.10820198059082, "step": 7455 }, { "epoch": 1.65, "learning_rate": 9.556381857529497e-06, "logits/chosen": -1.558550477027893, "logits/rejected": -1.600759506225586, "logps/chosen": -169.8780517578125, "logps/rejected": -108.32711791992188, "loss": 0.1628, "rewards/accuracies": 1.0, "rewards/chosen": -7.274588108062744, "rewards/margins": 0.9567685127258301, "rewards/rejected": -8.231356620788574, "step": 7456 }, { "epoch": 1.65, "learning_rate": 9.555643492512767e-06, "logits/chosen": -1.556453824043274, "logits/rejected": -1.3167243003845215, "logps/chosen": -163.7218017578125, "logps/rejected": -370.6568603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1832672357559204, "rewards/margins": 9.361727714538574, "rewards/rejected": -10.544995307922363, "step": 7457 }, { "epoch": 1.65, "learning_rate": 9.554904542106802e-06, "logits/chosen": -1.2388737201690674, "logits/rejected": -1.1755201816558838, "logps/chosen": -99.61280822753906, "logps/rejected": -270.9391174316406, "loss": 0.0424, "rewards/accuracies": 1.0, "rewards/chosen": -0.36871033906936646, "rewards/margins": 9.029258728027344, "rewards/rejected": -9.397969245910645, "step": 7458 }, { "epoch": 1.65, "learning_rate": 9.55416500640655e-06, "logits/chosen": -1.8126720190048218, "logits/rejected": -1.7883292436599731, "logps/chosen": -166.0131378173828, "logps/rejected": -245.66558837890625, "loss": 0.1224, "rewards/accuracies": 1.0, "rewards/chosen": 0.6269363760948181, "rewards/margins": 10.377873420715332, "rewards/rejected": -9.750937461853027, "step": 7459 }, { "epoch": 1.65, "learning_rate": 9.553424885507045e-06, "logits/chosen": -1.6572812795639038, "logits/rejected": -1.6011228561401367, "logps/chosen": -116.23101043701172, "logps/rejected": -213.75076293945312, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.94146728515625, "rewards/margins": 5.9840087890625, "rewards/rejected": -9.92547607421875, "step": 7460 }, { "epoch": 1.65, "learning_rate": 9.552684179503389e-06, "logits/chosen": -1.4755079746246338, "logits/rejected": -1.4322528839111328, "logps/chosen": -107.09046936035156, "logps/rejected": -194.98757934570312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.0518807172775269, "rewards/margins": 7.737859725952148, "rewards/rejected": -8.789740562438965, "step": 7461 }, { "epoch": 1.65, "learning_rate": 9.551942888490759e-06, "logits/chosen": -1.6063886880874634, "logits/rejected": -1.5184327363967896, "logps/chosen": -99.47505187988281, "logps/rejected": -205.0041046142578, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.5164756774902344, "rewards/margins": 7.802728652954102, "rewards/rejected": -10.319204330444336, "step": 7462 }, { "epoch": 1.65, "learning_rate": 9.55120101256441e-06, "logits/chosen": -1.6869510412216187, "logits/rejected": -1.6044570207595825, "logps/chosen": -88.50408935546875, "logps/rejected": -143.43850708007812, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.0719925165176392, "rewards/margins": 5.306622505187988, "rewards/rejected": -6.378614902496338, "step": 7463 }, { "epoch": 1.65, "learning_rate": 9.550458551819672e-06, "logits/chosen": -1.6544524431228638, "logits/rejected": -1.6617209911346436, "logps/chosen": -167.05438232421875, "logps/rejected": -157.8212890625, "loss": 0.0591, "rewards/accuracies": 1.0, "rewards/chosen": -1.632222056388855, "rewards/margins": 4.12412691116333, "rewards/rejected": -5.756349086761475, "step": 7464 }, { "epoch": 1.65, "learning_rate": 9.54971550635195e-06, "logits/chosen": -1.5513304471969604, "logits/rejected": -1.567150354385376, "logps/chosen": -102.14326477050781, "logps/rejected": -123.62633514404297, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.3853866755962372, "rewards/margins": 9.913517951965332, "rewards/rejected": -9.528131484985352, "step": 7465 }, { "epoch": 1.65, "learning_rate": 9.548971876256721e-06, "logits/chosen": -2.1183342933654785, "logits/rejected": -1.98008394241333, "logps/chosen": -77.73150634765625, "logps/rejected": -326.37896728515625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.8352905511856079, "rewards/margins": 15.811322212219238, "rewards/rejected": -14.976031303405762, "step": 7466 }, { "epoch": 1.65, "learning_rate": 9.548227661629541e-06, "logits/chosen": -1.8570353984832764, "logits/rejected": -1.8625266551971436, "logps/chosen": -94.40584564208984, "logps/rejected": -88.4303970336914, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 1.6172767877578735, "rewards/margins": 4.032493591308594, "rewards/rejected": -2.4152169227600098, "step": 7467 }, { "epoch": 1.65, "learning_rate": 9.547482862566043e-06, "logits/chosen": -1.2063066959381104, "logits/rejected": -1.1986281871795654, "logps/chosen": -102.51545715332031, "logps/rejected": -150.64288330078125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": 0.14050140976905823, "rewards/margins": 4.212582588195801, "rewards/rejected": -4.072081089019775, "step": 7468 }, { "epoch": 1.65, "learning_rate": 9.546737479161926e-06, "logits/chosen": -1.3537342548370361, "logits/rejected": -1.3319491147994995, "logps/chosen": -87.09809875488281, "logps/rejected": -148.03521728515625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.470733642578125, "rewards/margins": 5.7067461013793945, "rewards/rejected": -6.1774797439575195, "step": 7469 }, { "epoch": 1.65, "learning_rate": 9.545991511512975e-06, "logits/chosen": -1.4044396877288818, "logits/rejected": -1.2017379999160767, "logps/chosen": -245.15087890625, "logps/rejected": -368.4482421875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.809637546539307, "rewards/margins": 7.001234531402588, "rewards/rejected": -11.810872077941895, "step": 7470 }, { "epoch": 1.65, "learning_rate": 9.545244959715041e-06, "logits/chosen": -1.3788483142852783, "logits/rejected": -1.590932846069336, "logps/chosen": -238.66598510742188, "logps/rejected": -165.59902954101562, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -6.047107219696045, "rewards/margins": 5.857823848724365, "rewards/rejected": -11.90493106842041, "step": 7471 }, { "epoch": 1.65, "learning_rate": 9.544497823864058e-06, "logits/chosen": -1.4368765354156494, "logits/rejected": -1.478151798248291, "logps/chosen": -156.7240753173828, "logps/rejected": -156.93362426757812, "loss": 0.4362, "rewards/accuracies": 0.0, "rewards/chosen": -5.922910213470459, "rewards/margins": -0.330594539642334, "rewards/rejected": -5.592315673828125, "step": 7472 }, { "epoch": 1.65, "learning_rate": 9.543750104056029e-06, "logits/chosen": -1.8049033880233765, "logits/rejected": -1.4871686697006226, "logps/chosen": -101.66912841796875, "logps/rejected": -361.59375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.7266845703125, "rewards/margins": 8.433445930480957, "rewards/rejected": -11.160130500793457, "step": 7473 }, { "epoch": 1.65, "learning_rate": 9.543001800387034e-06, "logits/chosen": -1.3976867198944092, "logits/rejected": -1.4684444665908813, "logps/chosen": -185.92527770996094, "logps/rejected": -190.6088104248047, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -2.1542954444885254, "rewards/margins": 7.235392093658447, "rewards/rejected": -9.389687538146973, "step": 7474 }, { "epoch": 1.65, "learning_rate": 9.54225291295323e-06, "logits/chosen": -1.5566059350967407, "logits/rejected": -1.1499574184417725, "logps/chosen": -217.80966186523438, "logps/rejected": -1176.8759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.2919845581054688, "rewards/margins": 108.73049926757812, "rewards/rejected": -106.43851470947266, "step": 7475 }, { "epoch": 1.65, "learning_rate": 9.541503441850844e-06, "logits/chosen": -1.8265058994293213, "logits/rejected": -1.9086511135101318, "logps/chosen": -138.83480834960938, "logps/rejected": -109.82894897460938, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.458242893218994, "rewards/margins": 5.267674922943115, "rewards/rejected": -7.725917816162109, "step": 7476 }, { "epoch": 1.65, "learning_rate": 9.540753387176183e-06, "logits/chosen": -1.1466429233551025, "logits/rejected": -1.1084812879562378, "logps/chosen": -147.20358276367188, "logps/rejected": -320.5875244140625, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -1.7276214361190796, "rewards/margins": 3.389834403991699, "rewards/rejected": -5.117455959320068, "step": 7477 }, { "epoch": 1.66, "learning_rate": 9.54000274902563e-06, "logits/chosen": -1.6480045318603516, "logits/rejected": -1.6319481134414673, "logps/chosen": -192.22952270507812, "logps/rejected": -125.79259490966797, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.3889130353927612, "rewards/margins": 7.5063934326171875, "rewards/rejected": -8.895306587219238, "step": 7478 }, { "epoch": 1.66, "learning_rate": 9.539251527495636e-06, "logits/chosen": -1.6223667860031128, "logits/rejected": -1.665809988975525, "logps/chosen": -176.3780517578125, "logps/rejected": -160.37808227539062, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -8.603724479675293, "rewards/margins": 3.411688804626465, "rewards/rejected": -12.015413284301758, "step": 7479 }, { "epoch": 1.66, "learning_rate": 9.538499722682733e-06, "logits/chosen": -1.7820589542388916, "logits/rejected": -1.7079193592071533, "logps/chosen": -99.29316711425781, "logps/rejected": -156.54544067382812, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": -3.461332082748413, "rewards/margins": 5.06007194519043, "rewards/rejected": -8.521404266357422, "step": 7480 }, { "epoch": 1.66, "learning_rate": 9.537747334683524e-06, "logits/chosen": -1.35710608959198, "logits/rejected": -1.3186542987823486, "logps/chosen": -230.96173095703125, "logps/rejected": -169.08538818359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.82135009765625, "rewards/margins": 5.926509857177734, "rewards/rejected": -7.747859954833984, "step": 7481 }, { "epoch": 1.66, "learning_rate": 9.536994363594694e-06, "logits/chosen": -1.7085341215133667, "logits/rejected": -1.7085341215133667, "logps/chosen": -87.83403015136719, "logps/rejected": -87.83403015136719, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.4220261573791504, "rewards/margins": 0.0, "rewards/rejected": -2.4220261573791504, "step": 7482 }, { "epoch": 1.66, "learning_rate": 9.536240809512994e-06, "logits/chosen": -1.405863642692566, "logits/rejected": -0.8538798093795776, "logps/chosen": -111.35450744628906, "logps/rejected": -729.246826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0569305419921875, "rewards/margins": 63.280982971191406, "rewards/rejected": -64.3379135131836, "step": 7483 }, { "epoch": 1.66, "learning_rate": 9.535486672535255e-06, "logits/chosen": -1.83994460105896, "logits/rejected": -1.7886971235275269, "logps/chosen": -102.52151489257812, "logps/rejected": -170.78102111816406, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.3567413091659546, "rewards/margins": 4.072072505950928, "rewards/rejected": -5.428813934326172, "step": 7484 }, { "epoch": 1.66, "learning_rate": 9.53473195275838e-06, "logits/chosen": -1.4018281698226929, "logits/rejected": -1.3890568017959595, "logps/chosen": -120.77505493164062, "logps/rejected": -103.63252258300781, "loss": 0.3569, "rewards/accuracies": 0.0, "rewards/chosen": -2.2271668910980225, "rewards/margins": -0.04092264175415039, "rewards/rejected": -2.186244249343872, "step": 7485 }, { "epoch": 1.66, "learning_rate": 9.53397665027935e-06, "logits/chosen": -1.6130362749099731, "logits/rejected": -1.5575892925262451, "logps/chosen": -89.39839172363281, "logps/rejected": -181.41734313964844, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -1.1047309637069702, "rewards/margins": 5.121140480041504, "rewards/rejected": -6.225871562957764, "step": 7486 }, { "epoch": 1.66, "learning_rate": 9.533220765195223e-06, "logits/chosen": -1.4436290264129639, "logits/rejected": -1.4209450483322144, "logps/chosen": -91.4632797241211, "logps/rejected": -118.31861877441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.11944961547851562, "rewards/margins": 9.2985200881958, "rewards/rejected": -9.417969703674316, "step": 7487 }, { "epoch": 1.66, "learning_rate": 9.532464297603124e-06, "logits/chosen": -1.8281711339950562, "logits/rejected": -1.7912061214447021, "logps/chosen": -122.43780517578125, "logps/rejected": -236.27932739257812, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.9001014828681946, "rewards/margins": 10.492867469787598, "rewards/rejected": -11.392969131469727, "step": 7488 }, { "epoch": 1.66, "learning_rate": 9.531707247600258e-06, "logits/chosen": -1.7584484815597534, "logits/rejected": -1.7415231466293335, "logps/chosen": -131.95718383789062, "logps/rejected": -240.44207763671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.33724671602249146, "rewards/margins": 12.696569442749023, "rewards/rejected": -13.03381633758545, "step": 7489 }, { "epoch": 1.66, "learning_rate": 9.530949615283902e-06, "logits/chosen": -1.4668238162994385, "logits/rejected": -1.4613972902297974, "logps/chosen": -91.92994689941406, "logps/rejected": -69.53237915039062, "loss": 0.0689, "rewards/accuracies": 1.0, "rewards/chosen": 0.31871873140335083, "rewards/margins": 1.9500882625579834, "rewards/rejected": -1.6313694715499878, "step": 7490 }, { "epoch": 1.66, "learning_rate": 9.530191400751416e-06, "logits/chosen": -1.6233808994293213, "logits/rejected": -1.2074120044708252, "logps/chosen": -142.83090209960938, "logps/rejected": -1182.1986083984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6349586248397827, "rewards/margins": 109.34385681152344, "rewards/rejected": -110.97881317138672, "step": 7491 }, { "epoch": 1.66, "learning_rate": 9.529432604100223e-06, "logits/chosen": -1.3233120441436768, "logits/rejected": -1.3183469772338867, "logps/chosen": -88.20519256591797, "logps/rejected": -171.5012664794922, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.1682655811309814, "rewards/margins": 7.292816162109375, "rewards/rejected": -10.461081504821777, "step": 7492 }, { "epoch": 1.66, "learning_rate": 9.528673225427831e-06, "logits/chosen": -1.5117243528366089, "logits/rejected": -1.4745838642120361, "logps/chosen": -153.90797424316406, "logps/rejected": -182.79531860351562, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.4177353382110596, "rewards/margins": 6.730518341064453, "rewards/rejected": -8.148253440856934, "step": 7493 }, { "epoch": 1.66, "learning_rate": 9.527913264831817e-06, "logits/chosen": -1.1003825664520264, "logits/rejected": -1.1003825664520264, "logps/chosen": -79.91847229003906, "logps/rejected": -79.91847229003906, "loss": 0.3478, "rewards/accuracies": 0.0, "rewards/chosen": -6.425216197967529, "rewards/margins": 0.0, "rewards/rejected": -6.425216197967529, "step": 7494 }, { "epoch": 1.66, "learning_rate": 9.52715272240983e-06, "logits/chosen": -1.2130060195922852, "logits/rejected": -0.9892227053642273, "logps/chosen": -96.28216552734375, "logps/rejected": -392.87701416015625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.2640434503555298, "rewards/margins": 16.03960609436035, "rewards/rejected": -17.30364990234375, "step": 7495 }, { "epoch": 1.66, "learning_rate": 9.526391598259604e-06, "logits/chosen": -1.4447481632232666, "logits/rejected": -1.4447481632232666, "logps/chosen": -121.21147155761719, "logps/rejected": -121.21147155761719, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -7.479778289794922, "rewards/margins": 0.0, "rewards/rejected": -7.479778289794922, "step": 7496 }, { "epoch": 1.66, "learning_rate": 9.525629892478936e-06, "logits/chosen": -1.4233341217041016, "logits/rejected": -1.2971315383911133, "logps/chosen": -190.74411010742188, "logps/rejected": -383.8677673339844, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.1809066534042358, "rewards/margins": 13.065709114074707, "rewards/rejected": -14.246615409851074, "step": 7497 }, { "epoch": 1.66, "learning_rate": 9.524867605165709e-06, "logits/chosen": -1.5100696086883545, "logits/rejected": -1.48163902759552, "logps/chosen": -99.27086639404297, "logps/rejected": -223.49282836914062, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -0.6623062491416931, "rewards/margins": 7.490421772003174, "rewards/rejected": -8.152728080749512, "step": 7498 }, { "epoch": 1.66, "learning_rate": 9.52410473641787e-06, "logits/chosen": -1.8271476030349731, "logits/rejected": -1.8271476030349731, "logps/chosen": -117.72016906738281, "logps/rejected": -117.72016906738281, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.972294569015503, "rewards/margins": 0.0, "rewards/rejected": -2.972294569015503, "step": 7499 }, { "epoch": 1.66, "learning_rate": 9.523341286333448e-06, "logits/chosen": -1.408488392829895, "logits/rejected": -1.379001259803772, "logps/chosen": -100.50569152832031, "logps/rejected": -187.714111328125, "loss": 0.3586, "rewards/accuracies": 1.0, "rewards/chosen": -2.458256483078003, "rewards/margins": 3.714350461959839, "rewards/rejected": -6.172606945037842, "step": 7500 }, { "epoch": 1.66, "learning_rate": 9.522577255010546e-06, "logits/chosen": -1.7450122833251953, "logits/rejected": -1.5484833717346191, "logps/chosen": -66.49761199951172, "logps/rejected": -328.91241455078125, "loss": 0.7259, "rewards/accuracies": 1.0, "rewards/chosen": -0.111699678003788, "rewards/margins": 17.991596221923828, "rewards/rejected": -18.103296279907227, "step": 7501 }, { "epoch": 1.66, "learning_rate": 9.521812642547337e-06, "logits/chosen": -1.3934595584869385, "logits/rejected": -1.400743007659912, "logps/chosen": -237.32955932617188, "logps/rejected": -142.2922821044922, "loss": 0.1735, "rewards/accuracies": 1.0, "rewards/chosen": -3.1788055896759033, "rewards/margins": 0.8873941898345947, "rewards/rejected": -4.066199779510498, "step": 7502 }, { "epoch": 1.66, "learning_rate": 9.521047449042075e-06, "logits/chosen": -1.5338114500045776, "logits/rejected": -1.5287010669708252, "logps/chosen": -149.12213134765625, "logps/rejected": -139.53182983398438, "loss": 0.1081, "rewards/accuracies": 1.0, "rewards/chosen": -7.415720462799072, "rewards/margins": 1.421536922454834, "rewards/rejected": -8.837257385253906, "step": 7503 }, { "epoch": 1.66, "learning_rate": 9.520281674593084e-06, "logits/chosen": -1.38101065158844, "logits/rejected": -1.3769969940185547, "logps/chosen": -143.58863830566406, "logps/rejected": -228.7826690673828, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": -3.441075086593628, "rewards/margins": 2.8730623722076416, "rewards/rejected": -6.3141374588012695, "step": 7504 }, { "epoch": 1.66, "learning_rate": 9.519515319298765e-06, "logits/chosen": -1.4681830406188965, "logits/rejected": -1.5272269248962402, "logps/chosen": -210.51214599609375, "logps/rejected": -114.15477752685547, "loss": 0.5223, "rewards/accuracies": 0.0, "rewards/chosen": -10.099319458007812, "rewards/margins": -0.6109714508056641, "rewards/rejected": -9.488348007202148, "step": 7505 }, { "epoch": 1.66, "learning_rate": 9.51874838325759e-06, "logits/chosen": -1.5173121690750122, "logits/rejected": -1.261375904083252, "logps/chosen": -209.5318603515625, "logps/rejected": -317.3455505371094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.4120727479457855, "rewards/margins": 8.897114753723145, "rewards/rejected": -9.309187889099121, "step": 7506 }, { "epoch": 1.66, "learning_rate": 9.517980866568112e-06, "logits/chosen": -1.7337530851364136, "logits/rejected": -1.7304867506027222, "logps/chosen": -104.37774658203125, "logps/rejected": -122.08419036865234, "loss": 0.1249, "rewards/accuracies": 1.0, "rewards/chosen": -1.0155105590820312, "rewards/margins": 1.2596192359924316, "rewards/rejected": -2.275129795074463, "step": 7507 }, { "epoch": 1.66, "learning_rate": 9.517212769328952e-06, "logits/chosen": -1.9024229049682617, "logits/rejected": -1.9094158411026, "logps/chosen": -87.37379455566406, "logps/rejected": -124.70423889160156, "loss": 0.1209, "rewards/accuracies": 1.0, "rewards/chosen": -0.15554046630859375, "rewards/margins": 3.6358933448791504, "rewards/rejected": -3.791433811187744, "step": 7508 }, { "epoch": 1.66, "learning_rate": 9.516444091638812e-06, "logits/chosen": -1.457169532775879, "logits/rejected": -1.457169532775879, "logps/chosen": -277.5856018066406, "logps/rejected": -277.5856018066406, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -16.4378662109375, "rewards/margins": 0.0, "rewards/rejected": -16.4378662109375, "step": 7509 }, { "epoch": 1.66, "learning_rate": 9.515674833596464e-06, "logits/chosen": -1.4795488119125366, "logits/rejected": -1.5411244630813599, "logps/chosen": -232.31216430664062, "logps/rejected": -142.5489501953125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 2.2210662364959717, "rewards/margins": 5.7298150062561035, "rewards/rejected": -3.508748769760132, "step": 7510 }, { "epoch": 1.66, "learning_rate": 9.514904995300754e-06, "logits/chosen": -1.6182008981704712, "logits/rejected": -1.6261266469955444, "logps/chosen": -163.51890563964844, "logps/rejected": -186.71337890625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 2.562643527984619, "rewards/margins": 6.019625663757324, "rewards/rejected": -3.456982374191284, "step": 7511 }, { "epoch": 1.66, "learning_rate": 9.514134576850605e-06, "logits/chosen": -1.5033504962921143, "logits/rejected": -1.5033504962921143, "logps/chosen": -253.56753540039062, "logps/rejected": -253.56753540039062, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -9.583148002624512, "rewards/margins": 0.0, "rewards/rejected": -9.583148002624512, "step": 7512 }, { "epoch": 1.66, "learning_rate": 9.513363578345014e-06, "logits/chosen": -1.5773197412490845, "logits/rejected": -1.5625979900360107, "logps/chosen": -195.66146850585938, "logps/rejected": -206.5238800048828, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.173449754714966, "rewards/margins": 8.517844200134277, "rewards/rejected": -10.691293716430664, "step": 7513 }, { "epoch": 1.66, "learning_rate": 9.512591999883056e-06, "logits/chosen": -1.592816948890686, "logits/rejected": -1.5400285720825195, "logps/chosen": -124.34957122802734, "logps/rejected": -175.91432189941406, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.7545814514160156, "rewards/margins": 6.853565216064453, "rewards/rejected": -8.608146667480469, "step": 7514 }, { "epoch": 1.66, "learning_rate": 9.511819841563872e-06, "logits/chosen": -1.5357507467269897, "logits/rejected": -1.3838591575622559, "logps/chosen": -163.73892211914062, "logps/rejected": -296.5469055175781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.7660095691680908, "rewards/margins": 14.071433067321777, "rewards/rejected": -15.837442398071289, "step": 7515 }, { "epoch": 1.66, "learning_rate": 9.511047103486685e-06, "logits/chosen": -1.525778889656067, "logits/rejected": -1.5409562587738037, "logps/chosen": -194.31973266601562, "logps/rejected": -206.83689880371094, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.644238233566284, "rewards/margins": 8.171712875366211, "rewards/rejected": -11.815951347351074, "step": 7516 }, { "epoch": 1.66, "learning_rate": 9.510273785750788e-06, "logits/chosen": -1.5828856229782104, "logits/rejected": -1.5600194931030273, "logps/chosen": -132.66835021972656, "logps/rejected": -160.85484313964844, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": -3.4948060512542725, "rewards/margins": 2.346299886703491, "rewards/rejected": -5.841105937957764, "step": 7517 }, { "epoch": 1.66, "learning_rate": 9.509499888455554e-06, "logits/chosen": -1.779255747795105, "logits/rejected": -1.5534248352050781, "logps/chosen": -104.59281921386719, "logps/rejected": -345.7587585449219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.897071123123169, "rewards/margins": 9.359848976135254, "rewards/rejected": -11.256919860839844, "step": 7518 }, { "epoch": 1.66, "learning_rate": 9.508725411700424e-06, "logits/chosen": -1.589438557624817, "logits/rejected": -1.6531490087509155, "logps/chosen": -230.01309204101562, "logps/rejected": -276.254638671875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.3806488513946533, "rewards/margins": 11.03007698059082, "rewards/rejected": -12.410725593566895, "step": 7519 }, { "epoch": 1.66, "learning_rate": 9.507950355584917e-06, "logits/chosen": -1.3470209836959839, "logits/rejected": -0.989604115486145, "logps/chosen": -204.5767364501953, "logps/rejected": -971.2147216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.0348678827285767, "rewards/margins": 75.99942016601562, "rewards/rejected": -74.96455383300781, "step": 7520 }, { "epoch": 1.66, "learning_rate": 9.507174720208627e-06, "logits/chosen": -1.6784934997558594, "logits/rejected": -1.7562388181686401, "logps/chosen": -88.8055419921875, "logps/rejected": -82.09701538085938, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.1704047918319702, "rewards/margins": 5.277260780334473, "rewards/rejected": -6.447665691375732, "step": 7521 }, { "epoch": 1.66, "learning_rate": 9.50639850567122e-06, "logits/chosen": -2.1016364097595215, "logits/rejected": -2.1016364097595215, "logps/chosen": -195.46438598632812, "logps/rejected": -195.46438598632812, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -11.026732444763184, "rewards/margins": 0.0, "rewards/rejected": -11.026732444763184, "step": 7522 }, { "epoch": 1.67, "learning_rate": 9.505621712072437e-06, "logits/chosen": -1.5539215803146362, "logits/rejected": -1.5009229183197021, "logps/chosen": -119.04116821289062, "logps/rejected": -152.5131072998047, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -1.1325629949569702, "rewards/margins": 3.389507293701172, "rewards/rejected": -4.522070407867432, "step": 7523 }, { "epoch": 1.67, "learning_rate": 9.504844339512096e-06, "logits/chosen": -1.3146307468414307, "logits/rejected": -1.1485099792480469, "logps/chosen": -126.15750122070312, "logps/rejected": -280.3976135253906, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.033430576324463, "rewards/margins": 6.151170253753662, "rewards/rejected": -8.184600830078125, "step": 7524 }, { "epoch": 1.67, "learning_rate": 9.504066388090088e-06, "logits/chosen": -1.5699727535247803, "logits/rejected": -1.560844898223877, "logps/chosen": -159.98239135742188, "logps/rejected": -196.56979370117188, "loss": 0.1526, "rewards/accuracies": 1.0, "rewards/chosen": -6.32309103012085, "rewards/margins": 4.42025899887085, "rewards/rejected": -10.7433500289917, "step": 7525 }, { "epoch": 1.67, "learning_rate": 9.503287857906374e-06, "logits/chosen": -1.8941640853881836, "logits/rejected": -1.0949649810791016, "logps/chosen": -122.23194885253906, "logps/rejected": -433.79803466796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5733108520507812, "rewards/margins": 25.55787467956543, "rewards/rejected": -27.13118553161621, "step": 7526 }, { "epoch": 1.67, "learning_rate": 9.502508749060998e-06, "logits/chosen": -1.3682163953781128, "logits/rejected": -1.370474934577942, "logps/chosen": -240.58917236328125, "logps/rejected": -272.5576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.6707763671875, "rewards/margins": 15.941904067993164, "rewards/rejected": -13.271127700805664, "step": 7527 }, { "epoch": 1.67, "learning_rate": 9.50172906165407e-06, "logits/chosen": -1.8213715553283691, "logits/rejected": -1.7971727848052979, "logps/chosen": -138.05795288085938, "logps/rejected": -193.42965698242188, "loss": 0.3821, "rewards/accuracies": 1.0, "rewards/chosen": -4.8809494972229, "rewards/margins": 2.6098580360412598, "rewards/rejected": -7.49080753326416, "step": 7528 }, { "epoch": 1.67, "learning_rate": 9.50094879578578e-06, "logits/chosen": -1.548948884010315, "logits/rejected": -1.5201972723007202, "logps/chosen": -144.79893493652344, "logps/rejected": -240.36724853515625, "loss": 0.662, "rewards/accuracies": 1.0, "rewards/chosen": -5.8061842918396, "rewards/margins": 5.1440815925598145, "rewards/rejected": -10.950265884399414, "step": 7529 }, { "epoch": 1.67, "learning_rate": 9.500167951556392e-06, "logits/chosen": -1.9733502864837646, "logits/rejected": -1.950124979019165, "logps/chosen": -155.1169891357422, "logps/rejected": -191.6146240234375, "loss": 0.0308, "rewards/accuracies": 1.0, "rewards/chosen": -9.098943710327148, "rewards/margins": 2.8475208282470703, "rewards/rejected": -11.946464538574219, "step": 7530 }, { "epoch": 1.67, "learning_rate": 9.499386529066236e-06, "logits/chosen": -1.7246471643447876, "logits/rejected": -1.7386531829833984, "logps/chosen": -64.55541229248047, "logps/rejected": -88.68978118896484, "loss": 0.3477, "rewards/accuracies": 1.0, "rewards/chosen": -0.7954918146133423, "rewards/margins": 6.076254367828369, "rewards/rejected": -6.871746063232422, "step": 7531 }, { "epoch": 1.67, "learning_rate": 9.498604528415731e-06, "logits/chosen": -1.1790986061096191, "logits/rejected": -1.117940902709961, "logps/chosen": -243.8941650390625, "logps/rejected": -223.758056640625, "loss": 0.1647, "rewards/accuracies": 1.0, "rewards/chosen": -1.457733154296875, "rewards/margins": 0.9459550380706787, "rewards/rejected": -2.4036881923675537, "step": 7532 }, { "epoch": 1.67, "learning_rate": 9.497821949705356e-06, "logits/chosen": -1.5665326118469238, "logits/rejected": -1.5610965490341187, "logps/chosen": -330.407958984375, "logps/rejected": -353.3929443359375, "loss": 0.2449, "rewards/accuracies": 1.0, "rewards/chosen": -8.330316543579102, "rewards/margins": 0.46195316314697266, "rewards/rejected": -8.792269706726074, "step": 7533 }, { "epoch": 1.67, "learning_rate": 9.497038793035674e-06, "logits/chosen": -1.3116340637207031, "logits/rejected": -1.192820429801941, "logps/chosen": -266.9194641113281, "logps/rejected": -344.7188415527344, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.4801971912384033, "rewards/margins": 7.2210540771484375, "rewards/rejected": -8.701251029968262, "step": 7534 }, { "epoch": 1.67, "learning_rate": 9.496255058507318e-06, "logits/chosen": -1.6048715114593506, "logits/rejected": -1.5941306352615356, "logps/chosen": -93.3525161743164, "logps/rejected": -94.51737976074219, "loss": 0.0845, "rewards/accuracies": 1.0, "rewards/chosen": -0.8934432864189148, "rewards/margins": 1.6962432861328125, "rewards/rejected": -2.589686632156372, "step": 7535 }, { "epoch": 1.67, "learning_rate": 9.495470746220995e-06, "logits/chosen": -1.4414303302764893, "logits/rejected": -1.4414303302764893, "logps/chosen": -103.38744354248047, "logps/rejected": -103.38744354248047, "loss": 0.347, "rewards/accuracies": 0.0, "rewards/chosen": -3.976245880126953, "rewards/margins": 0.0, "rewards/rejected": -3.976245880126953, "step": 7536 }, { "epoch": 1.67, "learning_rate": 9.494685856277488e-06, "logits/chosen": -1.6653692722320557, "logits/rejected": -1.719539761543274, "logps/chosen": -157.99420166015625, "logps/rejected": -126.96810150146484, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.9306656122207642, "rewards/margins": 7.194840908050537, "rewards/rejected": -9.125506401062012, "step": 7537 }, { "epoch": 1.67, "learning_rate": 9.493900388777654e-06, "logits/chosen": -1.4254047870635986, "logits/rejected": -1.3480229377746582, "logps/chosen": -190.69309997558594, "logps/rejected": -235.53248596191406, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 2.1968369483947754, "rewards/margins": 9.0152587890625, "rewards/rejected": -6.818421840667725, "step": 7538 }, { "epoch": 1.67, "learning_rate": 9.493114343822422e-06, "logits/chosen": -1.9431318044662476, "logits/rejected": -1.7351402044296265, "logps/chosen": -126.71973419189453, "logps/rejected": -342.7171936035156, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -1.800270915031433, "rewards/margins": 19.03861427307129, "rewards/rejected": -20.838884353637695, "step": 7539 }, { "epoch": 1.67, "learning_rate": 9.4923277215128e-06, "logits/chosen": -1.6740341186523438, "logits/rejected": -1.6464056968688965, "logps/chosen": -132.2212677001953, "logps/rejected": -195.19692993164062, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.9618011713027954, "rewards/margins": 5.451227188110352, "rewards/rejected": -7.413028240203857, "step": 7540 }, { "epoch": 1.67, "learning_rate": 9.491540521949862e-06, "logits/chosen": -1.5850297212600708, "logits/rejected": -1.4923115968704224, "logps/chosen": -107.49974060058594, "logps/rejected": -175.88955688476562, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.491725206375122, "rewards/margins": 7.196952819824219, "rewards/rejected": -8.688677787780762, "step": 7541 }, { "epoch": 1.67, "learning_rate": 9.490752745234767e-06, "logits/chosen": -1.8642927408218384, "logits/rejected": -1.8642927408218384, "logps/chosen": -126.95225524902344, "logps/rejected": -126.95225524902344, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.39785623550415, "rewards/margins": 0.0, "rewards/rejected": -6.39785623550415, "step": 7542 }, { "epoch": 1.67, "learning_rate": 9.489964391468739e-06, "logits/chosen": -1.3442537784576416, "logits/rejected": -1.1771842241287231, "logps/chosen": -188.19879150390625, "logps/rejected": -259.12969970703125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 2.0169007778167725, "rewards/margins": 5.708340644836426, "rewards/rejected": -3.6914398670196533, "step": 7543 }, { "epoch": 1.67, "learning_rate": 9.48917546075308e-06, "logits/chosen": -1.855011224746704, "logits/rejected": -1.0119372606277466, "logps/chosen": -144.0929412841797, "logps/rejected": -380.0293273925781, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.0343918800354, "rewards/margins": 6.6918110847473145, "rewards/rejected": -11.726202964782715, "step": 7544 }, { "epoch": 1.67, "learning_rate": 9.488385953189165e-06, "logits/chosen": -1.3533228635787964, "logits/rejected": -0.9859148263931274, "logps/chosen": -166.67376708984375, "logps/rejected": -535.8714599609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.23567047715187073, "rewards/margins": 41.583248138427734, "rewards/rejected": -41.34757614135742, "step": 7545 }, { "epoch": 1.67, "learning_rate": 9.487595868878447e-06, "logits/chosen": -1.530687689781189, "logits/rejected": -1.5776395797729492, "logps/chosen": -197.4847412109375, "logps/rejected": -208.4569549560547, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 4.980172634124756, "rewards/margins": 6.297114372253418, "rewards/rejected": -1.3169418573379517, "step": 7546 }, { "epoch": 1.67, "learning_rate": 9.486805207922445e-06, "logits/chosen": -1.538551926612854, "logits/rejected": -1.514700174331665, "logps/chosen": -152.42758178710938, "logps/rejected": -218.1058349609375, "loss": 0.6055, "rewards/accuracies": 0.0, "rewards/chosen": -4.015779972076416, "rewards/margins": -0.8508172035217285, "rewards/rejected": -3.1649627685546875, "step": 7547 }, { "epoch": 1.67, "learning_rate": 9.486013970422762e-06, "logits/chosen": -1.619583010673523, "logits/rejected": -1.6133230924606323, "logps/chosen": -161.97720336914062, "logps/rejected": -217.607421875, "loss": 0.0308, "rewards/accuracies": 1.0, "rewards/chosen": -5.80755615234375, "rewards/margins": 2.763826370239258, "rewards/rejected": -8.571382522583008, "step": 7548 }, { "epoch": 1.67, "learning_rate": 9.485222156481067e-06, "logits/chosen": -1.4220694303512573, "logits/rejected": -1.4506412744522095, "logps/chosen": -170.05862426757812, "logps/rejected": -143.42645263671875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -2.326190233230591, "rewards/margins": 4.246728897094727, "rewards/rejected": -6.5729193687438965, "step": 7549 }, { "epoch": 1.67, "learning_rate": 9.484429766199107e-06, "logits/chosen": -1.4339576959609985, "logits/rejected": -1.4048898220062256, "logps/chosen": -83.90740966796875, "logps/rejected": -140.45590209960938, "loss": 0.0532, "rewards/accuracies": 1.0, "rewards/chosen": -0.7555374503135681, "rewards/margins": 2.1867568492889404, "rewards/rejected": -2.9422943592071533, "step": 7550 }, { "epoch": 1.67, "learning_rate": 9.483636799678703e-06, "logits/chosen": -1.195982813835144, "logits/rejected": -1.2643718719482422, "logps/chosen": -259.6327819824219, "logps/rejected": -333.254638671875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.238445997238159, "rewards/margins": 6.205820083618164, "rewards/rejected": -9.444266319274902, "step": 7551 }, { "epoch": 1.67, "learning_rate": 9.482843257021747e-06, "logits/chosen": -1.3321681022644043, "logits/rejected": -0.8051263093948364, "logps/chosen": -160.17135620117188, "logps/rejected": -933.921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.107061743736267, "rewards/margins": 79.78462219238281, "rewards/rejected": -80.89168548583984, "step": 7552 }, { "epoch": 1.67, "learning_rate": 9.48204913833021e-06, "logits/chosen": -1.9151694774627686, "logits/rejected": -1.9104033708572388, "logps/chosen": -90.39082336425781, "logps/rejected": -101.17204284667969, "loss": 0.2814, "rewards/accuracies": 1.0, "rewards/chosen": -1.9736359119415283, "rewards/margins": 0.30084753036499023, "rewards/rejected": -2.2744834423065186, "step": 7553 }, { "epoch": 1.67, "learning_rate": 9.481254443706133e-06, "logits/chosen": -1.4742485284805298, "logits/rejected": -1.3610762357711792, "logps/chosen": -94.97398376464844, "logps/rejected": -118.69845581054688, "loss": 0.0504, "rewards/accuracies": 1.0, "rewards/chosen": -2.3490066528320312, "rewards/margins": 2.2752861976623535, "rewards/rejected": -4.624292850494385, "step": 7554 }, { "epoch": 1.67, "learning_rate": 9.480459173251634e-06, "logits/chosen": -1.3441632986068726, "logits/rejected": -1.5827149152755737, "logps/chosen": -147.12667846679688, "logps/rejected": -87.16053771972656, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.9957672357559204, "rewards/margins": 7.70306921005249, "rewards/rejected": -6.707302093505859, "step": 7555 }, { "epoch": 1.67, "learning_rate": 9.4796633270689e-06, "logits/chosen": -1.7609323263168335, "logits/rejected": -1.9330838918685913, "logps/chosen": -208.77366638183594, "logps/rejected": -101.97638702392578, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.569976806640625, "rewards/margins": 6.075582981109619, "rewards/rejected": -7.645559787750244, "step": 7556 }, { "epoch": 1.67, "learning_rate": 9.478866905260198e-06, "logits/chosen": -1.6709052324295044, "logits/rejected": -1.6317696571350098, "logps/chosen": -135.6641082763672, "logps/rejected": -166.31617736816406, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.875335693359375, "rewards/margins": 5.94940185546875, "rewards/rejected": -6.824737548828125, "step": 7557 }, { "epoch": 1.67, "learning_rate": 9.478069907927867e-06, "logits/chosen": -1.4899170398712158, "logits/rejected": -1.4899170398712158, "logps/chosen": -144.18777465820312, "logps/rejected": -144.18777465820312, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.071226596832275, "rewards/margins": 0.0, "rewards/rejected": -6.071226596832275, "step": 7558 }, { "epoch": 1.67, "learning_rate": 9.477272335174315e-06, "logits/chosen": -1.447395920753479, "logits/rejected": -1.4628818035125732, "logps/chosen": -78.86859130859375, "logps/rejected": -115.68016052246094, "loss": 0.0872, "rewards/accuracies": 1.0, "rewards/chosen": -2.4351723194122314, "rewards/margins": 1.909954309463501, "rewards/rejected": -4.345126628875732, "step": 7559 }, { "epoch": 1.67, "learning_rate": 9.476474187102033e-06, "logits/chosen": -1.646038293838501, "logits/rejected": -1.6969584226608276, "logps/chosen": -108.07676696777344, "logps/rejected": -157.6258544921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.494280219078064, "rewards/margins": 6.815711498260498, "rewards/rejected": -8.309991836547852, "step": 7560 }, { "epoch": 1.67, "learning_rate": 9.475675463813578e-06, "logits/chosen": -1.544023871421814, "logits/rejected": -0.9564637541770935, "logps/chosen": -109.90080261230469, "logps/rejected": -617.8104248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1624374389648438, "rewards/margins": 51.20290756225586, "rewards/rejected": -54.3653450012207, "step": 7561 }, { "epoch": 1.67, "learning_rate": 9.474876165411586e-06, "logits/chosen": -1.8724987506866455, "logits/rejected": -1.9832814931869507, "logps/chosen": -137.70880126953125, "logps/rejected": -175.08511352539062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.5347185134887695, "rewards/margins": 8.515180587768555, "rewards/rejected": -14.049899101257324, "step": 7562 }, { "epoch": 1.67, "learning_rate": 9.474076291998765e-06, "logits/chosen": -1.3171625137329102, "logits/rejected": -1.2241580486297607, "logps/chosen": -141.20257568359375, "logps/rejected": -183.34817504882812, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.68332839012146, "rewards/margins": 5.947405815124512, "rewards/rejected": -8.63073444366455, "step": 7563 }, { "epoch": 1.67, "learning_rate": 9.473275843677893e-06, "logits/chosen": -1.823163390159607, "logits/rejected": -1.8492928743362427, "logps/chosen": -110.20308685302734, "logps/rejected": -139.4512939453125, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -2.310189962387085, "rewards/margins": 7.410520553588867, "rewards/rejected": -9.720710754394531, "step": 7564 }, { "epoch": 1.67, "learning_rate": 9.472474820551831e-06, "logits/chosen": -1.4446508884429932, "logits/rejected": -1.4141993522644043, "logps/chosen": -168.56317138671875, "logps/rejected": -163.21119689941406, "loss": 2.0209, "rewards/accuracies": 0.0, "rewards/chosen": -8.784509658813477, "rewards/margins": -3.990847110748291, "rewards/rejected": -4.7936625480651855, "step": 7565 }, { "epoch": 1.67, "learning_rate": 9.471673222723506e-06, "logits/chosen": -1.3618651628494263, "logits/rejected": -1.3527963161468506, "logps/chosen": -187.21591186523438, "logps/rejected": -182.6926727294922, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -5.293103218078613, "rewards/margins": 3.867410659790039, "rewards/rejected": -9.160513877868652, "step": 7566 }, { "epoch": 1.67, "learning_rate": 9.47087105029592e-06, "logits/chosen": -1.6845587491989136, "logits/rejected": -1.763657569885254, "logps/chosen": -82.78724670410156, "logps/rejected": -62.99757766723633, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": 0.0012802124256268144, "rewards/margins": 3.9774529933929443, "rewards/rejected": -3.976172685623169, "step": 7567 }, { "epoch": 1.68, "learning_rate": 9.470068303372153e-06, "logits/chosen": -1.4215604066848755, "logits/rejected": -1.3866215944290161, "logps/chosen": -133.50692749023438, "logps/rejected": -174.04598999023438, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.7511444091796875, "rewards/margins": 6.475181579589844, "rewards/rejected": -8.226325988769531, "step": 7568 }, { "epoch": 1.68, "learning_rate": 9.469264982055355e-06, "logits/chosen": -1.6371585130691528, "logits/rejected": -1.6078708171844482, "logps/chosen": -248.95315551757812, "logps/rejected": -309.4368896484375, "loss": 1.0212, "rewards/accuracies": 0.0, "rewards/chosen": -8.813382148742676, "rewards/margins": -1.8904967308044434, "rewards/rejected": -6.922885417938232, "step": 7569 }, { "epoch": 1.68, "learning_rate": 9.46846108644875e-06, "logits/chosen": -1.592330813407898, "logits/rejected": -1.383854627609253, "logps/chosen": -76.02481079101562, "logps/rejected": -270.8980407714844, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -1.997846245765686, "rewards/margins": 2.8420162200927734, "rewards/rejected": -4.83986234664917, "step": 7570 }, { "epoch": 1.68, "learning_rate": 9.467656616655636e-06, "logits/chosen": -1.7574257850646973, "logits/rejected": -1.806589126586914, "logps/chosen": -231.2657012939453, "logps/rejected": -189.90206909179688, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -3.916243076324463, "rewards/margins": 3.2992706298828125, "rewards/rejected": -7.215513706207275, "step": 7571 }, { "epoch": 1.68, "learning_rate": 9.466851572779388e-06, "logits/chosen": -1.7066011428833008, "logits/rejected": -1.587640404701233, "logps/chosen": -87.76922607421875, "logps/rejected": -310.537109375, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -0.4629623591899872, "rewards/margins": 14.153017044067383, "rewards/rejected": -14.615979194641113, "step": 7572 }, { "epoch": 1.68, "learning_rate": 9.46604595492345e-06, "logits/chosen": -1.3119721412658691, "logits/rejected": -0.7201208472251892, "logps/chosen": -146.10696411132812, "logps/rejected": -671.450927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.975738525390625, "rewards/margins": 60.55422592163086, "rewards/rejected": -61.529964447021484, "step": 7573 }, { "epoch": 1.68, "learning_rate": 9.465239763191345e-06, "logits/chosen": -1.5140681266784668, "logits/rejected": -1.5250269174575806, "logps/chosen": -179.87884521484375, "logps/rejected": -173.86114501953125, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": 2.1573333740234375, "rewards/margins": 3.6677780151367188, "rewards/rejected": -1.5104446411132812, "step": 7574 }, { "epoch": 1.68, "learning_rate": 9.464432997686664e-06, "logits/chosen": -1.4510129690170288, "logits/rejected": -1.322843074798584, "logps/chosen": -133.37234497070312, "logps/rejected": -220.4886474609375, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -1.02660071849823, "rewards/margins": 3.5327253341674805, "rewards/rejected": -4.559326171875, "step": 7575 }, { "epoch": 1.68, "learning_rate": 9.463625658513073e-06, "logits/chosen": -1.5150575637817383, "logits/rejected": -1.5087292194366455, "logps/chosen": -73.67120361328125, "logps/rejected": -115.85122680664062, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -3.118303060531616, "rewards/margins": 5.940232276916504, "rewards/rejected": -9.0585355758667, "step": 7576 }, { "epoch": 1.68, "learning_rate": 9.462817745774316e-06, "logits/chosen": -1.2159963846206665, "logits/rejected": -1.272289514541626, "logps/chosen": -292.4224548339844, "logps/rejected": -185.16050720214844, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -4.466372966766357, "rewards/margins": 5.585763454437256, "rewards/rejected": -10.052136421203613, "step": 7577 }, { "epoch": 1.68, "learning_rate": 9.462009259574207e-06, "logits/chosen": -1.9525374174118042, "logits/rejected": -1.9741300344467163, "logps/chosen": -116.4747314453125, "logps/rejected": -99.33934783935547, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": -2.860734701156616, "rewards/margins": 4.842231750488281, "rewards/rejected": -7.702966213226318, "step": 7578 }, { "epoch": 1.68, "learning_rate": 9.461200200016636e-06, "logits/chosen": -1.5929571390151978, "logits/rejected": -1.5929571390151978, "logps/chosen": -81.84327697753906, "logps/rejected": -81.84327697753906, "loss": 0.3471, "rewards/accuracies": 0.0, "rewards/chosen": 0.6295914053916931, "rewards/margins": 0.0, "rewards/rejected": 0.6295914053916931, "step": 7579 }, { "epoch": 1.68, "learning_rate": 9.460390567205562e-06, "logits/chosen": -1.1335420608520508, "logits/rejected": -1.1341077089309692, "logps/chosen": -168.85086059570312, "logps/rejected": -126.76231384277344, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -0.19027404487133026, "rewards/margins": 3.1674041748046875, "rewards/rejected": -3.357678174972534, "step": 7580 }, { "epoch": 1.68, "learning_rate": 9.459580361245024e-06, "logits/chosen": -1.723789930343628, "logits/rejected": -1.7142897844314575, "logps/chosen": -92.60075378417969, "logps/rejected": -79.73622131347656, "loss": 0.3493, "rewards/accuracies": 1.0, "rewards/chosen": -1.784310221672058, "rewards/margins": 0.01839590072631836, "rewards/rejected": -1.8027061223983765, "step": 7581 }, { "epoch": 1.68, "learning_rate": 9.458769582239128e-06, "logits/chosen": -1.9675787687301636, "logits/rejected": -1.9795079231262207, "logps/chosen": -121.5873031616211, "logps/rejected": -156.96820068359375, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -1.043311357498169, "rewards/margins": 3.339848279953003, "rewards/rejected": -4.383159637451172, "step": 7582 }, { "epoch": 1.68, "learning_rate": 9.457958230292061e-06, "logits/chosen": -1.837921142578125, "logits/rejected": -1.6941606998443604, "logps/chosen": -105.88735961914062, "logps/rejected": -267.9533386230469, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.2217438220977783, "rewards/margins": 5.937406539916992, "rewards/rejected": -7.15915060043335, "step": 7583 }, { "epoch": 1.68, "learning_rate": 9.457146305508078e-06, "logits/chosen": -1.2948869466781616, "logits/rejected": -1.2506413459777832, "logps/chosen": -104.8353271484375, "logps/rejected": -166.7202606201172, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": -2.2206666469573975, "rewards/margins": 2.7276265621185303, "rewards/rejected": -4.948293209075928, "step": 7584 }, { "epoch": 1.68, "learning_rate": 9.45633380799151e-06, "logits/chosen": -1.9011551141738892, "logits/rejected": -1.8602360486984253, "logps/chosen": -92.4774398803711, "logps/rejected": -162.3260955810547, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.7946152091026306, "rewards/margins": 4.348233222961426, "rewards/rejected": -5.142848491668701, "step": 7585 }, { "epoch": 1.68, "learning_rate": 9.455520737846757e-06, "logits/chosen": -1.3407028913497925, "logits/rejected": -1.13998281955719, "logps/chosen": -144.1477508544922, "logps/rejected": -221.89703369140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 5.4898834228515625, "rewards/margins": 8.187525749206543, "rewards/rejected": -2.6976425647735596, "step": 7586 }, { "epoch": 1.68, "learning_rate": 9.454707095178304e-06, "logits/chosen": -1.3611726760864258, "logits/rejected": -1.3843392133712769, "logps/chosen": -142.96615600585938, "logps/rejected": -115.67011260986328, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 2.1225311756134033, "rewards/margins": 6.408695220947266, "rewards/rejected": -4.286164283752441, "step": 7587 }, { "epoch": 1.68, "learning_rate": 9.453892880090696e-06, "logits/chosen": -1.5296677350997925, "logits/rejected": -1.4786003828048706, "logps/chosen": -102.10724639892578, "logps/rejected": -174.30630493164062, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -2.140361785888672, "rewards/margins": 4.512012481689453, "rewards/rejected": -6.652374267578125, "step": 7588 }, { "epoch": 1.68, "learning_rate": 9.45307809268856e-06, "logits/chosen": -1.333373785018921, "logits/rejected": -1.36662757396698, "logps/chosen": -212.918212890625, "logps/rejected": -299.18975830078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1122559309005737, "rewards/margins": 15.809483528137207, "rewards/rejected": -16.92173957824707, "step": 7589 }, { "epoch": 1.68, "learning_rate": 9.452262733076594e-06, "logits/chosen": -1.7393479347229004, "logits/rejected": -1.7904943227767944, "logps/chosen": -235.19931030273438, "logps/rejected": -203.54718017578125, "loss": 0.2177, "rewards/accuracies": 1.0, "rewards/chosen": -1.8607361316680908, "rewards/margins": 0.6992263793945312, "rewards/rejected": -2.559962511062622, "step": 7590 }, { "epoch": 1.68, "learning_rate": 9.45144680135957e-06, "logits/chosen": -1.4287320375442505, "logits/rejected": -1.4287320375442505, "logps/chosen": -215.22238159179688, "logps/rejected": -215.22238159179688, "loss": 0.3487, "rewards/accuracies": 0.0, "rewards/chosen": -2.9031999111175537, "rewards/margins": 0.0, "rewards/rejected": -2.9031999111175537, "step": 7591 }, { "epoch": 1.68, "learning_rate": 9.450630297642334e-06, "logits/chosen": -1.8578592538833618, "logits/rejected": -1.8494194746017456, "logps/chosen": -104.93455505371094, "logps/rejected": -137.54037475585938, "loss": 0.0497, "rewards/accuracies": 1.0, "rewards/chosen": -2.171069383621216, "rewards/margins": 2.270921468734741, "rewards/rejected": -4.441990852355957, "step": 7592 }, { "epoch": 1.68, "learning_rate": 9.449813222029802e-06, "logits/chosen": -1.459978461265564, "logits/rejected": -1.4353700876235962, "logps/chosen": -109.8489990234375, "logps/rejected": -240.15969848632812, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.188495635986328, "rewards/margins": 4.236721038818359, "rewards/rejected": -6.4252166748046875, "step": 7593 }, { "epoch": 1.68, "learning_rate": 9.448995574626969e-06, "logits/chosen": -1.8696526288986206, "logits/rejected": -1.86781644821167, "logps/chosen": -163.59283447265625, "logps/rejected": -216.2332763671875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.7853729724884033, "rewards/margins": 6.743390083312988, "rewards/rejected": -8.528762817382812, "step": 7594 }, { "epoch": 1.68, "learning_rate": 9.448177355538899e-06, "logits/chosen": -1.5879067182540894, "logits/rejected": -1.5647425651550293, "logps/chosen": -110.94884490966797, "logps/rejected": -248.41143798828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.4740846157073975, "rewards/margins": 10.061100959777832, "rewards/rejected": -12.535185813903809, "step": 7595 }, { "epoch": 1.68, "learning_rate": 9.447358564870732e-06, "logits/chosen": -1.850639820098877, "logits/rejected": -1.8389356136322021, "logps/chosen": -119.4400634765625, "logps/rejected": -154.26708984375, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -1.7341355085372925, "rewards/margins": 3.0747251510620117, "rewards/rejected": -4.808860778808594, "step": 7596 }, { "epoch": 1.68, "learning_rate": 9.446539202727683e-06, "logits/chosen": -1.6377326250076294, "logits/rejected": -1.6465022563934326, "logps/chosen": -260.55517578125, "logps/rejected": -218.3752899169922, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.44592592120170593, "rewards/margins": 18.173965454101562, "rewards/rejected": -18.619892120361328, "step": 7597 }, { "epoch": 1.68, "learning_rate": 9.445719269215032e-06, "logits/chosen": -2.0342249870300293, "logits/rejected": -2.065152406692505, "logps/chosen": -105.83125305175781, "logps/rejected": -129.5863494873047, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -1.972834825515747, "rewards/margins": 2.9259727001190186, "rewards/rejected": -4.898807525634766, "step": 7598 }, { "epoch": 1.68, "learning_rate": 9.444898764438144e-06, "logits/chosen": -1.6946581602096558, "logits/rejected": -1.6946581602096558, "logps/chosen": -190.27621459960938, "logps/rejected": -190.27621459960938, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -9.32324504852295, "rewards/margins": 0.0, "rewards/rejected": -9.32324504852295, "step": 7599 }, { "epoch": 1.68, "learning_rate": 9.444077688502451e-06, "logits/chosen": -1.519749402999878, "logits/rejected": -1.3943243026733398, "logps/chosen": -124.04246520996094, "logps/rejected": -158.15953063964844, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -2.216698408126831, "rewards/margins": 3.525663137435913, "rewards/rejected": -5.742361545562744, "step": 7600 }, { "epoch": 1.68, "learning_rate": 9.443256041513457e-06, "logits/chosen": -1.3105900287628174, "logits/rejected": -1.303783893585205, "logps/chosen": -170.46534729003906, "logps/rejected": -188.77896118164062, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -0.7323059439659119, "rewards/margins": 4.1197614669799805, "rewards/rejected": -4.852067470550537, "step": 7601 }, { "epoch": 1.68, "learning_rate": 9.442433823576741e-06, "logits/chosen": -1.8876155614852905, "logits/rejected": -1.2361208200454712, "logps/chosen": -182.29664611816406, "logps/rejected": -748.92822265625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.35082244873046875, "rewards/margins": 59.28606033325195, "rewards/rejected": -58.935237884521484, "step": 7602 }, { "epoch": 1.68, "learning_rate": 9.441611034797961e-06, "logits/chosen": -1.7815715074539185, "logits/rejected": -1.7038947343826294, "logps/chosen": -141.10089111328125, "logps/rejected": -195.41412353515625, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -3.9696640968322754, "rewards/margins": 3.6414389610290527, "rewards/rejected": -7.611103057861328, "step": 7603 }, { "epoch": 1.68, "learning_rate": 9.44078767528284e-06, "logits/chosen": -1.348289966583252, "logits/rejected": -1.2739343643188477, "logps/chosen": -172.46774291992188, "logps/rejected": -233.32337951660156, "loss": 0.1556, "rewards/accuracies": 1.0, "rewards/chosen": -4.191110134124756, "rewards/margins": 5.570122241973877, "rewards/rejected": -9.761232376098633, "step": 7604 }, { "epoch": 1.68, "learning_rate": 9.439963745137177e-06, "logits/chosen": -1.480907917022705, "logits/rejected": -1.4903663396835327, "logps/chosen": -113.52926635742188, "logps/rejected": -134.98370361328125, "loss": 0.349, "rewards/accuracies": 0.0, "rewards/chosen": -1.8446930646896362, "rewards/margins": -0.00564885139465332, "rewards/rejected": -1.839044213294983, "step": 7605 }, { "epoch": 1.68, "learning_rate": 9.439139244466847e-06, "logits/chosen": -1.3451257944107056, "logits/rejected": -1.2477905750274658, "logps/chosen": -180.32275390625, "logps/rejected": -277.4134826660156, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 8.594497680664062, "rewards/margins": 10.193017959594727, "rewards/rejected": -1.5985199213027954, "step": 7606 }, { "epoch": 1.68, "learning_rate": 9.438314173377796e-06, "logits/chosen": -1.818893313407898, "logits/rejected": -1.706200361251831, "logps/chosen": -150.9530029296875, "logps/rejected": -233.40182495117188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.8827087879180908, "rewards/margins": 5.8957672119140625, "rewards/rejected": -7.778476238250732, "step": 7607 }, { "epoch": 1.68, "learning_rate": 9.437488531976042e-06, "logits/chosen": -1.9414054155349731, "logits/rejected": -1.8766419887542725, "logps/chosen": -64.37041473388672, "logps/rejected": -189.34344482421875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.01686859130859375, "rewards/margins": 6.5822672843933105, "rewards/rejected": -6.599135875701904, "step": 7608 }, { "epoch": 1.68, "learning_rate": 9.43666232036768e-06, "logits/chosen": -1.3711286783218384, "logits/rejected": -1.3360753059387207, "logps/chosen": -97.3355712890625, "logps/rejected": -228.61854553222656, "loss": 0.0467, "rewards/accuracies": 1.0, "rewards/chosen": 0.3686233460903168, "rewards/margins": 7.553636074066162, "rewards/rejected": -7.1850128173828125, "step": 7609 }, { "epoch": 1.68, "learning_rate": 9.435835538658873e-06, "logits/chosen": -2.0454611778259277, "logits/rejected": -1.9127222299575806, "logps/chosen": -125.38349151611328, "logps/rejected": -271.42230224609375, "loss": 0.1663, "rewards/accuracies": 1.0, "rewards/chosen": -1.444771647453308, "rewards/margins": 1.2570747137069702, "rewards/rejected": -2.7018463611602783, "step": 7610 }, { "epoch": 1.68, "learning_rate": 9.435008186955866e-06, "logits/chosen": -1.1208789348602295, "logits/rejected": -1.1972980499267578, "logps/chosen": -199.41983032226562, "logps/rejected": -165.76324462890625, "loss": 0.1069, "rewards/accuracies": 1.0, "rewards/chosen": -3.294848680496216, "rewards/margins": 2.24320387840271, "rewards/rejected": -5.538052558898926, "step": 7611 }, { "epoch": 1.68, "learning_rate": 9.434180265364965e-06, "logits/chosen": -1.578738808631897, "logits/rejected": -1.5569266080856323, "logps/chosen": -96.43975830078125, "logps/rejected": -126.92514038085938, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.5426346063613892, "rewards/margins": 4.793097972869873, "rewards/rejected": -5.335732460021973, "step": 7612 }, { "epoch": 1.69, "learning_rate": 9.43335177399256e-06, "logits/chosen": -1.1857181787490845, "logits/rejected": -1.1305599212646484, "logps/chosen": -85.09370422363281, "logps/rejected": -188.09205627441406, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.5405685901641846, "rewards/margins": 7.704631805419922, "rewards/rejected": -9.245200157165527, "step": 7613 }, { "epoch": 1.69, "learning_rate": 9.432522712945111e-06, "logits/chosen": -1.3948894739151, "logits/rejected": -1.3655874729156494, "logps/chosen": -140.73052978515625, "logps/rejected": -226.5968475341797, "loss": 0.3472, "rewards/accuracies": 1.0, "rewards/chosen": -2.1173508167266846, "rewards/margins": 6.646137237548828, "rewards/rejected": -8.763487815856934, "step": 7614 }, { "epoch": 1.69, "learning_rate": 9.43169308232915e-06, "logits/chosen": -1.8275651931762695, "logits/rejected": -1.6811827421188354, "logps/chosen": -240.13442993164062, "logps/rejected": -340.9482421875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.3570007383823395, "rewards/margins": 6.703790187835693, "rewards/rejected": -7.060791015625, "step": 7615 }, { "epoch": 1.69, "learning_rate": 9.430862882251279e-06, "logits/chosen": -1.4962129592895508, "logits/rejected": -1.4820022583007812, "logps/chosen": -187.83099365234375, "logps/rejected": -181.3578338623047, "loss": 0.0663, "rewards/accuracies": 1.0, "rewards/chosen": 1.9925674200057983, "rewards/margins": 4.873600959777832, "rewards/rejected": -2.881033420562744, "step": 7616 }, { "epoch": 1.69, "learning_rate": 9.430032112818182e-06, "logits/chosen": -1.909916639328003, "logits/rejected": -1.8094277381896973, "logps/chosen": -115.60149383544922, "logps/rejected": -266.88763427734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8575820922851562, "rewards/margins": 6.069496154785156, "rewards/rejected": -6.9270782470703125, "step": 7617 }, { "epoch": 1.69, "learning_rate": 9.429200774136603e-06, "logits/chosen": -1.2041981220245361, "logits/rejected": -1.2144739627838135, "logps/chosen": -253.83934020996094, "logps/rejected": -262.080810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.7083114385604858, "rewards/margins": 13.263160705566406, "rewards/rejected": -11.554849624633789, "step": 7618 }, { "epoch": 1.69, "learning_rate": 9.428368866313377e-06, "logits/chosen": -1.4680536985397339, "logits/rejected": -1.3626115322113037, "logps/chosen": -156.69654846191406, "logps/rejected": -205.4776153564453, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 3.071763753890991, "rewards/margins": 7.1640119552612305, "rewards/rejected": -4.092248439788818, "step": 7619 }, { "epoch": 1.69, "learning_rate": 9.427536389455394e-06, "logits/chosen": -1.8258254528045654, "logits/rejected": -1.8387395143508911, "logps/chosen": -106.89372253417969, "logps/rejected": -88.63197326660156, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -2.599011182785034, "rewards/margins": 3.4352290630340576, "rewards/rejected": -6.034240245819092, "step": 7620 }, { "epoch": 1.69, "learning_rate": 9.426703343669631e-06, "logits/chosen": -1.7800854444503784, "logits/rejected": -1.8535178899765015, "logps/chosen": -244.7660675048828, "logps/rejected": -205.9234619140625, "loss": 0.2427, "rewards/accuracies": 1.0, "rewards/chosen": -6.648356914520264, "rewards/margins": 0.5756268501281738, "rewards/rejected": -7.2239837646484375, "step": 7621 }, { "epoch": 1.69, "learning_rate": 9.425869729063129e-06, "logits/chosen": -1.7277960777282715, "logits/rejected": -1.698551893234253, "logps/chosen": -88.4853744506836, "logps/rejected": -125.55204010009766, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": -1.2752525806427002, "rewards/margins": 2.437033176422119, "rewards/rejected": -3.7122857570648193, "step": 7622 }, { "epoch": 1.69, "learning_rate": 9.425035545743005e-06, "logits/chosen": -1.5526931285858154, "logits/rejected": -1.5560195446014404, "logps/chosen": -232.70668029785156, "logps/rejected": -264.196533203125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.4378403425216675, "rewards/margins": 7.912496566772461, "rewards/rejected": -9.350337028503418, "step": 7623 }, { "epoch": 1.69, "learning_rate": 9.424200793816451e-06, "logits/chosen": -1.5852537155151367, "logits/rejected": -1.498723030090332, "logps/chosen": -160.25900268554688, "logps/rejected": -161.87339782714844, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.8592193722724915, "rewards/margins": 7.473530292510986, "rewards/rejected": -6.6143107414245605, "step": 7624 }, { "epoch": 1.69, "learning_rate": 9.423365473390734e-06, "logits/chosen": -1.5419224500656128, "logits/rejected": -1.5419224500656128, "logps/chosen": -64.53694915771484, "logps/rejected": -64.53694915771484, "loss": 0.4903, "rewards/accuracies": 0.0, "rewards/chosen": -0.7153518795967102, "rewards/margins": 0.0, "rewards/rejected": -0.7153518795967102, "step": 7625 }, { "epoch": 1.69, "learning_rate": 9.422529584573183e-06, "logits/chosen": -1.339145302772522, "logits/rejected": -1.3004820346832275, "logps/chosen": -190.96011352539062, "logps/rejected": -174.3319091796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.10666809231042862, "rewards/margins": 8.494328498840332, "rewards/rejected": -8.600996971130371, "step": 7626 }, { "epoch": 1.69, "learning_rate": 9.421693127471214e-06, "logits/chosen": -1.758927583694458, "logits/rejected": -1.7991987466812134, "logps/chosen": -217.67050170898438, "logps/rejected": -167.8323211669922, "loss": 0.0692, "rewards/accuracies": 1.0, "rewards/chosen": -3.327496290206909, "rewards/margins": 1.912590742111206, "rewards/rejected": -5.240087032318115, "step": 7627 }, { "epoch": 1.69, "learning_rate": 9.420856102192305e-06, "logits/chosen": -1.9007831811904907, "logits/rejected": -1.8261233568191528, "logps/chosen": -142.8438720703125, "logps/rejected": -232.73898315429688, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": 2.2239136695861816, "rewards/margins": 5.977060317993164, "rewards/rejected": -3.7531464099884033, "step": 7628 }, { "epoch": 1.69, "learning_rate": 9.420018508844017e-06, "logits/chosen": -1.5800598859786987, "logits/rejected": -1.5555987358093262, "logps/chosen": -220.14077758789062, "logps/rejected": -171.6690673828125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.5122222900390625, "rewards/margins": 6.004386901855469, "rewards/rejected": -8.516609191894531, "step": 7629 }, { "epoch": 1.69, "learning_rate": 9.419180347533976e-06, "logits/chosen": -1.483481526374817, "logits/rejected": -1.413152813911438, "logps/chosen": -93.02874755859375, "logps/rejected": -143.66448974609375, "loss": 0.1592, "rewards/accuracies": 1.0, "rewards/chosen": -0.7470741271972656, "rewards/margins": 0.9933769702911377, "rewards/rejected": -1.7404510974884033, "step": 7630 }, { "epoch": 1.69, "learning_rate": 9.418341618369882e-06, "logits/chosen": -1.4210762977600098, "logits/rejected": -1.3351956605911255, "logps/chosen": -153.66677856445312, "logps/rejected": -246.67698669433594, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -0.2824157774448395, "rewards/margins": 8.086434364318848, "rewards/rejected": -8.368849754333496, "step": 7631 }, { "epoch": 1.69, "learning_rate": 9.417502321459513e-06, "logits/chosen": -1.6841070652008057, "logits/rejected": -1.6900784969329834, "logps/chosen": -133.42095947265625, "logps/rejected": -106.36322784423828, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -4.933482646942139, "rewards/margins": 3.1858248710632324, "rewards/rejected": -8.119307518005371, "step": 7632 }, { "epoch": 1.69, "learning_rate": 9.416662456910714e-06, "logits/chosen": -2.639662027359009, "logits/rejected": -2.661386489868164, "logps/chosen": -180.7811737060547, "logps/rejected": -342.419189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3918777406215668, "rewards/margins": 16.63551139831543, "rewards/rejected": -17.027389526367188, "step": 7633 }, { "epoch": 1.69, "learning_rate": 9.415822024831407e-06, "logits/chosen": -1.5957916975021362, "logits/rejected": -1.6272093057632446, "logps/chosen": -159.25955200195312, "logps/rejected": -94.66046905517578, "loss": 0.4237, "rewards/accuracies": 0.0, "rewards/chosen": -6.408041477203369, "rewards/margins": -0.20251750946044922, "rewards/rejected": -6.20552396774292, "step": 7634 }, { "epoch": 1.69, "learning_rate": 9.414981025329585e-06, "logits/chosen": -1.2144311666488647, "logits/rejected": -1.086281418800354, "logps/chosen": -301.1736145019531, "logps/rejected": -227.2935791015625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 4.723596096038818, "rewards/margins": 6.581912040710449, "rewards/rejected": -1.8583160638809204, "step": 7635 }, { "epoch": 1.69, "learning_rate": 9.414139458513316e-06, "logits/chosen": -1.3447362184524536, "logits/rejected": -1.417358160018921, "logps/chosen": -235.83090209960938, "logps/rejected": -172.22637939453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.686537265777588, "rewards/margins": 9.138452529907227, "rewards/rejected": -13.824989318847656, "step": 7636 }, { "epoch": 1.69, "learning_rate": 9.413297324490736e-06, "logits/chosen": -1.441998839378357, "logits/rejected": -1.441998839378357, "logps/chosen": -110.32038116455078, "logps/rejected": -110.32038116455078, "loss": 0.3614, "rewards/accuracies": 0.0, "rewards/chosen": -1.8846420049667358, "rewards/margins": 0.0, "rewards/rejected": -1.8846420049667358, "step": 7637 }, { "epoch": 1.69, "learning_rate": 9.41245462337006e-06, "logits/chosen": -1.8958113193511963, "logits/rejected": -1.711744785308838, "logps/chosen": -160.3477020263672, "logps/rejected": -285.0693664550781, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.4758011102676392, "rewards/margins": 7.587570667266846, "rewards/rejected": -9.063371658325195, "step": 7638 }, { "epoch": 1.69, "learning_rate": 9.41161135525957e-06, "logits/chosen": -1.2479528188705444, "logits/rejected": -1.1931482553482056, "logps/chosen": -191.04505920410156, "logps/rejected": -183.22323608398438, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.2079651355743408, "rewards/margins": 6.8257646560668945, "rewards/rejected": -8.033729553222656, "step": 7639 }, { "epoch": 1.69, "learning_rate": 9.410767520267629e-06, "logits/chosen": -1.4424021244049072, "logits/rejected": -1.443792462348938, "logps/chosen": -95.75108337402344, "logps/rejected": -114.56690979003906, "loss": 0.1091, "rewards/accuracies": 1.0, "rewards/chosen": -2.91107177734375, "rewards/margins": 1.418229579925537, "rewards/rejected": -4.329301357269287, "step": 7640 }, { "epoch": 1.69, "learning_rate": 9.409923118502665e-06, "logits/chosen": -1.6232846975326538, "logits/rejected": -1.7145150899887085, "logps/chosen": -234.5439453125, "logps/rejected": -152.38526916503906, "loss": 0.2247, "rewards/accuracies": 1.0, "rewards/chosen": -2.1323487758636475, "rewards/margins": 5.879696846008301, "rewards/rejected": -8.012045860290527, "step": 7641 }, { "epoch": 1.69, "learning_rate": 9.40907815007318e-06, "logits/chosen": -1.5148569345474243, "logits/rejected": -1.4407832622528076, "logps/chosen": -176.72732543945312, "logps/rejected": -299.5679931640625, "loss": 0.73, "rewards/accuracies": 0.0, "rewards/chosen": -4.189885139465332, "rewards/margins": -1.1913652420043945, "rewards/rejected": -2.9985198974609375, "step": 7642 }, { "epoch": 1.69, "learning_rate": 9.408232615087752e-06, "logits/chosen": -1.352034330368042, "logits/rejected": -1.2473207712173462, "logps/chosen": -125.2620849609375, "logps/rejected": -199.66795349121094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2561371326446533, "rewards/margins": 8.743318557739258, "rewards/rejected": -9.999455451965332, "step": 7643 }, { "epoch": 1.69, "learning_rate": 9.40738651365503e-06, "logits/chosen": -1.6272724866867065, "logits/rejected": -1.4718677997589111, "logps/chosen": -95.63582611083984, "logps/rejected": -255.8292236328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.8772361874580383, "rewards/margins": 7.402555465698242, "rewards/rejected": -8.279791831970215, "step": 7644 }, { "epoch": 1.69, "learning_rate": 9.406539845883736e-06, "logits/chosen": -1.7360477447509766, "logits/rejected": -1.7354522943496704, "logps/chosen": -109.4857177734375, "logps/rejected": -136.46490478515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.3218400478363037, "rewards/margins": 7.644351005554199, "rewards/rejected": -9.966191291809082, "step": 7645 }, { "epoch": 1.69, "learning_rate": 9.405692611882666e-06, "logits/chosen": -1.6240556240081787, "logits/rejected": -1.6098130941390991, "logps/chosen": -170.3739013671875, "logps/rejected": -183.4754180908203, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -0.7444503903388977, "rewards/margins": 3.163752794265747, "rewards/rejected": -3.908203125, "step": 7646 }, { "epoch": 1.69, "learning_rate": 9.404844811760685e-06, "logits/chosen": -1.4173349142074585, "logits/rejected": -1.3955625295639038, "logps/chosen": -195.50572204589844, "logps/rejected": -173.9014892578125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -2.072770833969116, "rewards/margins": 4.538132667541504, "rewards/rejected": -6.610903263092041, "step": 7647 }, { "epoch": 1.69, "learning_rate": 9.403996445626735e-06, "logits/chosen": -1.7513259649276733, "logits/rejected": -1.783151388168335, "logps/chosen": -185.79029846191406, "logps/rejected": -153.93759155273438, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.9173049926757812, "rewards/margins": 5.214205741882324, "rewards/rejected": -8.131510734558105, "step": 7648 }, { "epoch": 1.69, "learning_rate": 9.403147513589829e-06, "logits/chosen": -1.8366059064865112, "logits/rejected": -1.8129007816314697, "logps/chosen": -154.3038330078125, "logps/rejected": -193.2557373046875, "loss": 0.0782, "rewards/accuracies": 1.0, "rewards/chosen": -4.97298002243042, "rewards/margins": 1.7763257026672363, "rewards/rejected": -6.749305725097656, "step": 7649 }, { "epoch": 1.69, "learning_rate": 9.402298015759052e-06, "logits/chosen": -1.7661795616149902, "logits/rejected": -1.8100508451461792, "logps/chosen": -243.69081115722656, "logps/rejected": -168.2342529296875, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -2.0968902111053467, "rewards/margins": 5.85225772857666, "rewards/rejected": -7.949147701263428, "step": 7650 }, { "epoch": 1.69, "learning_rate": 9.401447952243563e-06, "logits/chosen": -1.8105292320251465, "logits/rejected": -1.7482765913009644, "logps/chosen": -74.60747528076172, "logps/rejected": -202.76895141601562, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.40107041597366333, "rewards/margins": 7.900135040283203, "rewards/rejected": -8.3012056350708, "step": 7651 }, { "epoch": 1.69, "learning_rate": 9.400597323152591e-06, "logits/chosen": -1.7794069051742554, "logits/rejected": -1.765594244003296, "logps/chosen": -112.42762756347656, "logps/rejected": -239.10922241210938, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.06141052395105362, "rewards/margins": 5.720026016235352, "rewards/rejected": -5.781436443328857, "step": 7652 }, { "epoch": 1.69, "learning_rate": 9.399746128595444e-06, "logits/chosen": -1.6639093160629272, "logits/rejected": -1.522695779800415, "logps/chosen": -169.41619873046875, "logps/rejected": -370.59576416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 4.600386142730713, "rewards/margins": 11.999971389770508, "rewards/rejected": -7.399585247039795, "step": 7653 }, { "epoch": 1.69, "learning_rate": 9.398894368681496e-06, "logits/chosen": -1.7352014780044556, "logits/rejected": -1.6521193981170654, "logps/chosen": -95.63716888427734, "logps/rejected": -136.61012268066406, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.1403931379318237, "rewards/margins": 4.236170768737793, "rewards/rejected": -5.376564025878906, "step": 7654 }, { "epoch": 1.69, "learning_rate": 9.398042043520197e-06, "logits/chosen": -1.8587793111801147, "logits/rejected": -1.892994999885559, "logps/chosen": -89.91650390625, "logps/rejected": -115.105712890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.7043022513389587, "rewards/margins": 8.844597816467285, "rewards/rejected": -8.14029598236084, "step": 7655 }, { "epoch": 1.69, "learning_rate": 9.397189153221067e-06, "logits/chosen": -1.6042981147766113, "logits/rejected": -1.6401817798614502, "logps/chosen": -91.75250244140625, "logps/rejected": -106.38334655761719, "loss": 0.3766, "rewards/accuracies": 0.0, "rewards/chosen": -0.798510730266571, "rewards/margins": -0.0021522045135498047, "rewards/rejected": -0.7963585257530212, "step": 7656 }, { "epoch": 1.69, "learning_rate": 9.396335697893702e-06, "logits/chosen": -1.817950963973999, "logits/rejected": -1.7027007341384888, "logps/chosen": -115.33920288085938, "logps/rejected": -248.98062133789062, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.2950286865234375, "rewards/margins": 6.4992876052856445, "rewards/rejected": -6.204258918762207, "step": 7657 }, { "epoch": 1.69, "learning_rate": 9.395481677647767e-06, "logits/chosen": -1.2998533248901367, "logits/rejected": -1.2218749523162842, "logps/chosen": -213.31314086914062, "logps/rejected": -298.27191162109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.6909730434417725, "rewards/margins": 9.281991004943848, "rewards/rejected": -11.9729642868042, "step": 7658 }, { "epoch": 1.7, "learning_rate": 9.394627092593002e-06, "logits/chosen": -1.3747313022613525, "logits/rejected": -1.3871147632598877, "logps/chosen": -114.77742767333984, "logps/rejected": -131.8329620361328, "loss": 0.1434, "rewards/accuracies": 1.0, "rewards/chosen": -3.0018365383148193, "rewards/margins": 1.102484941482544, "rewards/rejected": -4.104321479797363, "step": 7659 }, { "epoch": 1.7, "learning_rate": 9.393771942839223e-06, "logits/chosen": -1.6950241327285767, "logits/rejected": -1.5474042892456055, "logps/chosen": -213.03268432617188, "logps/rejected": -314.1370544433594, "loss": 0.0277, "rewards/accuracies": 1.0, "rewards/chosen": 0.3516128659248352, "rewards/margins": 2.8761672973632812, "rewards/rejected": -2.524554491043091, "step": 7660 }, { "epoch": 1.7, "learning_rate": 9.392916228496309e-06, "logits/chosen": -1.5668734312057495, "logits/rejected": -1.446797251701355, "logps/chosen": -161.26231384277344, "logps/rejected": -240.9043731689453, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 1.730676293373108, "rewards/margins": 11.83970832824707, "rewards/rejected": -10.109031677246094, "step": 7661 }, { "epoch": 1.7, "learning_rate": 9.392059949674222e-06, "logits/chosen": -1.686305284500122, "logits/rejected": -1.686305284500122, "logps/chosen": -166.29995727539062, "logps/rejected": -166.29995727539062, "loss": 0.3469, "rewards/accuracies": 0.0, "rewards/chosen": -8.074457168579102, "rewards/margins": 0.0, "rewards/rejected": -8.074457168579102, "step": 7662 }, { "epoch": 1.7, "learning_rate": 9.39120310648299e-06, "logits/chosen": -1.0846431255340576, "logits/rejected": -1.0846431255340576, "logps/chosen": -455.2088317871094, "logps/rejected": -455.2088317871094, "loss": 0.3506, "rewards/accuracies": 0.0, "rewards/chosen": -21.36485481262207, "rewards/margins": 0.0, "rewards/rejected": -21.36485481262207, "step": 7663 }, { "epoch": 1.7, "learning_rate": 9.390345699032712e-06, "logits/chosen": -1.4011561870574951, "logits/rejected": -1.4011561870574951, "logps/chosen": -68.111572265625, "logps/rejected": -68.111572265625, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -0.5801002383232117, "rewards/margins": 0.0, "rewards/rejected": -0.5801002383232117, "step": 7664 }, { "epoch": 1.7, "learning_rate": 9.389487727433569e-06, "logits/chosen": -1.4853483438491821, "logits/rejected": -1.4944267272949219, "logps/chosen": -63.9619140625, "logps/rejected": -100.80396270751953, "loss": 0.0685, "rewards/accuracies": 1.0, "rewards/chosen": -1.9064396619796753, "rewards/margins": 5.5230937004089355, "rewards/rejected": -7.4295334815979, "step": 7665 }, { "epoch": 1.7, "learning_rate": 9.388629191795804e-06, "logits/chosen": -1.6466745138168335, "logits/rejected": -1.6466745138168335, "logps/chosen": -133.61387634277344, "logps/rejected": -133.61387634277344, "loss": 0.3488, "rewards/accuracies": 0.0, "rewards/chosen": -2.4620652198791504, "rewards/margins": 0.0, "rewards/rejected": -2.4620652198791504, "step": 7666 }, { "epoch": 1.7, "learning_rate": 9.387770092229736e-06, "logits/chosen": -1.8140318393707275, "logits/rejected": -1.8140318393707275, "logps/chosen": -233.91152954101562, "logps/rejected": -233.91152954101562, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -12.844764709472656, "rewards/margins": 0.0, "rewards/rejected": -12.844764709472656, "step": 7667 }, { "epoch": 1.7, "learning_rate": 9.386910428845762e-06, "logits/chosen": -1.786840796470642, "logits/rejected": -1.781870722770691, "logps/chosen": -208.58033752441406, "logps/rejected": -209.5364990234375, "loss": 0.3376, "rewards/accuracies": 1.0, "rewards/chosen": -1.3437576293945312, "rewards/margins": 3.384939670562744, "rewards/rejected": -4.728697299957275, "step": 7668 }, { "epoch": 1.7, "learning_rate": 9.386050201754342e-06, "logits/chosen": -1.6786847114562988, "logits/rejected": -1.7252970933914185, "logps/chosen": -172.35430908203125, "logps/rejected": -138.18063354492188, "loss": 0.4492, "rewards/accuracies": 0.0, "rewards/chosen": -4.288327217102051, "rewards/margins": -0.37534189224243164, "rewards/rejected": -3.912985324859619, "step": 7669 }, { "epoch": 1.7, "learning_rate": 9.385189411066014e-06, "logits/chosen": -1.5916701555252075, "logits/rejected": -1.5916701555252075, "logps/chosen": -116.33648681640625, "logps/rejected": -116.33648681640625, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -3.8524551391601562, "rewards/margins": 0.0, "rewards/rejected": -3.8524551391601562, "step": 7670 }, { "epoch": 1.7, "learning_rate": 9.384328056891389e-06, "logits/chosen": -1.6423346996307373, "logits/rejected": -1.6479295492172241, "logps/chosen": -93.94304656982422, "logps/rejected": -83.09009552001953, "loss": 0.2506, "rewards/accuracies": 1.0, "rewards/chosen": -3.3888041973114014, "rewards/margins": 0.4327728748321533, "rewards/rejected": -3.8215770721435547, "step": 7671 }, { "epoch": 1.7, "learning_rate": 9.38346613934115e-06, "logits/chosen": -1.5629122257232666, "logits/rejected": -1.536136269569397, "logps/chosen": -131.80767822265625, "logps/rejected": -222.85702514648438, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.4361350536346436, "rewards/margins": 5.957399368286133, "rewards/rejected": -8.393534660339355, "step": 7672 }, { "epoch": 1.7, "learning_rate": 9.382603658526048e-06, "logits/chosen": -1.9178478717803955, "logits/rejected": -1.914953351020813, "logps/chosen": -161.77761840820312, "logps/rejected": -152.03182983398438, "loss": 0.1062, "rewards/accuracies": 1.0, "rewards/chosen": -4.073378086090088, "rewards/margins": 1.8979597091674805, "rewards/rejected": -5.971337795257568, "step": 7673 }, { "epoch": 1.7, "learning_rate": 9.381740614556911e-06, "logits/chosen": -1.3523896932601929, "logits/rejected": -1.3523896932601929, "logps/chosen": -161.69027709960938, "logps/rejected": -161.69027709960938, "loss": 0.3687, "rewards/accuracies": 0.0, "rewards/chosen": -5.055456638336182, "rewards/margins": 0.0, "rewards/rejected": -5.055456638336182, "step": 7674 }, { "epoch": 1.7, "learning_rate": 9.38087700754464e-06, "logits/chosen": -1.6985430717468262, "logits/rejected": -1.7240766286849976, "logps/chosen": -148.2441864013672, "logps/rejected": -205.53018188476562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 4.246853828430176, "rewards/margins": 11.704446792602539, "rewards/rejected": -7.457592964172363, "step": 7675 }, { "epoch": 1.7, "learning_rate": 9.380012837600205e-06, "logits/chosen": -1.38193941116333, "logits/rejected": -1.2633624076843262, "logps/chosen": -181.9593048095703, "logps/rejected": -371.28619384765625, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -2.392530918121338, "rewards/margins": 12.993099212646484, "rewards/rejected": -15.385629653930664, "step": 7676 }, { "epoch": 1.7, "learning_rate": 9.379148104834648e-06, "logits/chosen": -1.3250328302383423, "logits/rejected": -1.3100558519363403, "logps/chosen": -123.78691864013672, "logps/rejected": -128.4160614013672, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -4.218456268310547, "rewards/margins": 3.1261749267578125, "rewards/rejected": -7.344631195068359, "step": 7677 }, { "epoch": 1.7, "learning_rate": 9.378282809359087e-06, "logits/chosen": -1.7028096914291382, "logits/rejected": -1.63202965259552, "logps/chosen": -105.52232360839844, "logps/rejected": -182.9375, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.9576752185821533, "rewards/margins": 4.036222457885742, "rewards/rejected": -5.993897914886475, "step": 7678 }, { "epoch": 1.7, "learning_rate": 9.377416951284712e-06, "logits/chosen": -1.5741702318191528, "logits/rejected": -1.4470815658569336, "logps/chosen": -89.66715240478516, "logps/rejected": -192.30535888671875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": 0.4806724488735199, "rewards/margins": 7.342033386230469, "rewards/rejected": -6.861361026763916, "step": 7679 }, { "epoch": 1.7, "learning_rate": 9.376550530722778e-06, "logits/chosen": -1.250313401222229, "logits/rejected": -1.3469009399414062, "logps/chosen": -137.3396759033203, "logps/rejected": -125.82032775878906, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.9518936276435852, "rewards/margins": 6.117260456085205, "rewards/rejected": -5.1653666496276855, "step": 7680 }, { "epoch": 1.7, "learning_rate": 9.375683547784626e-06, "logits/chosen": -1.8058751821517944, "logits/rejected": -1.818784475326538, "logps/chosen": -121.64161682128906, "logps/rejected": -175.98440551757812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.3512252867221832, "rewards/margins": 11.907424926757812, "rewards/rejected": -12.258649826049805, "step": 7681 }, { "epoch": 1.7, "learning_rate": 9.374816002581654e-06, "logits/chosen": -1.7064080238342285, "logits/rejected": -1.6240962743759155, "logps/chosen": -150.00904846191406, "logps/rejected": -284.0450134277344, "loss": 0.3758, "rewards/accuracies": 1.0, "rewards/chosen": -3.0132980346679688, "rewards/margins": 2.8097338676452637, "rewards/rejected": -5.823031902313232, "step": 7682 }, { "epoch": 1.7, "learning_rate": 9.373947895225345e-06, "logits/chosen": -1.1445748805999756, "logits/rejected": -1.206709384918213, "logps/chosen": -211.4370880126953, "logps/rejected": -144.47280883789062, "loss": 0.034, "rewards/accuracies": 1.0, "rewards/chosen": -2.974658250808716, "rewards/margins": 2.7037436962127686, "rewards/rejected": -5.678401947021484, "step": 7683 }, { "epoch": 1.7, "learning_rate": 9.373079225827243e-06, "logits/chosen": -1.740658164024353, "logits/rejected": -1.715583086013794, "logps/chosen": -122.65034484863281, "logps/rejected": -218.2188262939453, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.6169395446777344, "rewards/margins": 5.446439743041992, "rewards/rejected": -8.063379287719727, "step": 7684 }, { "epoch": 1.7, "learning_rate": 9.372209994498976e-06, "logits/chosen": -1.7835116386413574, "logits/rejected": -1.7178893089294434, "logps/chosen": -253.48806762695312, "logps/rejected": -419.29180908203125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -2.8074891567230225, "rewards/margins": 10.287322998046875, "rewards/rejected": -13.094812393188477, "step": 7685 }, { "epoch": 1.7, "learning_rate": 9.371340201352234e-06, "logits/chosen": -1.2521699666976929, "logits/rejected": -1.2521699666976929, "logps/chosen": -161.9445343017578, "logps/rejected": -161.9445343017578, "loss": 0.3516, "rewards/accuracies": 0.0, "rewards/chosen": -4.798182964324951, "rewards/margins": 0.0, "rewards/rejected": -4.798182964324951, "step": 7686 }, { "epoch": 1.7, "learning_rate": 9.370469846498784e-06, "logits/chosen": -1.5156151056289673, "logits/rejected": -1.5092459917068481, "logps/chosen": -113.63600158691406, "logps/rejected": -179.236572265625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.8700546026229858, "rewards/margins": 4.582791805267334, "rewards/rejected": -6.452846527099609, "step": 7687 }, { "epoch": 1.7, "learning_rate": 9.369598930050466e-06, "logits/chosen": -1.4949291944503784, "logits/rejected": -1.5286226272583008, "logps/chosen": -50.15542984008789, "logps/rejected": -65.42467498779297, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.7323368787765503, "rewards/margins": 3.797700881958008, "rewards/rejected": -5.530037879943848, "step": 7688 }, { "epoch": 1.7, "learning_rate": 9.368727452119188e-06, "logits/chosen": -1.7486317157745361, "logits/rejected": -1.9085566997528076, "logps/chosen": -190.34596252441406, "logps/rejected": -162.56211853027344, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -3.835575819015503, "rewards/margins": 3.9542267322540283, "rewards/rejected": -7.789802551269531, "step": 7689 }, { "epoch": 1.7, "learning_rate": 9.367855412816935e-06, "logits/chosen": -1.62186861038208, "logits/rejected": -1.5878314971923828, "logps/chosen": -139.04364013671875, "logps/rejected": -179.84371948242188, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -3.0642364025115967, "rewards/margins": 4.692140579223633, "rewards/rejected": -7.75637674331665, "step": 7690 }, { "epoch": 1.7, "learning_rate": 9.366982812255764e-06, "logits/chosen": -1.3278917074203491, "logits/rejected": -1.361901044845581, "logps/chosen": -197.88882446289062, "logps/rejected": -172.98486328125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.9041839838027954, "rewards/margins": 5.518960952758789, "rewards/rejected": -6.423144817352295, "step": 7691 }, { "epoch": 1.7, "learning_rate": 9.366109650547798e-06, "logits/chosen": -1.6920415163040161, "logits/rejected": -1.6148324012756348, "logps/chosen": -143.37962341308594, "logps/rejected": -358.0042419433594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7652664184570312, "rewards/margins": 8.071876525878906, "rewards/rejected": -8.837142944335938, "step": 7692 }, { "epoch": 1.7, "learning_rate": 9.365235927805237e-06, "logits/chosen": -1.4691115617752075, "logits/rejected": -1.3853141069412231, "logps/chosen": -101.45024108886719, "logps/rejected": -346.95849609375, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -1.0167922973632812, "rewards/margins": 16.865880966186523, "rewards/rejected": -17.882673263549805, "step": 7693 }, { "epoch": 1.7, "learning_rate": 9.364361644140353e-06, "logits/chosen": -1.3601462841033936, "logits/rejected": -1.3193999528884888, "logps/chosen": -206.77915954589844, "logps/rejected": -312.5123291015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5522217154502869, "rewards/margins": 8.807199478149414, "rewards/rejected": -9.359420776367188, "step": 7694 }, { "epoch": 1.7, "learning_rate": 9.36348679966549e-06, "logits/chosen": -1.4034652709960938, "logits/rejected": -1.4034652709960938, "logps/chosen": -136.72520446777344, "logps/rejected": -136.72520446777344, "loss": 0.3471, "rewards/accuracies": 0.0, "rewards/chosen": -5.082160949707031, "rewards/margins": 0.0, "rewards/rejected": -5.082160949707031, "step": 7695 }, { "epoch": 1.7, "learning_rate": 9.362611394493063e-06, "logits/chosen": -1.927781105041504, "logits/rejected": -1.869850516319275, "logps/chosen": -132.53045654296875, "logps/rejected": -166.0568084716797, "loss": 0.0493, "rewards/accuracies": 1.0, "rewards/chosen": -4.549229621887207, "rewards/margins": 2.2674078941345215, "rewards/rejected": -6.8166375160217285, "step": 7696 }, { "epoch": 1.7, "learning_rate": 9.361735428735558e-06, "logits/chosen": -1.2890175580978394, "logits/rejected": -1.234763741493225, "logps/chosen": -96.25421905517578, "logps/rejected": -209.88711547851562, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": -0.02489166334271431, "rewards/margins": 2.8661789894104004, "rewards/rejected": -2.891070604324341, "step": 7697 }, { "epoch": 1.7, "learning_rate": 9.360858902505539e-06, "logits/chosen": -1.4914350509643555, "logits/rejected": -1.4297313690185547, "logps/chosen": -215.0438232421875, "logps/rejected": -262.5396728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9039703607559204, "rewards/margins": 11.026721000671387, "rewards/rejected": -12.930691719055176, "step": 7698 }, { "epoch": 1.7, "learning_rate": 9.359981815915632e-06, "logits/chosen": -1.599531650543213, "logits/rejected": -1.6588062047958374, "logps/chosen": -81.85208129882812, "logps/rejected": -116.40027618408203, "loss": 0.0414, "rewards/accuracies": 1.0, "rewards/chosen": -0.4249923825263977, "rewards/margins": 2.449249267578125, "rewards/rejected": -2.874241590499878, "step": 7699 }, { "epoch": 1.7, "learning_rate": 9.359104169078541e-06, "logits/chosen": -1.6738730669021606, "logits/rejected": -1.5376977920532227, "logps/chosen": -166.7608642578125, "logps/rejected": -347.9534606933594, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.79630446434021, "rewards/margins": 9.593167304992676, "rewards/rejected": -13.389472007751465, "step": 7700 }, { "epoch": 1.7, "learning_rate": 9.358225962107047e-06, "logits/chosen": -1.522556185722351, "logits/rejected": -1.5883461236953735, "logps/chosen": -234.63967895507812, "logps/rejected": -95.50646209716797, "loss": 0.8218, "rewards/accuracies": 0.0, "rewards/chosen": -4.252926826477051, "rewards/margins": -1.4273674488067627, "rewards/rejected": -2.825559377670288, "step": 7701 }, { "epoch": 1.7, "learning_rate": 9.35734719511399e-06, "logits/chosen": -1.8101063966751099, "logits/rejected": -1.795160174369812, "logps/chosen": -149.40402221679688, "logps/rejected": -132.559326171875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.5268996953964233, "rewards/margins": 4.116598606109619, "rewards/rejected": -5.643498420715332, "step": 7702 }, { "epoch": 1.7, "learning_rate": 9.356467868212295e-06, "logits/chosen": -1.2975257635116577, "logits/rejected": -1.298983097076416, "logps/chosen": -136.31362915039062, "logps/rejected": -78.26734924316406, "loss": 0.3193, "rewards/accuracies": 1.0, "rewards/chosen": -4.074272155761719, "rewards/margins": 0.11223268508911133, "rewards/rejected": -4.18650484085083, "step": 7703 }, { "epoch": 1.71, "learning_rate": 9.35558798151495e-06, "logits/chosen": -1.6940199136734009, "logits/rejected": -1.6940199136734009, "logps/chosen": -112.11933898925781, "logps/rejected": -112.11933898925781, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -1.7511428594589233, "rewards/margins": 0.0, "rewards/rejected": -1.7511428594589233, "step": 7704 }, { "epoch": 1.71, "learning_rate": 9.354707535135022e-06, "logits/chosen": -1.729584813117981, "logits/rejected": -1.624102234840393, "logps/chosen": -116.09852600097656, "logps/rejected": -325.8992004394531, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.5052826404571533, "rewards/margins": 10.31219482421875, "rewards/rejected": -12.817477226257324, "step": 7705 }, { "epoch": 1.71, "learning_rate": 9.353826529185644e-06, "logits/chosen": -1.5362401008605957, "logits/rejected": -1.5602411031723022, "logps/chosen": -65.80116271972656, "logps/rejected": -60.86094665527344, "loss": 0.6325, "rewards/accuracies": 1.0, "rewards/chosen": -5.007717132568359, "rewards/margins": 0.25937414169311523, "rewards/rejected": -5.267091274261475, "step": 7706 }, { "epoch": 1.71, "learning_rate": 9.352944963780024e-06, "logits/chosen": -1.7250666618347168, "logits/rejected": -1.6950507164001465, "logps/chosen": -87.75579833984375, "logps/rejected": -109.42111206054688, "loss": 0.3294, "rewards/accuracies": 1.0, "rewards/chosen": -2.4326558113098145, "rewards/margins": 0.09229850769042969, "rewards/rejected": -2.524954319000244, "step": 7707 }, { "epoch": 1.71, "learning_rate": 9.352062839031438e-06, "logits/chosen": -1.9524595737457275, "logits/rejected": -1.9149645566940308, "logps/chosen": -96.8705062866211, "logps/rejected": -140.46432495117188, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.8192031979560852, "rewards/margins": 4.765247821807861, "rewards/rejected": -5.584451198577881, "step": 7708 }, { "epoch": 1.71, "learning_rate": 9.351180155053242e-06, "logits/chosen": -1.4619770050048828, "logits/rejected": -1.4499889612197876, "logps/chosen": -126.29194641113281, "logps/rejected": -196.1809539794922, "loss": 0.1409, "rewards/accuracies": 1.0, "rewards/chosen": -2.471820116043091, "rewards/margins": 7.971837043762207, "rewards/rejected": -10.443656921386719, "step": 7709 }, { "epoch": 1.71, "learning_rate": 9.350296911958854e-06, "logits/chosen": -1.751466989517212, "logits/rejected": -1.6595265865325928, "logps/chosen": -143.7957763671875, "logps/rejected": -232.83657836914062, "loss": 0.2229, "rewards/accuracies": 1.0, "rewards/chosen": -0.686993420124054, "rewards/margins": 5.558935642242432, "rewards/rejected": -6.24592924118042, "step": 7710 }, { "epoch": 1.71, "learning_rate": 9.34941310986177e-06, "logits/chosen": -1.8558378219604492, "logits/rejected": -1.796684980392456, "logps/chosen": -113.18557739257812, "logps/rejected": -169.06668090820312, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -1.3700103759765625, "rewards/margins": 2.6875977516174316, "rewards/rejected": -4.057608127593994, "step": 7711 }, { "epoch": 1.71, "learning_rate": 9.348528748875558e-06, "logits/chosen": -1.3188576698303223, "logits/rejected": -1.3102829456329346, "logps/chosen": -144.0923309326172, "logps/rejected": -198.5093536376953, "loss": 0.0391, "rewards/accuracies": 1.0, "rewards/chosen": -0.05747375637292862, "rewards/margins": 2.5189011096954346, "rewards/rejected": -2.5763747692108154, "step": 7712 }, { "epoch": 1.71, "learning_rate": 9.347643829113856e-06, "logits/chosen": -1.700801134109497, "logits/rejected": -1.7100257873535156, "logps/chosen": -129.88754272460938, "logps/rejected": -174.73721313476562, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.9991867542266846, "rewards/margins": 5.7163896560668945, "rewards/rejected": -7.715576171875, "step": 7713 }, { "epoch": 1.71, "learning_rate": 9.346758350690373e-06, "logits/chosen": -1.4994804859161377, "logits/rejected": -1.3864190578460693, "logps/chosen": -143.81275939941406, "logps/rejected": -321.1945495605469, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": -6.140391826629639, "rewards/margins": 5.536681652069092, "rewards/rejected": -11.67707347869873, "step": 7714 }, { "epoch": 1.71, "learning_rate": 9.34587231371889e-06, "logits/chosen": -1.5284128189086914, "logits/rejected": -1.531079649925232, "logps/chosen": -228.28536987304688, "logps/rejected": -272.16339111328125, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": 0.9378494620323181, "rewards/margins": 3.6957154273986816, "rewards/rejected": -2.7578659057617188, "step": 7715 }, { "epoch": 1.71, "learning_rate": 9.344985718313264e-06, "logits/chosen": -1.9158638715744019, "logits/rejected": -1.298512578010559, "logps/chosen": -74.58978271484375, "logps/rejected": -214.1707305908203, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -1.1917155981063843, "rewards/margins": 7.2082037925720215, "rewards/rejected": -8.399919509887695, "step": 7716 }, { "epoch": 1.71, "learning_rate": 9.344098564587418e-06, "logits/chosen": -1.54095458984375, "logits/rejected": -1.5894795656204224, "logps/chosen": -169.8348846435547, "logps/rejected": -102.06663513183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.247201681137085, "rewards/margins": 9.872575759887695, "rewards/rejected": -7.625373840332031, "step": 7717 }, { "epoch": 1.71, "learning_rate": 9.343210852655348e-06, "logits/chosen": -1.549146056175232, "logits/rejected": -1.5585851669311523, "logps/chosen": -232.1337432861328, "logps/rejected": -148.9194793701172, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.341683954000473, "rewards/margins": 9.46107006072998, "rewards/rejected": -9.802754402160645, "step": 7718 }, { "epoch": 1.71, "learning_rate": 9.342322582631125e-06, "logits/chosen": -1.8840874433517456, "logits/rejected": -1.8840874433517456, "logps/chosen": -141.66526794433594, "logps/rejected": -141.66526794433594, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -11.26904582977295, "rewards/margins": 0.0, "rewards/rejected": -11.26904582977295, "step": 7719 }, { "epoch": 1.71, "learning_rate": 9.341433754628888e-06, "logits/chosen": -1.430592656135559, "logits/rejected": -1.4100854396820068, "logps/chosen": -101.10305786132812, "logps/rejected": -165.74363708496094, "loss": 0.3061, "rewards/accuracies": 1.0, "rewards/chosen": -1.996496558189392, "rewards/margins": 7.307811260223389, "rewards/rejected": -9.30430793762207, "step": 7720 }, { "epoch": 1.71, "learning_rate": 9.340544368762851e-06, "logits/chosen": -1.87952721118927, "logits/rejected": -1.6182842254638672, "logps/chosen": -174.96278381347656, "logps/rejected": -267.0072021484375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 4.523484706878662, "rewards/margins": 7.186384201049805, "rewards/rejected": -2.6628997325897217, "step": 7721 }, { "epoch": 1.71, "learning_rate": 9.339654425147297e-06, "logits/chosen": -1.2627593278884888, "logits/rejected": -1.205628514289856, "logps/chosen": -126.03541564941406, "logps/rejected": -277.0172119140625, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -4.8940277099609375, "rewards/margins": 6.740878105163574, "rewards/rejected": -11.634905815124512, "step": 7722 }, { "epoch": 1.71, "learning_rate": 9.338763923896583e-06, "logits/chosen": -1.5989700555801392, "logits/rejected": -1.57181715965271, "logps/chosen": -301.90631103515625, "logps/rejected": -275.5045166015625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -3.8407959938049316, "rewards/margins": 4.150590419769287, "rewards/rejected": -7.991386413574219, "step": 7723 }, { "epoch": 1.71, "learning_rate": 9.337872865125133e-06, "logits/chosen": -1.545960783958435, "logits/rejected": -1.4807143211364746, "logps/chosen": -269.51031494140625, "logps/rejected": -271.36572265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.06104736402630806, "rewards/margins": 7.9336838722229, "rewards/rejected": -7.872636318206787, "step": 7724 }, { "epoch": 1.71, "learning_rate": 9.336981248947447e-06, "logits/chosen": -1.3965120315551758, "logits/rejected": -1.3965120315551758, "logps/chosen": -118.98394012451172, "logps/rejected": -118.98394012451172, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -2.7293860912323, "rewards/margins": 0.0, "rewards/rejected": -2.7293860912323, "step": 7725 }, { "epoch": 1.71, "learning_rate": 9.336089075478098e-06, "logits/chosen": -1.2250691652297974, "logits/rejected": -1.2746223211288452, "logps/chosen": -205.30316162109375, "logps/rejected": -142.1277618408203, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": -0.861114501953125, "rewards/margins": 3.08746337890625, "rewards/rejected": -3.948577880859375, "step": 7726 }, { "epoch": 1.71, "learning_rate": 9.335196344831727e-06, "logits/chosen": -1.828190803527832, "logits/rejected": -1.7160348892211914, "logps/chosen": -169.32119750976562, "logps/rejected": -234.9052276611328, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": -5.138600826263428, "rewards/margins": 2.11489200592041, "rewards/rejected": -7.253492832183838, "step": 7727 }, { "epoch": 1.71, "learning_rate": 9.334303057123044e-06, "logits/chosen": -1.6273653507232666, "logits/rejected": -1.6704318523406982, "logps/chosen": -146.01641845703125, "logps/rejected": -191.65438842773438, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -4.517605781555176, "rewards/margins": 5.026172637939453, "rewards/rejected": -9.543778419494629, "step": 7728 }, { "epoch": 1.71, "learning_rate": 9.33340921246684e-06, "logits/chosen": -1.7533589601516724, "logits/rejected": -1.7231667041778564, "logps/chosen": -81.88027954101562, "logps/rejected": -130.15631103515625, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/chosen": -2.402247190475464, "rewards/margins": 3.0155370235443115, "rewards/rejected": -5.417784214019775, "step": 7729 }, { "epoch": 1.71, "learning_rate": 9.332514810977969e-06, "logits/chosen": -1.9368162155151367, "logits/rejected": -1.911075234413147, "logps/chosen": -171.74668884277344, "logps/rejected": -254.6358642578125, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/chosen": -3.452284336090088, "rewards/margins": 2.5294861793518066, "rewards/rejected": -5.9817705154418945, "step": 7730 }, { "epoch": 1.71, "learning_rate": 9.331619852771361e-06, "logits/chosen": -1.4649163484573364, "logits/rejected": -1.3985257148742676, "logps/chosen": -99.04659271240234, "logps/rejected": -114.51612854003906, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6568069458007812, "rewards/margins": 7.763819694519043, "rewards/rejected": -8.420626640319824, "step": 7731 }, { "epoch": 1.71, "learning_rate": 9.330724337962013e-06, "logits/chosen": -1.7653387784957886, "logits/rejected": -1.7732654809951782, "logps/chosen": -196.65374755859375, "logps/rejected": -186.60134887695312, "loss": 0.0662, "rewards/accuracies": 1.0, "rewards/chosen": -4.54022216796875, "rewards/margins": 1.9543275833129883, "rewards/rejected": -6.494549751281738, "step": 7732 }, { "epoch": 1.71, "learning_rate": 9.329828266665e-06, "logits/chosen": -1.7671377658843994, "logits/rejected": -1.7893145084381104, "logps/chosen": -168.10916137695312, "logps/rejected": -172.50897216796875, "loss": 0.0662, "rewards/accuracies": 1.0, "rewards/chosen": -4.464786529541016, "rewards/margins": 1.954862117767334, "rewards/rejected": -6.41964864730835, "step": 7733 }, { "epoch": 1.71, "learning_rate": 9.328931638995461e-06, "logits/chosen": -1.9834917783737183, "logits/rejected": -1.943723201751709, "logps/chosen": -113.49852752685547, "logps/rejected": -162.98934936523438, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -4.296009063720703, "rewards/margins": 3.463965892791748, "rewards/rejected": -7.759974956512451, "step": 7734 }, { "epoch": 1.71, "learning_rate": 9.328034455068616e-06, "logits/chosen": -1.4044948816299438, "logits/rejected": -1.4041393995285034, "logps/chosen": -119.8426513671875, "logps/rejected": -138.21734619140625, "loss": 0.1013, "rewards/accuracies": 1.0, "rewards/chosen": -3.7641708850860596, "rewards/margins": 1.4935410022735596, "rewards/rejected": -5.257711887359619, "step": 7735 }, { "epoch": 1.71, "learning_rate": 9.327136714999745e-06, "logits/chosen": -1.633920669555664, "logits/rejected": -1.7287601232528687, "logps/chosen": -218.5418701171875, "logps/rejected": -149.83766174316406, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 3.0476531982421875, "rewards/margins": 11.208513259887695, "rewards/rejected": -8.160860061645508, "step": 7736 }, { "epoch": 1.71, "learning_rate": 9.32623841890421e-06, "logits/chosen": -1.5653380155563354, "logits/rejected": -1.469888687133789, "logps/chosen": -198.44085693359375, "logps/rejected": -249.52960205078125, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -2.7052001953125, "rewards/margins": 11.151118278503418, "rewards/rejected": -13.856318473815918, "step": 7737 }, { "epoch": 1.71, "learning_rate": 9.325339566897437e-06, "logits/chosen": -1.739157795906067, "logits/rejected": -1.7522538900375366, "logps/chosen": -146.71556091308594, "logps/rejected": -162.6797332763672, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -4.700431823730469, "rewards/margins": 3.3586082458496094, "rewards/rejected": -8.059040069580078, "step": 7738 }, { "epoch": 1.71, "learning_rate": 9.324440159094927e-06, "logits/chosen": -2.0595529079437256, "logits/rejected": -2.0151147842407227, "logps/chosen": -113.06674194335938, "logps/rejected": -136.02377319335938, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.29228973388671875, "rewards/margins": 5.273257732391357, "rewards/rejected": -5.565547466278076, "step": 7739 }, { "epoch": 1.71, "learning_rate": 9.323540195612255e-06, "logits/chosen": -1.2911401987075806, "logits/rejected": -1.2412418127059937, "logps/chosen": -164.44735717773438, "logps/rejected": -179.58355712890625, "loss": 0.2252, "rewards/accuracies": 1.0, "rewards/chosen": -1.8167755603790283, "rewards/margins": 4.056321144104004, "rewards/rejected": -5.873096466064453, "step": 7740 }, { "epoch": 1.71, "learning_rate": 9.322639676565059e-06, "logits/chosen": -2.36329984664917, "logits/rejected": -2.38653302192688, "logps/chosen": -114.8814468383789, "logps/rejected": -223.7578125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.9061485528945923, "rewards/margins": 5.362130165100098, "rewards/rejected": -7.2682785987854, "step": 7741 }, { "epoch": 1.71, "learning_rate": 9.321738602069057e-06, "logits/chosen": -1.496543526649475, "logits/rejected": -1.4175540208816528, "logps/chosen": -143.1793975830078, "logps/rejected": -240.6494140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.3464066982269287, "rewards/margins": 8.08214282989502, "rewards/rejected": -11.428549766540527, "step": 7742 }, { "epoch": 1.71, "learning_rate": 9.320836972240034e-06, "logits/chosen": -1.318018913269043, "logits/rejected": -1.3411736488342285, "logps/chosen": -202.9656982421875, "logps/rejected": -219.26931762695312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 1.8934234380722046, "rewards/margins": 6.1860551834106445, "rewards/rejected": -4.29263162612915, "step": 7743 }, { "epoch": 1.71, "learning_rate": 9.319934787193846e-06, "logits/chosen": -1.6506236791610718, "logits/rejected": -1.6575344800949097, "logps/chosen": -147.68624877929688, "logps/rejected": -169.42462158203125, "loss": 0.1551, "rewards/accuracies": 1.0, "rewards/chosen": -7.032116889953613, "rewards/margins": 1.0113401412963867, "rewards/rejected": -8.04345703125, "step": 7744 }, { "epoch": 1.71, "learning_rate": 9.319032047046422e-06, "logits/chosen": -1.7440096139907837, "logits/rejected": -1.7065640687942505, "logps/chosen": -157.6039581298828, "logps/rejected": -174.32533264160156, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.4840224981307983, "rewards/margins": 4.073092460632324, "rewards/rejected": -5.557115077972412, "step": 7745 }, { "epoch": 1.71, "learning_rate": 9.318128751913764e-06, "logits/chosen": -1.5502119064331055, "logits/rejected": -1.5499061346054077, "logps/chosen": -166.06028747558594, "logps/rejected": -278.498291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 4.269603252410889, "rewards/margins": 12.361324310302734, "rewards/rejected": -8.091720581054688, "step": 7746 }, { "epoch": 1.71, "learning_rate": 9.317224901911941e-06, "logits/chosen": -1.8537511825561523, "logits/rejected": -1.8088878393173218, "logps/chosen": -184.9355010986328, "logps/rejected": -191.5908660888672, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.830427646636963, "rewards/margins": 6.983572483062744, "rewards/rejected": -11.814000129699707, "step": 7747 }, { "epoch": 1.71, "learning_rate": 9.316320497157097e-06, "logits/chosen": -1.5980355739593506, "logits/rejected": -1.5317744016647339, "logps/chosen": -148.29000854492188, "logps/rejected": -142.7755889892578, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.4797439575195312, "rewards/margins": 10.61416244506836, "rewards/rejected": -9.134418487548828, "step": 7748 }, { "epoch": 1.72, "learning_rate": 9.315415537765446e-06, "logits/chosen": -1.8747668266296387, "logits/rejected": -1.3699041604995728, "logps/chosen": -170.1057891845703, "logps/rejected": -858.7872924804688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.141313076019287, "rewards/margins": 54.73998260498047, "rewards/rejected": -60.88129425048828, "step": 7749 }, { "epoch": 1.72, "learning_rate": 9.314510023853272e-06, "logits/chosen": -1.5965861082077026, "logits/rejected": -1.6047996282577515, "logps/chosen": -153.08493041992188, "logps/rejected": -195.99417114257812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 6.687994480133057, "rewards/margins": 12.4898042678833, "rewards/rejected": -5.801809787750244, "step": 7750 }, { "epoch": 1.72, "learning_rate": 9.313603955536931e-06, "logits/chosen": -1.615480899810791, "logits/rejected": -1.5715619325637817, "logps/chosen": -85.21337890625, "logps/rejected": -138.511962890625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 1.1641587018966675, "rewards/margins": 5.162110805511475, "rewards/rejected": -3.9979522228240967, "step": 7751 }, { "epoch": 1.72, "learning_rate": 9.312697332932852e-06, "logits/chosen": -1.5492241382598877, "logits/rejected": -1.5805448293685913, "logps/chosen": -187.05328369140625, "logps/rejected": -120.97250366210938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.72100830078125, "rewards/margins": 10.263644218444824, "rewards/rejected": -9.542635917663574, "step": 7752 }, { "epoch": 1.72, "learning_rate": 9.311790156157533e-06, "logits/chosen": -1.6323597431182861, "logits/rejected": -1.6323597431182861, "logps/chosen": -142.51097106933594, "logps/rejected": -142.51097106933594, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -3.060272216796875, "rewards/margins": 0.0, "rewards/rejected": -3.060272216796875, "step": 7753 }, { "epoch": 1.72, "learning_rate": 9.310882425327544e-06, "logits/chosen": -1.5988672971725464, "logits/rejected": -1.4338120222091675, "logps/chosen": -201.82730102539062, "logps/rejected": -426.0035400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6350555419921875, "rewards/margins": 11.834383964538574, "rewards/rejected": -13.469439506530762, "step": 7754 }, { "epoch": 1.72, "learning_rate": 9.309974140559525e-06, "logits/chosen": -1.5484282970428467, "logits/rejected": -1.4916902780532837, "logps/chosen": -127.43376159667969, "logps/rejected": -226.16375732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3663300275802612, "rewards/margins": 9.266839981079102, "rewards/rejected": -10.633170127868652, "step": 7755 }, { "epoch": 1.72, "learning_rate": 9.309065301970193e-06, "logits/chosen": -1.9745711088180542, "logits/rejected": -1.8487216234207153, "logps/chosen": -265.998779296875, "logps/rejected": -272.06292724609375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.362870693206787, "rewards/margins": 13.35714340209961, "rewards/rejected": -18.720014572143555, "step": 7756 }, { "epoch": 1.72, "learning_rate": 9.308155909676326e-06, "logits/chosen": -1.7186341285705566, "logits/rejected": -1.6599293947219849, "logps/chosen": -133.96356201171875, "logps/rejected": -237.97543334960938, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -3.403463840484619, "rewards/margins": 4.3548994064331055, "rewards/rejected": -7.758363246917725, "step": 7757 }, { "epoch": 1.72, "learning_rate": 9.307245963794782e-06, "logits/chosen": -1.6338074207305908, "logits/rejected": -1.6338074207305908, "logps/chosen": -176.39376831054688, "logps/rejected": -176.39376831054688, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.306286811828613, "rewards/margins": 0.0, "rewards/rejected": -5.306286811828613, "step": 7758 }, { "epoch": 1.72, "learning_rate": 9.306335464442485e-06, "logits/chosen": -1.7490586042404175, "logits/rejected": -1.7632523775100708, "logps/chosen": -114.50473022460938, "logps/rejected": -140.9280242919922, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7191009521484375, "rewards/margins": 7.806079864501953, "rewards/rejected": -9.52518081665039, "step": 7759 }, { "epoch": 1.72, "learning_rate": 9.305424411736434e-06, "logits/chosen": -1.8942203521728516, "logits/rejected": -1.9248812198638916, "logps/chosen": -155.01210021972656, "logps/rejected": -209.21780395507812, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1723037958145142, "rewards/margins": 6.442558288574219, "rewards/rejected": -7.614861965179443, "step": 7760 }, { "epoch": 1.72, "learning_rate": 9.304512805793696e-06, "logits/chosen": -1.8334087133407593, "logits/rejected": -1.8628103733062744, "logps/chosen": -189.99679565429688, "logps/rejected": -153.6853485107422, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.004626512527466, "rewards/margins": 9.624236106872559, "rewards/rejected": -11.628862380981445, "step": 7761 }, { "epoch": 1.72, "learning_rate": 9.30360064673141e-06, "logits/chosen": -1.7590081691741943, "logits/rejected": -1.66013503074646, "logps/chosen": -193.39678955078125, "logps/rejected": -292.37371826171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7136688232421875, "rewards/margins": 12.19115161895752, "rewards/rejected": -13.904820442199707, "step": 7762 }, { "epoch": 1.72, "learning_rate": 9.302687934666787e-06, "logits/chosen": -1.9576911926269531, "logits/rejected": -1.918068528175354, "logps/chosen": -166.41725158691406, "logps/rejected": -191.3074951171875, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -4.187255859375, "rewards/margins": 3.442870616912842, "rewards/rejected": -7.630126476287842, "step": 7763 }, { "epoch": 1.72, "learning_rate": 9.301774669717108e-06, "logits/chosen": -1.3734275102615356, "logits/rejected": -1.4133881330490112, "logps/chosen": -187.99310302734375, "logps/rejected": -169.91004943847656, "loss": 0.1479, "rewards/accuracies": 1.0, "rewards/chosen": -5.1673264503479, "rewards/margins": 1.0666847229003906, "rewards/rejected": -6.234011173248291, "step": 7764 }, { "epoch": 1.72, "learning_rate": 9.300860851999723e-06, "logits/chosen": -1.7204049825668335, "logits/rejected": -1.7219562530517578, "logps/chosen": -82.49028015136719, "logps/rejected": -139.1115264892578, "loss": 1.8736, "rewards/accuracies": 1.0, "rewards/chosen": -3.2880921363830566, "rewards/margins": 3.619309425354004, "rewards/rejected": -6.9074015617370605, "step": 7765 }, { "epoch": 1.72, "learning_rate": 9.299946481632058e-06, "logits/chosen": -1.882203221321106, "logits/rejected": -1.9144641160964966, "logps/chosen": -147.078857421875, "logps/rejected": -135.22027587890625, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.7158843874931335, "rewards/margins": 3.9024202823638916, "rewards/rejected": -4.61830472946167, "step": 7766 }, { "epoch": 1.72, "learning_rate": 9.299031558731608e-06, "logits/chosen": -1.6304508447647095, "logits/rejected": -1.1573736667633057, "logps/chosen": -165.63165283203125, "logps/rejected": -927.48095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.58536696434021, "rewards/margins": 73.15644836425781, "rewards/rejected": -75.74181365966797, "step": 7767 }, { "epoch": 1.72, "learning_rate": 9.298116083415937e-06, "logits/chosen": -1.7887036800384521, "logits/rejected": -1.8576358556747437, "logps/chosen": -294.5665283203125, "logps/rejected": -189.59848022460938, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 1.3732330799102783, "rewards/margins": 7.6107025146484375, "rewards/rejected": -6.237469673156738, "step": 7768 }, { "epoch": 1.72, "learning_rate": 9.297200055802683e-06, "logits/chosen": -1.573966383934021, "logits/rejected": -1.575820803642273, "logps/chosen": -87.03097534179688, "logps/rejected": -103.86449432373047, "loss": 0.0918, "rewards/accuracies": 1.0, "rewards/chosen": -6.530160427093506, "rewards/margins": 1.6026301383972168, "rewards/rejected": -8.132790565490723, "step": 7769 }, { "epoch": 1.72, "learning_rate": 9.296283476009551e-06, "logits/chosen": -1.6615490913391113, "logits/rejected": -1.6647207736968994, "logps/chosen": -158.53457641601562, "logps/rejected": -229.1327667236328, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": 1.129418969154358, "rewards/margins": 13.768513679504395, "rewards/rejected": -12.639094352722168, "step": 7770 }, { "epoch": 1.72, "learning_rate": 9.295366344154319e-06, "logits/chosen": -1.6172655820846558, "logits/rejected": -1.6067030429840088, "logps/chosen": -183.06747436523438, "logps/rejected": -170.30096435546875, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -4.459417819976807, "rewards/margins": 3.8845486640930176, "rewards/rejected": -8.343966484069824, "step": 7771 }, { "epoch": 1.72, "learning_rate": 9.29444866035484e-06, "logits/chosen": -1.6373276710510254, "logits/rejected": -1.5935884714126587, "logps/chosen": -91.63795471191406, "logps/rejected": -168.5918426513672, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -1.4569847583770752, "rewards/margins": 4.690290451049805, "rewards/rejected": -6.147275447845459, "step": 7772 }, { "epoch": 1.72, "learning_rate": 9.293530424729029e-06, "logits/chosen": -1.7476325035095215, "logits/rejected": -1.5985616445541382, "logps/chosen": -133.23800659179688, "logps/rejected": -219.08392333984375, "loss": 0.9617, "rewards/accuracies": 0.0, "rewards/chosen": -2.3068559169769287, "rewards/margins": -1.755976915359497, "rewards/rejected": -0.5508789420127869, "step": 7773 }, { "epoch": 1.72, "learning_rate": 9.292611637394881e-06, "logits/chosen": -1.6976786851882935, "logits/rejected": -1.703728437423706, "logps/chosen": -79.16596984863281, "logps/rejected": -207.21514892578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6437759399414062, "rewards/margins": 10.327383995056152, "rewards/rejected": -10.971159934997559, "step": 7774 }, { "epoch": 1.72, "learning_rate": 9.291692298470457e-06, "logits/chosen": -1.8661571741104126, "logits/rejected": -1.8082157373428345, "logps/chosen": -142.0770721435547, "logps/rejected": -238.14346313476562, "loss": 0.7805, "rewards/accuracies": 0.0, "rewards/chosen": -4.271823406219482, "rewards/margins": -1.3248019218444824, "rewards/rejected": -2.947021484375, "step": 7775 }, { "epoch": 1.72, "learning_rate": 9.29077240807389e-06, "logits/chosen": -1.808349847793579, "logits/rejected": -1.7951633930206299, "logps/chosen": -111.53912353515625, "logps/rejected": -148.283203125, "loss": 0.4276, "rewards/accuracies": 0.0, "rewards/chosen": -3.2430694103240967, "rewards/margins": -0.28307247161865234, "rewards/rejected": -2.9599969387054443, "step": 7776 }, { "epoch": 1.72, "learning_rate": 9.289851966323382e-06, "logits/chosen": -1.519216775894165, "logits/rejected": -1.5183173418045044, "logps/chosen": -77.09245300292969, "logps/rejected": -98.51068115234375, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -1.119410753250122, "rewards/margins": 3.5046260356903076, "rewards/rejected": -4.62403678894043, "step": 7777 }, { "epoch": 1.72, "learning_rate": 9.288930973337212e-06, "logits/chosen": -1.3170679807662964, "logits/rejected": -0.6808660626411438, "logps/chosen": -96.87809753417969, "logps/rejected": -613.8123779296875, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -1.7650108337402344, "rewards/margins": 44.72917175292969, "rewards/rejected": -46.49418258666992, "step": 7778 }, { "epoch": 1.72, "learning_rate": 9.288009429233717e-06, "logits/chosen": -1.6341131925582886, "logits/rejected": -1.6152687072753906, "logps/chosen": -234.09230041503906, "logps/rejected": -276.09808349609375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.06356048583984375, "rewards/margins": 7.79580545425415, "rewards/rejected": -7.859365940093994, "step": 7779 }, { "epoch": 1.72, "learning_rate": 9.287087334131322e-06, "logits/chosen": -1.6260493993759155, "logits/rejected": -1.6260493993759155, "logps/chosen": -124.21955871582031, "logps/rejected": -124.21955871582031, "loss": 0.3568, "rewards/accuracies": 0.0, "rewards/chosen": -3.5301482677459717, "rewards/margins": 0.0, "rewards/rejected": -3.5301482677459717, "step": 7780 }, { "epoch": 1.72, "learning_rate": 9.28616468814851e-06, "logits/chosen": -1.8691829442977905, "logits/rejected": -1.8240700960159302, "logps/chosen": -178.38150024414062, "logps/rejected": -285.54827880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4268128871917725, "rewards/margins": 10.060328483581543, "rewards/rejected": -13.487141609191895, "step": 7781 }, { "epoch": 1.72, "learning_rate": 9.28524149140384e-06, "logits/chosen": -1.5740914344787598, "logits/rejected": -1.4665745496749878, "logps/chosen": -71.28352355957031, "logps/rejected": -207.26104736328125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.4758026599884033, "rewards/margins": 6.555720329284668, "rewards/rejected": -8.031522750854492, "step": 7782 }, { "epoch": 1.72, "learning_rate": 9.284317744015938e-06, "logits/chosen": -1.7894803285598755, "logits/rejected": -1.7357783317565918, "logps/chosen": -106.46048736572266, "logps/rejected": -288.56689453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9714927673339844, "rewards/margins": 8.29806137084961, "rewards/rejected": -9.269554138183594, "step": 7783 }, { "epoch": 1.72, "learning_rate": 9.283393446103506e-06, "logits/chosen": -1.9602738618850708, "logits/rejected": -1.9990156888961792, "logps/chosen": -124.10572814941406, "logps/rejected": -112.96881866455078, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -3.143995761871338, "rewards/margins": 5.356695652008057, "rewards/rejected": -8.500691413879395, "step": 7784 }, { "epoch": 1.72, "learning_rate": 9.282468597785312e-06, "logits/chosen": -1.2057619094848633, "logits/rejected": -1.2057619094848633, "logps/chosen": -138.73590087890625, "logps/rejected": -138.73590087890625, "loss": 0.3745, "rewards/accuracies": 0.0, "rewards/chosen": -7.220241069793701, "rewards/margins": 0.0, "rewards/rejected": -7.220241069793701, "step": 7785 }, { "epoch": 1.72, "learning_rate": 9.2815431991802e-06, "logits/chosen": -1.6836329698562622, "logits/rejected": -1.6836329698562622, "logps/chosen": -390.9006042480469, "logps/rejected": -390.9006042480469, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -7.345800876617432, "rewards/margins": 0.0, "rewards/rejected": -7.345800876617432, "step": 7786 }, { "epoch": 1.72, "learning_rate": 9.280617250407078e-06, "logits/chosen": -1.5412652492523193, "logits/rejected": -1.5108007192611694, "logps/chosen": -162.56402587890625, "logps/rejected": -256.63421630859375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.923693895339966, "rewards/margins": 8.10423755645752, "rewards/rejected": -11.027931213378906, "step": 7787 }, { "epoch": 1.72, "learning_rate": 9.27969075158493e-06, "logits/chosen": -1.4823920726776123, "logits/rejected": -1.5788779258728027, "logps/chosen": -275.2294616699219, "logps/rejected": -158.93682861328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.8635772466659546, "rewards/margins": 8.608433723449707, "rewards/rejected": -10.472010612487793, "step": 7788 }, { "epoch": 1.72, "learning_rate": 9.278763702832809e-06, "logits/chosen": -1.3160679340362549, "logits/rejected": -1.4081748723983765, "logps/chosen": -175.08673095703125, "logps/rejected": -217.28915405273438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.5314117670059204, "rewards/margins": 13.047173500061035, "rewards/rejected": -11.515761375427246, "step": 7789 }, { "epoch": 1.72, "learning_rate": 9.277836104269837e-06, "logits/chosen": -1.6695823669433594, "logits/rejected": -1.453552484512329, "logps/chosen": -123.23236083984375, "logps/rejected": -499.441162109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.676929473876953, "rewards/margins": 9.080068588256836, "rewards/rejected": -12.756998062133789, "step": 7790 }, { "epoch": 1.72, "learning_rate": 9.276907956015212e-06, "logits/chosen": -1.3153935670852661, "logits/rejected": -1.2458995580673218, "logps/chosen": -107.36796569824219, "logps/rejected": -276.99114990234375, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -1.956233263015747, "rewards/margins": 11.375146865844727, "rewards/rejected": -13.331379890441895, "step": 7791 }, { "epoch": 1.72, "learning_rate": 9.275979258188192e-06, "logits/chosen": -1.7107733488082886, "logits/rejected": -1.6749721765518188, "logps/chosen": -57.49998474121094, "logps/rejected": -142.79129028320312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.31448936462402344, "rewards/margins": 7.422605037689209, "rewards/rejected": -7.737094402313232, "step": 7792 }, { "epoch": 1.72, "learning_rate": 9.275050010908118e-06, "logits/chosen": -1.6707472801208496, "logits/rejected": -1.7347887754440308, "logps/chosen": -134.95013427734375, "logps/rejected": -120.68598937988281, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.8534629344940186, "rewards/margins": 5.645090103149414, "rewards/rejected": -8.498553276062012, "step": 7793 }, { "epoch": 1.73, "learning_rate": 9.274120214294395e-06, "logits/chosen": -1.3957879543304443, "logits/rejected": -1.2908234596252441, "logps/chosen": -188.78611755371094, "logps/rejected": -302.09381103515625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.7303178310394287, "rewards/margins": 13.392036437988281, "rewards/rejected": -17.12235450744629, "step": 7794 }, { "epoch": 1.73, "learning_rate": 9.273189868466499e-06, "logits/chosen": -1.409481406211853, "logits/rejected": -1.4256023168563843, "logps/chosen": -152.21627807617188, "logps/rejected": -169.57681274414062, "loss": 0.0885, "rewards/accuracies": 1.0, "rewards/chosen": -6.202272891998291, "rewards/margins": 1.6413969993591309, "rewards/rejected": -7.843669891357422, "step": 7795 }, { "epoch": 1.73, "learning_rate": 9.272258973543977e-06, "logits/chosen": -1.5656100511550903, "logits/rejected": -1.0429039001464844, "logps/chosen": -84.89157104492188, "logps/rejected": -173.943359375, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.581317126750946, "rewards/margins": 4.363210678100586, "rewards/rejected": -4.944527626037598, "step": 7796 }, { "epoch": 1.73, "learning_rate": 9.271327529646447e-06, "logits/chosen": -1.78787100315094, "logits/rejected": -1.8457211256027222, "logps/chosen": -207.85105895996094, "logps/rejected": -100.23451232910156, "loss": 0.4646, "rewards/accuracies": 0.0, "rewards/chosen": -8.581315994262695, "rewards/margins": -0.42278099060058594, "rewards/rejected": -8.15853500366211, "step": 7797 }, { "epoch": 1.73, "learning_rate": 9.270395536893599e-06, "logits/chosen": -1.2501431703567505, "logits/rejected": -1.2661924362182617, "logps/chosen": -192.7238006591797, "logps/rejected": -227.0201416015625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.8784164786338806, "rewards/margins": 9.02989387512207, "rewards/rejected": -9.908309936523438, "step": 7798 }, { "epoch": 1.73, "learning_rate": 9.269462995405189e-06, "logits/chosen": -1.8453541994094849, "logits/rejected": -1.79365873336792, "logps/chosen": -153.1781005859375, "logps/rejected": -203.43870544433594, "loss": 0.1271, "rewards/accuracies": 1.0, "rewards/chosen": -2.8301491737365723, "rewards/margins": 1.2397041320800781, "rewards/rejected": -4.06985330581665, "step": 7799 }, { "epoch": 1.73, "learning_rate": 9.268529905301049e-06, "logits/chosen": -1.7538812160491943, "logits/rejected": -1.1124080419540405, "logps/chosen": -134.77572631835938, "logps/rejected": -1214.6923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.19407044351100922, "rewards/margins": 107.34841918945312, "rewards/rejected": -107.15435028076172, "step": 7800 }, { "epoch": 1.73, "learning_rate": 9.267596266701076e-06, "logits/chosen": -1.5161901712417603, "logits/rejected": -1.4286978244781494, "logps/chosen": -166.11077880859375, "logps/rejected": -269.9025573730469, "loss": 0.9256, "rewards/accuracies": 0.0, "rewards/chosen": -5.898616313934326, "rewards/margins": -1.6800479888916016, "rewards/rejected": -4.218568325042725, "step": 7801 }, { "epoch": 1.73, "learning_rate": 9.266662079725241e-06, "logits/chosen": -1.5684200525283813, "logits/rejected": -1.5172775983810425, "logps/chosen": -150.63841247558594, "logps/rejected": -314.5069580078125, "loss": 0.0323, "rewards/accuracies": 1.0, "rewards/chosen": -1.589959740638733, "rewards/margins": 11.631948471069336, "rewards/rejected": -13.221908569335938, "step": 7802 }, { "epoch": 1.73, "learning_rate": 9.265727344493587e-06, "logits/chosen": -1.6626620292663574, "logits/rejected": -1.657663106918335, "logps/chosen": -108.11027526855469, "logps/rejected": -150.39089965820312, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -2.2024292945861816, "rewards/margins": 4.95810079574585, "rewards/rejected": -7.160530090332031, "step": 7803 }, { "epoch": 1.73, "learning_rate": 9.264792061126224e-06, "logits/chosen": -1.1964541673660278, "logits/rejected": -1.181151032447815, "logps/chosen": -36.00170135498047, "logps/rejected": -88.21342468261719, "loss": 0.2008, "rewards/accuracies": 1.0, "rewards/chosen": -1.0970985889434814, "rewards/margins": 0.7047996520996094, "rewards/rejected": -1.8018982410430908, "step": 7804 }, { "epoch": 1.73, "learning_rate": 9.263856229743334e-06, "logits/chosen": -1.706758975982666, "logits/rejected": -1.6677898168563843, "logps/chosen": -103.91526794433594, "logps/rejected": -165.56924438476562, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -2.3342628479003906, "rewards/margins": 3.87600040435791, "rewards/rejected": -6.210263252258301, "step": 7805 }, { "epoch": 1.73, "learning_rate": 9.262919850465166e-06, "logits/chosen": -1.8030451536178589, "logits/rejected": -1.800696611404419, "logps/chosen": -149.07728576660156, "logps/rejected": -206.53924560546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.6253662109375, "rewards/margins": 7.879115104675293, "rewards/rejected": -9.504481315612793, "step": 7806 }, { "epoch": 1.73, "learning_rate": 9.261982923412046e-06, "logits/chosen": -1.537109613418579, "logits/rejected": -1.5325545072555542, "logps/chosen": -211.13059997558594, "logps/rejected": -334.8927307128906, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 2.4690628051757812, "rewards/margins": 8.971925735473633, "rewards/rejected": -6.502862453460693, "step": 7807 }, { "epoch": 1.73, "learning_rate": 9.261045448704367e-06, "logits/chosen": -1.4382154941558838, "logits/rejected": -1.3983558416366577, "logps/chosen": -148.2344970703125, "logps/rejected": -214.5480194091797, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.688051700592041, "rewards/margins": 6.633800983428955, "rewards/rejected": -10.321852684020996, "step": 7808 }, { "epoch": 1.73, "learning_rate": 9.26010742646259e-06, "logits/chosen": -1.541778564453125, "logits/rejected": -1.3652269840240479, "logps/chosen": -200.79315185546875, "logps/rejected": -365.6751708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.6221559047698975, "rewards/margins": 19.97777557373047, "rewards/rejected": -17.355619430541992, "step": 7809 }, { "epoch": 1.73, "learning_rate": 9.259168856807249e-06, "logits/chosen": -1.3104811906814575, "logits/rejected": -1.2595294713974, "logps/chosen": -253.1323699951172, "logps/rejected": -331.281005859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.7210281491279602, "rewards/margins": 8.853939056396484, "rewards/rejected": -8.13291072845459, "step": 7810 }, { "epoch": 1.73, "learning_rate": 9.25822973985895e-06, "logits/chosen": -1.5463621616363525, "logits/rejected": -1.499713659286499, "logps/chosen": -129.314208984375, "logps/rejected": -195.66546630859375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.1555252075195312, "rewards/margins": 5.380290508270264, "rewards/rejected": -6.535815715789795, "step": 7811 }, { "epoch": 1.73, "learning_rate": 9.257290075738365e-06, "logits/chosen": -1.441419243812561, "logits/rejected": -1.391256332397461, "logps/chosen": -204.0626678466797, "logps/rejected": -307.6498718261719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.9562530517578125, "rewards/margins": 10.925933837890625, "rewards/rejected": -8.969680786132812, "step": 7812 }, { "epoch": 1.73, "learning_rate": 9.25634986456624e-06, "logits/chosen": -1.7130208015441895, "logits/rejected": -1.8191636800765991, "logps/chosen": -162.9766845703125, "logps/rejected": -213.346923828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.013203429989516735, "rewards/margins": 9.854299545288086, "rewards/rejected": -9.86750316619873, "step": 7813 }, { "epoch": 1.73, "learning_rate": 9.25540910646339e-06, "logits/chosen": -1.2493432760238647, "logits/rejected": -0.9538229703903198, "logps/chosen": -95.71389770507812, "logps/rejected": -285.9928894042969, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.21070556342601776, "rewards/margins": 15.466105461120605, "rewards/rejected": -15.676811218261719, "step": 7814 }, { "epoch": 1.73, "learning_rate": 9.254467801550699e-06, "logits/chosen": -1.6115702390670776, "logits/rejected": -1.5676342248916626, "logps/chosen": -133.5998077392578, "logps/rejected": -214.41127014160156, "loss": 0.3241, "rewards/accuracies": 1.0, "rewards/chosen": 0.06528320163488388, "rewards/margins": 0.19383391737937927, "rewards/rejected": -0.128550723195076, "step": 7815 }, { "epoch": 1.73, "learning_rate": 9.253525949949123e-06, "logits/chosen": -1.386859655380249, "logits/rejected": -1.386859655380249, "logps/chosen": -322.4639892578125, "logps/rejected": -322.4639892578125, "loss": 0.3495, "rewards/accuracies": 0.0, "rewards/chosen": -8.87584114074707, "rewards/margins": 0.0, "rewards/rejected": -8.87584114074707, "step": 7816 }, { "epoch": 1.73, "learning_rate": 9.252583551779687e-06, "logits/chosen": -1.3520872592926025, "logits/rejected": -0.8538029789924622, "logps/chosen": -219.31515502929688, "logps/rejected": -348.4501037597656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.1384429931640625, "rewards/margins": 29.494369506835938, "rewards/rejected": -29.6328125, "step": 7817 }, { "epoch": 1.73, "learning_rate": 9.251640607163488e-06, "logits/chosen": -1.4381985664367676, "logits/rejected": -1.6511681079864502, "logps/chosen": -209.16253662109375, "logps/rejected": -157.30645751953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 7.7026214599609375, "rewards/margins": 12.672880172729492, "rewards/rejected": -4.970259189605713, "step": 7818 }, { "epoch": 1.73, "learning_rate": 9.250697116221692e-06, "logits/chosen": -1.643479347229004, "logits/rejected": -1.5602638721466064, "logps/chosen": -177.04995727539062, "logps/rejected": -305.6015319824219, "loss": 0.3895, "rewards/accuracies": 1.0, "rewards/chosen": 1.9278045892715454, "rewards/margins": 7.566906929016113, "rewards/rejected": -5.639102458953857, "step": 7819 }, { "epoch": 1.73, "learning_rate": 9.249753079075534e-06, "logits/chosen": -1.410826563835144, "logits/rejected": -1.4145468473434448, "logps/chosen": -202.32032775878906, "logps/rejected": -234.8631134033203, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9338089227676392, "rewards/margins": 7.4151434898376465, "rewards/rejected": -8.348952293395996, "step": 7820 }, { "epoch": 1.73, "learning_rate": 9.248808495846322e-06, "logits/chosen": -1.17104971408844, "logits/rejected": -1.423151969909668, "logps/chosen": -331.03955078125, "logps/rejected": -143.29978942871094, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": 0.379852294921875, "rewards/margins": 4.776501655578613, "rewards/rejected": -4.396649360656738, "step": 7821 }, { "epoch": 1.73, "learning_rate": 9.247863366655434e-06, "logits/chosen": -1.6210603713989258, "logits/rejected": -1.6014710664749146, "logps/chosen": -130.9852294921875, "logps/rejected": -168.20587158203125, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -3.272282361984253, "rewards/margins": 3.942014455795288, "rewards/rejected": -7.214296817779541, "step": 7822 }, { "epoch": 1.73, "learning_rate": 9.246917691624314e-06, "logits/chosen": -1.4421765804290771, "logits/rejected": -1.4442647695541382, "logps/chosen": -140.4276580810547, "logps/rejected": -116.8269271850586, "loss": 0.1449, "rewards/accuracies": 1.0, "rewards/chosen": -2.5727601051330566, "rewards/margins": 1.0899367332458496, "rewards/rejected": -3.6626968383789062, "step": 7823 }, { "epoch": 1.73, "learning_rate": 9.245971470874477e-06, "logits/chosen": -1.0627989768981934, "logits/rejected": -1.0682636499404907, "logps/chosen": -140.84771728515625, "logps/rejected": -104.59475708007812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.09351807087659836, "rewards/margins": 7.800111770629883, "rewards/rejected": -7.7065935134887695, "step": 7824 }, { "epoch": 1.73, "learning_rate": 9.245024704527517e-06, "logits/chosen": -1.345866084098816, "logits/rejected": -1.3805209398269653, "logps/chosen": -228.11325073242188, "logps/rejected": -230.22549438476562, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.563427746295929, "rewards/margins": 7.603597640991211, "rewards/rejected": -8.167025566101074, "step": 7825 }, { "epoch": 1.73, "learning_rate": 9.244077392705085e-06, "logits/chosen": -1.8021488189697266, "logits/rejected": -1.7919803857803345, "logps/chosen": -83.71951293945312, "logps/rejected": -147.5162811279297, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": -2.0752809047698975, "rewards/margins": 2.7803986072540283, "rewards/rejected": -4.855679512023926, "step": 7826 }, { "epoch": 1.73, "learning_rate": 9.243129535528909e-06, "logits/chosen": -0.8675105571746826, "logits/rejected": -0.9053434133529663, "logps/chosen": -288.71588134765625, "logps/rejected": -245.3113555908203, "loss": 0.2063, "rewards/accuracies": 1.0, "rewards/chosen": -9.157254219055176, "rewards/margins": 0.6752729415893555, "rewards/rejected": -9.832527160644531, "step": 7827 }, { "epoch": 1.73, "learning_rate": 9.242181133120791e-06, "logits/chosen": -1.6186530590057373, "logits/rejected": -1.0884714126586914, "logps/chosen": -119.48670959472656, "logps/rejected": -800.421142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.917874813079834, "rewards/margins": 60.975215911865234, "rewards/rejected": -65.8930892944336, "step": 7828 }, { "epoch": 1.73, "learning_rate": 9.241232185602594e-06, "logits/chosen": -1.5699483156204224, "logits/rejected": -1.5130668878555298, "logps/chosen": -86.49989318847656, "logps/rejected": -167.20233154296875, "loss": 0.0583, "rewards/accuracies": 1.0, "rewards/chosen": -2.00634765625, "rewards/margins": 7.58988094329834, "rewards/rejected": -9.59622859954834, "step": 7829 }, { "epoch": 1.73, "learning_rate": 9.240282693096257e-06, "logits/chosen": -1.5821373462677002, "logits/rejected": -1.604422688484192, "logps/chosen": -271.9704284667969, "logps/rejected": -262.8052673339844, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": -5.866090297698975, "rewards/margins": 2.5304856300354004, "rewards/rejected": -8.396575927734375, "step": 7830 }, { "epoch": 1.73, "learning_rate": 9.239332655723787e-06, "logits/chosen": -1.2797772884368896, "logits/rejected": -1.2586230039596558, "logps/chosen": -93.99125671386719, "logps/rejected": -179.57269287109375, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": -3.5349388122558594, "rewards/margins": 3.5834765434265137, "rewards/rejected": -7.118415355682373, "step": 7831 }, { "epoch": 1.73, "learning_rate": 9.238382073607262e-06, "logits/chosen": -0.9738196134567261, "logits/rejected": -0.849045991897583, "logps/chosen": -93.45640563964844, "logps/rejected": -360.67462158203125, "loss": 0.1026, "rewards/accuracies": 1.0, "rewards/chosen": -1.0376510620117188, "rewards/margins": 9.683642387390137, "rewards/rejected": -10.721293449401855, "step": 7832 }, { "epoch": 1.73, "learning_rate": 9.237430946868829e-06, "logits/chosen": -1.7118903398513794, "logits/rejected": -1.6734132766723633, "logps/chosen": -107.13151550292969, "logps/rejected": -118.86932373046875, "loss": 0.1017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7708153128623962, "rewards/margins": 1.5007095336914062, "rewards/rejected": -2.2715249061584473, "step": 7833 }, { "epoch": 1.73, "learning_rate": 9.236479275630707e-06, "logits/chosen": -1.2656362056732178, "logits/rejected": -1.2570961713790894, "logps/chosen": -73.55766296386719, "logps/rejected": -141.07366943359375, "loss": 0.0635, "rewards/accuracies": 1.0, "rewards/chosen": -0.9866771697998047, "rewards/margins": 3.903069019317627, "rewards/rejected": -4.889746189117432, "step": 7834 }, { "epoch": 1.73, "learning_rate": 9.235527060015182e-06, "logits/chosen": -1.4542081356048584, "logits/rejected": -1.4375061988830566, "logps/chosen": -183.249755859375, "logps/rejected": -220.04791259765625, "loss": 0.9267, "rewards/accuracies": 0.0, "rewards/chosen": -3.6558074951171875, "rewards/margins": -0.5016036033630371, "rewards/rejected": -3.1542038917541504, "step": 7835 }, { "epoch": 1.73, "learning_rate": 9.23457430014461e-06, "logits/chosen": -1.447020411491394, "logits/rejected": -1.447020411491394, "logps/chosen": -54.952857971191406, "logps/rejected": -54.952857971191406, "loss": 0.3898, "rewards/accuracies": 0.0, "rewards/chosen": -2.567377805709839, "rewards/margins": 0.0, "rewards/rejected": -2.567377805709839, "step": 7836 }, { "epoch": 1.73, "learning_rate": 9.233620996141421e-06, "logits/chosen": -1.771026372909546, "logits/rejected": -1.6376770734786987, "logps/chosen": -117.65232849121094, "logps/rejected": -241.21359252929688, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/chosen": -0.21732178330421448, "rewards/margins": 2.068551540374756, "rewards/rejected": -2.2858734130859375, "step": 7837 }, { "epoch": 1.73, "learning_rate": 9.232667148128112e-06, "logits/chosen": -1.5447546243667603, "logits/rejected": -1.531230092048645, "logps/chosen": -101.88385009765625, "logps/rejected": -121.39244079589844, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": -0.6273353695869446, "rewards/margins": 2.783400058746338, "rewards/rejected": -3.4107353687286377, "step": 7838 }, { "epoch": 1.74, "learning_rate": 9.231712756227249e-06, "logits/chosen": -1.8980616331100464, "logits/rejected": -1.8834837675094604, "logps/chosen": -131.3220977783203, "logps/rejected": -189.65557861328125, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -3.580204725265503, "rewards/margins": 3.2648584842681885, "rewards/rejected": -6.845063209533691, "step": 7839 }, { "epoch": 1.74, "learning_rate": 9.23075782056147e-06, "logits/chosen": -1.138221025466919, "logits/rejected": -1.0582127571105957, "logps/chosen": -185.19589233398438, "logps/rejected": -177.03985595703125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 4.818267822265625, "rewards/margins": 6.715612888336182, "rewards/rejected": -1.897344946861267, "step": 7840 }, { "epoch": 1.74, "learning_rate": 9.229802341253482e-06, "logits/chosen": -0.9669557213783264, "logits/rejected": -0.9669557213783264, "logps/chosen": -64.58995056152344, "logps/rejected": -64.58995056152344, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -0.2201530486345291, "rewards/margins": 0.0, "rewards/rejected": -0.2201530486345291, "step": 7841 }, { "epoch": 1.74, "learning_rate": 9.22884631842606e-06, "logits/chosen": -1.8951913118362427, "logits/rejected": -1.917995810508728, "logps/chosen": -107.44793701171875, "logps/rejected": -111.0952377319336, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -4.207757472991943, "rewards/margins": 2.7424869537353516, "rewards/rejected": -6.950244426727295, "step": 7842 }, { "epoch": 1.74, "learning_rate": 9.227889752202052e-06, "logits/chosen": -1.4126392602920532, "logits/rejected": -1.4193899631500244, "logps/chosen": -261.0738220214844, "logps/rejected": -228.963623046875, "loss": 0.3964, "rewards/accuracies": 0.0, "rewards/chosen": -9.614273071289062, "rewards/margins": -0.19026756286621094, "rewards/rejected": -9.424005508422852, "step": 7843 }, { "epoch": 1.74, "learning_rate": 9.226932642704376e-06, "logits/chosen": -1.3494179248809814, "logits/rejected": -1.3522698879241943, "logps/chosen": -143.55953979492188, "logps/rejected": -157.968017578125, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.8308151960372925, "rewards/margins": 3.7073240280151367, "rewards/rejected": -5.538139343261719, "step": 7844 }, { "epoch": 1.74, "learning_rate": 9.225974990056016e-06, "logits/chosen": -1.4018040895462036, "logits/rejected": -1.3986082077026367, "logps/chosen": -143.19741821289062, "logps/rejected": -132.07492065429688, "loss": 1.4731, "rewards/accuracies": 0.0, "rewards/chosen": -6.125734806060791, "rewards/margins": -2.8783118724823, "rewards/rejected": -3.247422933578491, "step": 7845 }, { "epoch": 1.74, "learning_rate": 9.225016794380027e-06, "logits/chosen": -1.6979305744171143, "logits/rejected": -1.6979305744171143, "logps/chosen": -217.1309356689453, "logps/rejected": -217.1309356689453, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -12.670244216918945, "rewards/margins": 0.0, "rewards/rejected": -12.670244216918945, "step": 7846 }, { "epoch": 1.74, "learning_rate": 9.22405805579954e-06, "logits/chosen": -1.4678071737289429, "logits/rejected": -1.5066699981689453, "logps/chosen": -113.05658721923828, "logps/rejected": -89.89608001708984, "loss": 0.048, "rewards/accuracies": 1.0, "rewards/chosen": -2.7129783630371094, "rewards/margins": 2.618593692779541, "rewards/rejected": -5.33157205581665, "step": 7847 }, { "epoch": 1.74, "learning_rate": 9.223098774437744e-06, "logits/chosen": -1.6352964639663696, "logits/rejected": -1.5364779233932495, "logps/chosen": -144.79574584960938, "logps/rejected": -180.4136505126953, "loss": 0.4313, "rewards/accuracies": 0.0, "rewards/chosen": -4.1278839111328125, "rewards/margins": -0.31150364875793457, "rewards/rejected": -3.816380262374878, "step": 7848 }, { "epoch": 1.74, "learning_rate": 9.222138950417908e-06, "logits/chosen": -1.607766032218933, "logits/rejected": -1.607766032218933, "logps/chosen": -116.7086410522461, "logps/rejected": -116.7086410522461, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.4035210609436035, "rewards/margins": 0.0, "rewards/rejected": -6.4035210609436035, "step": 7849 }, { "epoch": 1.74, "learning_rate": 9.221178583863367e-06, "logits/chosen": -1.3637332916259766, "logits/rejected": -1.4199782609939575, "logps/chosen": -75.49430847167969, "logps/rejected": -77.77779388427734, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": -1.6836204528808594, "rewards/margins": 3.0056381225585938, "rewards/rejected": -4.689258575439453, "step": 7850 }, { "epoch": 1.74, "learning_rate": 9.220217674897524e-06, "logits/chosen": -1.2355443239212036, "logits/rejected": -1.3969697952270508, "logps/chosen": -225.94992065429688, "logps/rejected": -218.6328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.4347610473632812, "rewards/margins": 9.235939025878906, "rewards/rejected": -7.801177978515625, "step": 7851 }, { "epoch": 1.74, "learning_rate": 9.219256223643857e-06, "logits/chosen": -1.3717379570007324, "logits/rejected": -1.529391884803772, "logps/chosen": -264.9775390625, "logps/rejected": -205.50802612304688, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.129023790359497, "rewards/margins": 6.161554336547852, "rewards/rejected": -7.2905778884887695, "step": 7852 }, { "epoch": 1.74, "learning_rate": 9.218294230225908e-06, "logits/chosen": -1.3740395307540894, "logits/rejected": -1.547711968421936, "logps/chosen": -305.80169677734375, "logps/rejected": -183.11514282226562, "loss": 1.3329, "rewards/accuracies": 0.0, "rewards/chosen": -9.240615844726562, "rewards/margins": -2.5936574935913086, "rewards/rejected": -6.646958351135254, "step": 7853 }, { "epoch": 1.74, "learning_rate": 9.217331694767291e-06, "logits/chosen": -1.269191026687622, "logits/rejected": -1.0979424715042114, "logps/chosen": -221.4569549560547, "logps/rejected": -372.33746337890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.18281403183937073, "rewards/margins": 8.888526916503906, "rewards/rejected": -8.705713272094727, "step": 7854 }, { "epoch": 1.74, "learning_rate": 9.21636861739169e-06, "logits/chosen": -1.471086025238037, "logits/rejected": -1.4545754194259644, "logps/chosen": -297.70159912109375, "logps/rejected": -373.40509033203125, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -5.956780910491943, "rewards/margins": 3.197592258453369, "rewards/rejected": -9.154373168945312, "step": 7855 }, { "epoch": 1.74, "learning_rate": 9.215404998222856e-06, "logits/chosen": -1.7248555421829224, "logits/rejected": -1.8245964050292969, "logps/chosen": -94.72930145263672, "logps/rejected": -125.30731201171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.20537643134593964, "rewards/margins": 8.641355514526367, "rewards/rejected": -8.846732139587402, "step": 7856 }, { "epoch": 1.74, "learning_rate": 9.214440837384612e-06, "logits/chosen": -1.0878968238830566, "logits/rejected": -0.9517867565155029, "logps/chosen": -163.0723114013672, "logps/rejected": -224.3655242919922, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.12982331216335297, "rewards/margins": 10.787672996520996, "rewards/rejected": -10.657849311828613, "step": 7857 }, { "epoch": 1.74, "learning_rate": 9.213476135000853e-06, "logits/chosen": -0.9651362299919128, "logits/rejected": -1.0819566249847412, "logps/chosen": -219.6226806640625, "logps/rejected": -237.5528564453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.7481719851493835, "rewards/margins": 16.265256881713867, "rewards/rejected": -15.517084121704102, "step": 7858 }, { "epoch": 1.74, "learning_rate": 9.21251089119554e-06, "logits/chosen": -1.3411321640014648, "logits/rejected": -1.1959930658340454, "logps/chosen": -206.28207397460938, "logps/rejected": -290.6665344238281, "loss": 0.1538, "rewards/accuracies": 1.0, "rewards/chosen": 3.601579427719116, "rewards/margins": 7.544059753417969, "rewards/rejected": -3.9424805641174316, "step": 7859 }, { "epoch": 1.74, "learning_rate": 9.211545106092706e-06, "logits/chosen": -1.213283658027649, "logits/rejected": -1.2319327592849731, "logps/chosen": -187.9250030517578, "logps/rejected": -145.63153076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.84600830078125, "rewards/margins": 10.489530563354492, "rewards/rejected": -7.6435227394104, "step": 7860 }, { "epoch": 1.74, "learning_rate": 9.210578779816449e-06, "logits/chosen": -1.0860671997070312, "logits/rejected": -1.0125110149383545, "logps/chosen": -170.7593994140625, "logps/rejected": -250.6900177001953, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": 0.32213136553764343, "rewards/margins": 10.377714157104492, "rewards/rejected": -10.055583000183105, "step": 7861 }, { "epoch": 1.74, "learning_rate": 9.20961191249094e-06, "logits/chosen": -1.3790740966796875, "logits/rejected": -1.4128341674804688, "logps/chosen": -181.93890380859375, "logps/rejected": -195.7335205078125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.037318468093872, "rewards/margins": 4.813129425048828, "rewards/rejected": -6.850448131561279, "step": 7862 }, { "epoch": 1.74, "learning_rate": 9.208644504240418e-06, "logits/chosen": -1.561039686203003, "logits/rejected": -1.4562251567840576, "logps/chosen": -99.47515869140625, "logps/rejected": -259.083984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1614669561386108, "rewards/margins": 7.764652729034424, "rewards/rejected": -8.926119804382324, "step": 7863 }, { "epoch": 1.74, "learning_rate": 9.207676555189196e-06, "logits/chosen": -1.2896114587783813, "logits/rejected": -1.2680416107177734, "logps/chosen": -109.63104248046875, "logps/rejected": -196.07135009765625, "loss": 0.3493, "rewards/accuracies": 1.0, "rewards/chosen": -0.3694442808628082, "rewards/margins": 5.190768241882324, "rewards/rejected": -5.5602126121521, "step": 7864 }, { "epoch": 1.74, "learning_rate": 9.206708065461652e-06, "logits/chosen": -1.6383906602859497, "logits/rejected": -1.6431896686553955, "logps/chosen": -110.60527038574219, "logps/rejected": -115.74344635009766, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -1.3158692121505737, "rewards/margins": 3.613758087158203, "rewards/rejected": -4.929627418518066, "step": 7865 }, { "epoch": 1.74, "learning_rate": 9.205739035182236e-06, "logits/chosen": -1.2847166061401367, "logits/rejected": -1.3304146528244019, "logps/chosen": -108.14690399169922, "logps/rejected": -116.16230773925781, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.8527275323867798, "rewards/margins": 7.1717047691345215, "rewards/rejected": -8.024432182312012, "step": 7866 }, { "epoch": 1.74, "learning_rate": 9.204769464475462e-06, "logits/chosen": -1.526238203048706, "logits/rejected": -1.2816951274871826, "logps/chosen": -166.66497802734375, "logps/rejected": -431.5540771484375, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": 3.471449375152588, "rewards/margins": 18.821748733520508, "rewards/rejected": -15.350298881530762, "step": 7867 }, { "epoch": 1.74, "learning_rate": 9.20379935346592e-06, "logits/chosen": -1.0522074699401855, "logits/rejected": -0.9869996309280396, "logps/chosen": -125.27665710449219, "logps/rejected": -210.935302734375, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": 0.4965072572231293, "rewards/margins": 12.010897636413574, "rewards/rejected": -11.514389991760254, "step": 7868 }, { "epoch": 1.74, "learning_rate": 9.202828702278265e-06, "logits/chosen": -1.2773141860961914, "logits/rejected": -1.2773141860961914, "logps/chosen": -127.97767639160156, "logps/rejected": -127.97767639160156, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.4265878200531006, "rewards/margins": 0.0, "rewards/rejected": -3.4265878200531006, "step": 7869 }, { "epoch": 1.74, "learning_rate": 9.201857511037228e-06, "logits/chosen": -1.2809327840805054, "logits/rejected": -1.3069698810577393, "logps/chosen": -55.93787384033203, "logps/rejected": -70.62319946289062, "loss": 0.0478, "rewards/accuracies": 1.0, "rewards/chosen": -0.19965820014476776, "rewards/margins": 2.3602561950683594, "rewards/rejected": -2.5599143505096436, "step": 7870 }, { "epoch": 1.74, "learning_rate": 9.200885779867601e-06, "logits/chosen": -1.6154615879058838, "logits/rejected": -1.6154615879058838, "logps/chosen": -91.07085418701172, "logps/rejected": -91.07085418701172, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -2.077327013015747, "rewards/margins": 0.0, "rewards/rejected": -2.077327013015747, "step": 7871 }, { "epoch": 1.74, "learning_rate": 9.199913508894251e-06, "logits/chosen": -1.660914421081543, "logits/rejected": -1.7118066549301147, "logps/chosen": -197.26937866210938, "logps/rejected": -188.60610961914062, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 0.5596588253974915, "rewards/margins": 5.978369235992432, "rewards/rejected": -5.418710231781006, "step": 7872 }, { "epoch": 1.74, "learning_rate": 9.198940698242108e-06, "logits/chosen": -1.4830065965652466, "logits/rejected": -1.4259239435195923, "logps/chosen": -122.13963317871094, "logps/rejected": -205.98622131347656, "loss": 0.1201, "rewards/accuracies": 1.0, "rewards/chosen": -1.9968376159667969, "rewards/margins": 1.6839592456817627, "rewards/rejected": -3.6807968616485596, "step": 7873 }, { "epoch": 1.74, "learning_rate": 9.197967348036182e-06, "logits/chosen": -1.0255956649780273, "logits/rejected": -0.7693133354187012, "logps/chosen": -144.62135314941406, "logps/rejected": -364.4080810546875, "loss": 1.6608, "rewards/accuracies": 1.0, "rewards/chosen": 0.5278976559638977, "rewards/margins": 20.028717041015625, "rewards/rejected": -19.50082015991211, "step": 7874 }, { "epoch": 1.74, "learning_rate": 9.196993458401544e-06, "logits/chosen": -1.2193108797073364, "logits/rejected": -1.06255042552948, "logps/chosen": -51.40607452392578, "logps/rejected": -234.23980712890625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.14692460000514984, "rewards/margins": 10.382041931152344, "rewards/rejected": -10.235116958618164, "step": 7875 }, { "epoch": 1.74, "learning_rate": 9.196019029463335e-06, "logits/chosen": -1.4201353788375854, "logits/rejected": -1.5102410316467285, "logps/chosen": -255.6756591796875, "logps/rejected": -144.66897583007812, "loss": 0.6502, "rewards/accuracies": 0.0, "rewards/chosen": -3.232133626937866, "rewards/margins": -0.9822671413421631, "rewards/rejected": -2.249866485595703, "step": 7876 }, { "epoch": 1.74, "learning_rate": 9.195044061346767e-06, "logits/chosen": -1.4526162147521973, "logits/rejected": -1.47933030128479, "logps/chosen": -125.57508850097656, "logps/rejected": -224.69400024414062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6761817932128906, "rewards/margins": 12.11122989654541, "rewards/rejected": -12.7874116897583, "step": 7877 }, { "epoch": 1.74, "learning_rate": 9.194068554177123e-06, "logits/chosen": -1.3680334091186523, "logits/rejected": -1.3582022190093994, "logps/chosen": -195.9498748779297, "logps/rejected": -167.35379028320312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.908744812011719, "rewards/margins": 8.478041648864746, "rewards/rejected": -13.386786460876465, "step": 7878 }, { "epoch": 1.74, "learning_rate": 9.19309250807975e-06, "logits/chosen": -1.447965383529663, "logits/rejected": -1.546390175819397, "logps/chosen": -201.40704345703125, "logps/rejected": -164.45205688476562, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 3.6768829822540283, "rewards/margins": 8.455979347229004, "rewards/rejected": -4.7790961265563965, "step": 7879 }, { "epoch": 1.74, "learning_rate": 9.192115923180071e-06, "logits/chosen": -1.3170909881591797, "logits/rejected": -1.3884122371673584, "logps/chosen": -324.67022705078125, "logps/rejected": -202.9379425048828, "loss": 1.2235, "rewards/accuracies": 0.0, "rewards/chosen": -8.542157173156738, "rewards/margins": -2.337521553039551, "rewards/rejected": -6.2046356201171875, "step": 7880 }, { "epoch": 1.74, "learning_rate": 9.191138799603574e-06, "logits/chosen": -1.3114850521087646, "logits/rejected": -1.179836392402649, "logps/chosen": -113.14596557617188, "logps/rejected": -237.8494873046875, "loss": 0.2155, "rewards/accuracies": 1.0, "rewards/chosen": 0.07154693454504013, "rewards/margins": 0.6304183602333069, "rewards/rejected": -0.5588714480400085, "step": 7881 }, { "epoch": 1.74, "learning_rate": 9.190161137475814e-06, "logits/chosen": -1.4238075017929077, "logits/rejected": -1.4238075017929077, "logps/chosen": -96.19195556640625, "logps/rejected": -96.19195556640625, "loss": 0.3538, "rewards/accuracies": 0.0, "rewards/chosen": -4.760612487792969, "rewards/margins": 0.0, "rewards/rejected": -4.760612487792969, "step": 7882 }, { "epoch": 1.74, "learning_rate": 9.189182936922424e-06, "logits/chosen": -1.0760451555252075, "logits/rejected": -1.1390724182128906, "logps/chosen": -206.4510040283203, "logps/rejected": -123.2526626586914, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 0.02780914306640625, "rewards/margins": 4.287552833557129, "rewards/rejected": -4.259743690490723, "step": 7883 }, { "epoch": 1.75, "learning_rate": 9.188204198069096e-06, "logits/chosen": -1.229828953742981, "logits/rejected": -1.317280650138855, "logps/chosen": -148.78598022460938, "logps/rejected": -200.02029418945312, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 3.2838075160980225, "rewards/margins": 10.573144912719727, "rewards/rejected": -7.289337158203125, "step": 7884 }, { "epoch": 1.75, "learning_rate": 9.187224921041595e-06, "logits/chosen": -1.1004303693771362, "logits/rejected": -1.013476848602295, "logps/chosen": -177.2649688720703, "logps/rejected": -288.85528564453125, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": 2.4526994228363037, "rewards/margins": 9.712552070617676, "rewards/rejected": -7.259852886199951, "step": 7885 }, { "epoch": 1.75, "learning_rate": 9.186245105965758e-06, "logits/chosen": -1.392768144607544, "logits/rejected": -1.434989333152771, "logps/chosen": -337.254150390625, "logps/rejected": -268.33038330078125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -5.204003810882568, "rewards/margins": 4.529595851898193, "rewards/rejected": -9.733599662780762, "step": 7886 }, { "epoch": 1.75, "learning_rate": 9.18526475296749e-06, "logits/chosen": -1.1315025091171265, "logits/rejected": -1.0439046621322632, "logps/chosen": -174.20809936523438, "logps/rejected": -197.45758056640625, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -4.219598293304443, "rewards/margins": 3.737490177154541, "rewards/rejected": -7.957088470458984, "step": 7887 }, { "epoch": 1.75, "learning_rate": 9.184283862172763e-06, "logits/chosen": -1.4065711498260498, "logits/rejected": -1.4065711498260498, "logps/chosen": -186.24240112304688, "logps/rejected": -186.24240112304688, "loss": 0.4082, "rewards/accuracies": 0.0, "rewards/chosen": -8.586397171020508, "rewards/margins": 0.0, "rewards/rejected": -8.586397171020508, "step": 7888 }, { "epoch": 1.75, "learning_rate": 9.183302433707616e-06, "logits/chosen": -1.2419238090515137, "logits/rejected": -1.2786091566085815, "logps/chosen": -185.8633270263672, "logps/rejected": -228.6287078857422, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.7767136096954346, "rewards/margins": 12.970125198364258, "rewards/rejected": -10.193411827087402, "step": 7889 }, { "epoch": 1.75, "learning_rate": 9.182320467698164e-06, "logits/chosen": -1.689576268196106, "logits/rejected": -1.6902034282684326, "logps/chosen": -97.37739562988281, "logps/rejected": -211.1825714111328, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.142878770828247, "rewards/margins": 5.801565170288086, "rewards/rejected": -7.944444179534912, "step": 7890 }, { "epoch": 1.75, "learning_rate": 9.181337964270585e-06, "logits/chosen": -1.3653370141983032, "logits/rejected": -1.3264226913452148, "logps/chosen": -95.15609741210938, "logps/rejected": -149.16986083984375, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": 0.44553452730178833, "rewards/margins": 4.831404209136963, "rewards/rejected": -4.38586950302124, "step": 7891 }, { "epoch": 1.75, "learning_rate": 9.180354923551129e-06, "logits/chosen": -1.3121918439865112, "logits/rejected": -1.1745827198028564, "logps/chosen": -197.0887451171875, "logps/rejected": -338.0115966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.0086501836776733, "rewards/margins": 12.035758972167969, "rewards/rejected": -11.027109146118164, "step": 7892 }, { "epoch": 1.75, "learning_rate": 9.179371345666115e-06, "logits/chosen": -1.0060762166976929, "logits/rejected": -0.9544686079025269, "logps/chosen": -84.46334838867188, "logps/rejected": -147.7323760986328, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1920547485351562, "rewards/margins": 4.167436122894287, "rewards/rejected": -5.359490871429443, "step": 7893 }, { "epoch": 1.75, "learning_rate": 9.178387230741932e-06, "logits/chosen": -1.5823811292648315, "logits/rejected": -1.5563210248947144, "logps/chosen": -205.07962036132812, "logps/rejected": -281.2692565917969, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 4.004127502441406, "rewards/margins": 6.113935947418213, "rewards/rejected": -2.1098084449768066, "step": 7894 }, { "epoch": 1.75, "learning_rate": 9.177402578905032e-06, "logits/chosen": -1.3637580871582031, "logits/rejected": -1.383323311805725, "logps/chosen": -124.62559509277344, "logps/rejected": -108.32865905761719, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -2.3476028442382812, "rewards/margins": 4.5109453201293945, "rewards/rejected": -6.858548164367676, "step": 7895 }, { "epoch": 1.75, "learning_rate": 9.176417390281944e-06, "logits/chosen": -1.3541173934936523, "logits/rejected": -0.7054269909858704, "logps/chosen": -333.55023193359375, "logps/rejected": -744.4764404296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -18.72511863708496, "rewards/margins": 34.344940185546875, "rewards/rejected": -53.0700569152832, "step": 7896 }, { "epoch": 1.75, "learning_rate": 9.17543166499926e-06, "logits/chosen": -1.0094449520111084, "logits/rejected": -1.0140544176101685, "logps/chosen": -105.94407653808594, "logps/rejected": -122.70909118652344, "loss": 0.1175, "rewards/accuracies": 1.0, "rewards/chosen": -0.5193085074424744, "rewards/margins": 2.637908935546875, "rewards/rejected": -3.157217502593994, "step": 7897 }, { "epoch": 1.75, "learning_rate": 9.174445403183645e-06, "logits/chosen": -1.7137072086334229, "logits/rejected": -1.6904579401016235, "logps/chosen": -97.97562408447266, "logps/rejected": -185.66030883789062, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.135199785232544, "rewards/margins": 3.946096181869507, "rewards/rejected": -5.081295967102051, "step": 7898 }, { "epoch": 1.75, "learning_rate": 9.173458604961832e-06, "logits/chosen": -1.1132396459579468, "logits/rejected": -1.0415045022964478, "logps/chosen": -210.21795654296875, "logps/rejected": -201.40167236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.6235992908477783, "rewards/margins": 9.981927871704102, "rewards/rejected": -6.358328342437744, "step": 7899 }, { "epoch": 1.75, "learning_rate": 9.17247127046062e-06, "logits/chosen": -1.4718793630599976, "logits/rejected": -1.4670802354812622, "logps/chosen": -98.09626007080078, "logps/rejected": -156.0826873779297, "loss": 0.1469, "rewards/accuracies": 1.0, "rewards/chosen": 0.6516899466514587, "rewards/margins": 8.397502899169922, "rewards/rejected": -7.745812892913818, "step": 7900 }, { "epoch": 1.75, "learning_rate": 9.17148339980688e-06, "logits/chosen": -1.641729712486267, "logits/rejected": -1.5231603384017944, "logps/chosen": -101.31257629394531, "logps/rejected": -252.58645629882812, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.6771568655967712, "rewards/margins": 7.340854644775391, "rewards/rejected": -6.663697719573975, "step": 7901 }, { "epoch": 1.75, "learning_rate": 9.170494993127552e-06, "logits/chosen": -1.4572608470916748, "logits/rejected": -1.5042684078216553, "logps/chosen": -193.23944091796875, "logps/rejected": -143.71945190429688, "loss": 0.1039, "rewards/accuracies": 1.0, "rewards/chosen": -4.35860013961792, "rewards/margins": 2.1381468772888184, "rewards/rejected": -6.496747016906738, "step": 7902 }, { "epoch": 1.75, "learning_rate": 9.169506050549641e-06, "logits/chosen": -1.401909351348877, "logits/rejected": -1.3914252519607544, "logps/chosen": -125.27606201171875, "logps/rejected": -165.60208129882812, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -1.7545089721679688, "rewards/margins": 3.113861083984375, "rewards/rejected": -4.868370056152344, "step": 7903 }, { "epoch": 1.75, "learning_rate": 9.168516572200227e-06, "logits/chosen": -1.230086088180542, "logits/rejected": -1.230086088180542, "logps/chosen": -214.35134887695312, "logps/rejected": -214.35134887695312, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -8.126520156860352, "rewards/margins": 0.0, "rewards/rejected": -8.126520156860352, "step": 7904 }, { "epoch": 1.75, "learning_rate": 9.167526558206455e-06, "logits/chosen": -1.1987452507019043, "logits/rejected": -1.2882826328277588, "logps/chosen": -193.20770263671875, "logps/rejected": -99.96080017089844, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": 1.2945343255996704, "rewards/margins": 3.4927492141723633, "rewards/rejected": -2.1982147693634033, "step": 7905 }, { "epoch": 1.75, "learning_rate": 9.166536008695536e-06, "logits/chosen": -1.2649480104446411, "logits/rejected": -1.2730445861816406, "logps/chosen": -51.125877380371094, "logps/rejected": -45.899112701416016, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -0.9612602591514587, "rewards/margins": 2.654946804046631, "rewards/rejected": -3.6162071228027344, "step": 7906 }, { "epoch": 1.75, "learning_rate": 9.165544923794758e-06, "logits/chosen": -1.1523078680038452, "logits/rejected": -1.136038064956665, "logps/chosen": -199.95669555664062, "logps/rejected": -130.75021362304688, "loss": 0.2345, "rewards/accuracies": 1.0, "rewards/chosen": -3.1460235118865967, "rewards/margins": 0.5133590698242188, "rewards/rejected": -3.6593825817108154, "step": 7907 }, { "epoch": 1.75, "learning_rate": 9.164553303631472e-06, "logits/chosen": -1.778432011604309, "logits/rejected": -1.5667427778244019, "logps/chosen": -92.19398498535156, "logps/rejected": -358.675048828125, "loss": 0.1224, "rewards/accuracies": 1.0, "rewards/chosen": -1.3979682922363281, "rewards/margins": 6.515993595123291, "rewards/rejected": -7.913961887359619, "step": 7908 }, { "epoch": 1.75, "learning_rate": 9.163561148333097e-06, "logits/chosen": -1.3327981233596802, "logits/rejected": -1.3906008005142212, "logps/chosen": -224.039794921875, "logps/rejected": -216.37413024902344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.3463897705078125, "rewards/margins": 7.958464622497559, "rewards/rejected": -8.304854393005371, "step": 7909 }, { "epoch": 1.75, "learning_rate": 9.162568458027122e-06, "logits/chosen": -1.524776577949524, "logits/rejected": -1.6256611347198486, "logps/chosen": -246.4151153564453, "logps/rejected": -274.4332580566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.8018600940704346, "rewards/margins": 17.559715270996094, "rewards/rejected": -14.757855415344238, "step": 7910 }, { "epoch": 1.75, "learning_rate": 9.16157523284111e-06, "logits/chosen": -1.3287630081176758, "logits/rejected": -1.2605164051055908, "logps/chosen": -72.7359390258789, "logps/rejected": -90.3970947265625, "loss": 0.1252, "rewards/accuracies": 1.0, "rewards/chosen": 0.9333267211914062, "rewards/margins": 1.3032630681991577, "rewards/rejected": -0.36993637681007385, "step": 7911 }, { "epoch": 1.75, "learning_rate": 9.16058147290268e-06, "logits/chosen": -1.6499601602554321, "logits/rejected": -1.680467963218689, "logps/chosen": -166.64898681640625, "logps/rejected": -166.5489501953125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 0.5878540277481079, "rewards/margins": 4.679186820983887, "rewards/rejected": -4.091332912445068, "step": 7912 }, { "epoch": 1.75, "learning_rate": 9.159587178339535e-06, "logits/chosen": -1.0275934934616089, "logits/rejected": -0.9325190186500549, "logps/chosen": -148.233642578125, "logps/rejected": -228.78404235839844, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -3.7548744678497314, "rewards/margins": 6.079196929931641, "rewards/rejected": -9.834071159362793, "step": 7913 }, { "epoch": 1.75, "learning_rate": 9.158592349279439e-06, "logits/chosen": -1.477577567100525, "logits/rejected": -1.3486381769180298, "logps/chosen": -64.03376007080078, "logps/rejected": -200.94448852539062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.5834197998046875, "rewards/margins": 8.673036575317383, "rewards/rejected": -8.089616775512695, "step": 7914 }, { "epoch": 1.75, "learning_rate": 9.157596985850218e-06, "logits/chosen": -1.4949482679367065, "logits/rejected": -1.4876900911331177, "logps/chosen": -141.06488037109375, "logps/rejected": -145.29466247558594, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": 0.0017211914528161287, "rewards/margins": 4.396224498748779, "rewards/rejected": -4.394503116607666, "step": 7915 }, { "epoch": 1.75, "learning_rate": 9.156601088179785e-06, "logits/chosen": -1.21244478225708, "logits/rejected": -1.2831997871398926, "logps/chosen": -279.326171875, "logps/rejected": -190.52728271484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.01934814453125, "rewards/margins": 6.900832653045654, "rewards/rejected": -6.881484508514404, "step": 7916 }, { "epoch": 1.75, "learning_rate": 9.1556046563961e-06, "logits/chosen": -1.602575659751892, "logits/rejected": -1.5010024309158325, "logps/chosen": -149.3716278076172, "logps/rejected": -221.84161376953125, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -1.2830002307891846, "rewards/margins": 3.205517530441284, "rewards/rejected": -4.488517761230469, "step": 7917 }, { "epoch": 1.75, "learning_rate": 9.154607690627207e-06, "logits/chosen": -1.002584457397461, "logits/rejected": -0.8270675539970398, "logps/chosen": -95.86099243164062, "logps/rejected": -237.73524475097656, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -3.3473031520843506, "rewards/margins": 7.137653350830078, "rewards/rejected": -10.484956741333008, "step": 7918 }, { "epoch": 1.75, "learning_rate": 9.153610191001214e-06, "logits/chosen": -1.595260500907898, "logits/rejected": -0.7364833354949951, "logps/chosen": -80.99620056152344, "logps/rejected": -951.4735717773438, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.0438530445098877, "rewards/margins": 79.43172454833984, "rewards/rejected": -80.47557830810547, "step": 7919 }, { "epoch": 1.75, "learning_rate": 9.152612157646297e-06, "logits/chosen": -1.3231173753738403, "logits/rejected": -1.2984081506729126, "logps/chosen": -104.7928466796875, "logps/rejected": -205.55520629882812, "loss": 0.0454, "rewards/accuracies": 1.0, "rewards/chosen": -1.020394206047058, "rewards/margins": 4.573366641998291, "rewards/rejected": -5.593760967254639, "step": 7920 }, { "epoch": 1.75, "learning_rate": 9.1516135906907e-06, "logits/chosen": -1.4276057481765747, "logits/rejected": -1.1422483921051025, "logps/chosen": -55.32722091674805, "logps/rejected": -527.005126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.9953808188438416, "rewards/margins": 26.379261016845703, "rewards/rejected": -25.383880615234375, "step": 7921 }, { "epoch": 1.75, "learning_rate": 9.150614490262736e-06, "logits/chosen": -1.6375216245651245, "logits/rejected": -1.631504774093628, "logps/chosen": -89.0149917602539, "logps/rejected": -113.77713012695312, "loss": 0.2148, "rewards/accuracies": 1.0, "rewards/chosen": -1.3308982849121094, "rewards/margins": 3.1451430320739746, "rewards/rejected": -4.476041316986084, "step": 7922 }, { "epoch": 1.75, "learning_rate": 9.149614856490788e-06, "logits/chosen": -1.5463683605194092, "logits/rejected": -1.5463683605194092, "logps/chosen": -196.31573486328125, "logps/rejected": -196.31573486328125, "loss": 0.3469, "rewards/accuracies": 0.0, "rewards/chosen": 0.722442626953125, "rewards/margins": 0.0, "rewards/rejected": 0.722442626953125, "step": 7923 }, { "epoch": 1.75, "learning_rate": 9.148614689503307e-06, "logits/chosen": -1.2805085182189941, "logits/rejected": -1.2457060813903809, "logps/chosen": -147.90744018554688, "logps/rejected": -136.88211059570312, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.30273133516311646, "rewards/margins": 5.971102714538574, "rewards/rejected": -6.273834228515625, "step": 7924 }, { "epoch": 1.75, "learning_rate": 9.147613989428809e-06, "logits/chosen": -1.326392650604248, "logits/rejected": -1.3989824056625366, "logps/chosen": -264.76593017578125, "logps/rejected": -164.5526885986328, "loss": 0.3129, "rewards/accuracies": 1.0, "rewards/chosen": 2.2760589122772217, "rewards/margins": 0.14802074432373047, "rewards/rejected": 2.128038167953491, "step": 7925 }, { "epoch": 1.75, "learning_rate": 9.146612756395888e-06, "logits/chosen": -1.4425835609436035, "logits/rejected": -1.4358901977539062, "logps/chosen": -161.11264038085938, "logps/rejected": -215.6278533935547, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.7837876081466675, "rewards/margins": 4.951623439788818, "rewards/rejected": -6.735411167144775, "step": 7926 }, { "epoch": 1.75, "learning_rate": 9.145610990533193e-06, "logits/chosen": -1.240157127380371, "logits/rejected": -1.3141756057739258, "logps/chosen": -192.1283721923828, "logps/rejected": -86.1759033203125, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/chosen": 1.3197555541992188, "rewards/margins": 2.360842227935791, "rewards/rejected": -1.0410865545272827, "step": 7927 }, { "epoch": 1.75, "learning_rate": 9.144608691969452e-06, "logits/chosen": -1.1699895858764648, "logits/rejected": -1.1699895858764648, "logps/chosen": -110.62336730957031, "logps/rejected": -110.62336730957031, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.930335521697998, "rewards/margins": 0.0, "rewards/rejected": -4.930335521697998, "step": 7928 }, { "epoch": 1.75, "learning_rate": 9.143605860833459e-06, "logits/chosen": -1.434658408164978, "logits/rejected": -1.400916576385498, "logps/chosen": -110.23221588134766, "logps/rejected": -198.70892333984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3511192500591278, "rewards/margins": 6.228687763214111, "rewards/rejected": -6.579806804656982, "step": 7929 }, { "epoch": 1.76, "learning_rate": 9.142602497254071e-06, "logits/chosen": -1.4708189964294434, "logits/rejected": -1.4609770774841309, "logps/chosen": -106.42130279541016, "logps/rejected": -107.99003601074219, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": -0.7950050234794617, "rewards/margins": 2.749803066253662, "rewards/rejected": -3.5448081493377686, "step": 7930 }, { "epoch": 1.76, "learning_rate": 9.141598601360225e-06, "logits/chosen": -1.1301380395889282, "logits/rejected": -1.1019926071166992, "logps/chosen": -118.28651428222656, "logps/rejected": -254.64877319335938, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.235815405845642, "rewards/margins": 9.178427696228027, "rewards/rejected": -10.4142427444458, "step": 7931 }, { "epoch": 1.76, "learning_rate": 9.14059417328091e-06, "logits/chosen": -1.5390819311141968, "logits/rejected": -1.442147135734558, "logps/chosen": -164.92425537109375, "logps/rejected": -213.3008575439453, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 0.4613815248012543, "rewards/margins": 5.927227973937988, "rewards/rejected": -5.465846538543701, "step": 7932 }, { "epoch": 1.76, "learning_rate": 9.139589213145202e-06, "logits/chosen": -1.630425214767456, "logits/rejected": -1.6058213710784912, "logps/chosen": -123.52484130859375, "logps/rejected": -180.4266357421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.9579925537109375, "rewards/margins": 7.3782854080200195, "rewards/rejected": -6.420292854309082, "step": 7933 }, { "epoch": 1.76, "learning_rate": 9.138583721082229e-06, "logits/chosen": -1.602597951889038, "logits/rejected": -1.602597951889038, "logps/chosen": -217.2125244140625, "logps/rejected": -217.2125244140625, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -9.979281425476074, "rewards/margins": 0.0, "rewards/rejected": -9.979281425476074, "step": 7934 }, { "epoch": 1.76, "learning_rate": 9.137577697221195e-06, "logits/chosen": -1.485518455505371, "logits/rejected": -1.4036208391189575, "logps/chosen": -105.15387725830078, "logps/rejected": -203.5753631591797, "loss": 0.1704, "rewards/accuracies": 1.0, "rewards/chosen": -3.4417176246643066, "rewards/margins": 0.9103517532348633, "rewards/rejected": -4.35206937789917, "step": 7935 }, { "epoch": 1.76, "learning_rate": 9.136571141691376e-06, "logits/chosen": -1.3508052825927734, "logits/rejected": -1.305752158164978, "logps/chosen": -87.23699951171875, "logps/rejected": -197.34451293945312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0053421258926392, "rewards/margins": 8.700360298156738, "rewards/rejected": -9.705702781677246, "step": 7936 }, { "epoch": 1.76, "learning_rate": 9.135564054622108e-06, "logits/chosen": -1.419986367225647, "logits/rejected": -1.494414210319519, "logps/chosen": -116.97596740722656, "logps/rejected": -173.38037109375, "loss": 0.0749, "rewards/accuracies": 1.0, "rewards/chosen": -2.5454957485198975, "rewards/margins": 2.415562391281128, "rewards/rejected": -4.961058139801025, "step": 7937 }, { "epoch": 1.76, "learning_rate": 9.134556436142801e-06, "logits/chosen": -1.5584042072296143, "logits/rejected": -1.461266040802002, "logps/chosen": -102.68377685546875, "logps/rejected": -246.6488037109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6569107174873352, "rewards/margins": 7.606880187988281, "rewards/rejected": -8.26379108428955, "step": 7938 }, { "epoch": 1.76, "learning_rate": 9.133548286382932e-06, "logits/chosen": -1.2472045421600342, "logits/rejected": -1.3301242589950562, "logps/chosen": -187.14427185058594, "logps/rejected": -110.14582824707031, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": 0.4790969789028168, "rewards/margins": 2.994643449783325, "rewards/rejected": -2.5155465602874756, "step": 7939 }, { "epoch": 1.76, "learning_rate": 9.132539605472044e-06, "logits/chosen": -1.4973350763320923, "logits/rejected": -1.5331147909164429, "logps/chosen": -97.21406555175781, "logps/rejected": -220.0380401611328, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8806167840957642, "rewards/margins": 6.461955547332764, "rewards/rejected": -7.342572212219238, "step": 7940 }, { "epoch": 1.76, "learning_rate": 9.131530393539752e-06, "logits/chosen": -1.4129889011383057, "logits/rejected": -1.3924845457077026, "logps/chosen": -87.20980072021484, "logps/rejected": -156.32147216796875, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -3.7433624267578125, "rewards/margins": 4.088343143463135, "rewards/rejected": -7.831705570220947, "step": 7941 }, { "epoch": 1.76, "learning_rate": 9.130520650715735e-06, "logits/chosen": -1.2565436363220215, "logits/rejected": -1.2859179973602295, "logps/chosen": -96.36332702636719, "logps/rejected": -153.7884521484375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9390465021133423, "rewards/margins": 6.076298713684082, "rewards/rejected": -7.015345096588135, "step": 7942 }, { "epoch": 1.76, "learning_rate": 9.129510377129745e-06, "logits/chosen": -1.4645799398422241, "logits/rejected": -1.4112343788146973, "logps/chosen": -111.75440979003906, "logps/rejected": -216.28936767578125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8906486630439758, "rewards/margins": 6.732264041900635, "rewards/rejected": -7.622912883758545, "step": 7943 }, { "epoch": 1.76, "learning_rate": 9.128499572911596e-06, "logits/chosen": -1.4481574296951294, "logits/rejected": -1.4062947034835815, "logps/chosen": -97.68514251708984, "logps/rejected": -178.67495727539062, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": -1.1620186567306519, "rewards/margins": 2.15023136138916, "rewards/rejected": -3.3122498989105225, "step": 7944 }, { "epoch": 1.76, "learning_rate": 9.12748823819118e-06, "logits/chosen": -1.3896623849868774, "logits/rejected": -1.3014960289001465, "logps/chosen": -257.9382019042969, "logps/rejected": -253.2530059814453, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0215485095977783, "rewards/margins": 5.7965898513793945, "rewards/rejected": -6.818138122558594, "step": 7945 }, { "epoch": 1.76, "learning_rate": 9.126476373098446e-06, "logits/chosen": -1.4298571348190308, "logits/rejected": -1.4370664358139038, "logps/chosen": -107.66100311279297, "logps/rejected": -151.7914276123047, "loss": 0.1142, "rewards/accuracies": 1.0, "rewards/chosen": -0.9085403680801392, "rewards/margins": 1.360620141029358, "rewards/rejected": -2.269160509109497, "step": 7946 }, { "epoch": 1.76, "learning_rate": 9.125463977763417e-06, "logits/chosen": -1.6050982475280762, "logits/rejected": -1.6062555313110352, "logps/chosen": -131.16647338867188, "logps/rejected": -149.24896240234375, "loss": 0.1124, "rewards/accuracies": 1.0, "rewards/chosen": -2.2328948974609375, "rewards/margins": 3.6110682487487793, "rewards/rejected": -5.843963146209717, "step": 7947 }, { "epoch": 1.76, "learning_rate": 9.124451052316185e-06, "logits/chosen": -1.241390585899353, "logits/rejected": -1.3394217491149902, "logps/chosen": -138.75596618652344, "logps/rejected": -127.35931396484375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.840895891189575, "rewards/margins": 6.041184425354004, "rewards/rejected": -9.882080078125, "step": 7948 }, { "epoch": 1.76, "learning_rate": 9.123437596886909e-06, "logits/chosen": -1.2601933479309082, "logits/rejected": -1.2754031419754028, "logps/chosen": -118.99022674560547, "logps/rejected": -213.03321838378906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.199125051498413, "rewards/margins": 7.814180374145508, "rewards/rejected": -10.0133056640625, "step": 7949 }, { "epoch": 1.76, "learning_rate": 9.122423611605814e-06, "logits/chosen": -1.3693039417266846, "logits/rejected": -1.2987157106399536, "logps/chosen": -260.2196960449219, "logps/rejected": -228.6762237548828, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 1.6444183588027954, "rewards/margins": 4.682445049285889, "rewards/rejected": -3.0380265712738037, "step": 7950 }, { "epoch": 1.76, "learning_rate": 9.121409096603193e-06, "logits/chosen": -1.0465853214263916, "logits/rejected": -1.0465041399002075, "logps/chosen": -70.50497436523438, "logps/rejected": -138.2239990234375, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 0.7193504571914673, "rewards/margins": 4.420712947845459, "rewards/rejected": -3.7013626098632812, "step": 7951 }, { "epoch": 1.76, "learning_rate": 9.120394052009412e-06, "logits/chosen": -1.602682113647461, "logits/rejected": -1.5771273374557495, "logps/chosen": -180.916015625, "logps/rejected": -310.4945373535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.8469834327697754, "rewards/margins": 10.808812141418457, "rewards/rejected": -7.961828708648682, "step": 7952 }, { "epoch": 1.76, "learning_rate": 9.1193784779549e-06, "logits/chosen": -1.7583197355270386, "logits/rejected": -1.802066445350647, "logps/chosen": -247.17991638183594, "logps/rejected": -174.85594177246094, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.6747864484786987, "rewards/margins": 4.369074821472168, "rewards/rejected": -6.043861389160156, "step": 7953 }, { "epoch": 1.76, "learning_rate": 9.118362374570158e-06, "logits/chosen": -1.3938084840774536, "logits/rejected": -1.339638352394104, "logps/chosen": -104.639892578125, "logps/rejected": -120.42276763916016, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.09987258911132812, "rewards/margins": 3.5461747646331787, "rewards/rejected": -3.646047353744507, "step": 7954 }, { "epoch": 1.76, "learning_rate": 9.117345741985749e-06, "logits/chosen": -1.1892979145050049, "logits/rejected": -0.21091051399707794, "logps/chosen": -164.06997680664062, "logps/rejected": -693.4085693359375, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -2.0355470180511475, "rewards/margins": 57.73067092895508, "rewards/rejected": -59.76621627807617, "step": 7955 }, { "epoch": 1.76, "learning_rate": 9.116328580332309e-06, "logits/chosen": -1.0601369142532349, "logits/rejected": -1.1739944219589233, "logps/chosen": -216.9454345703125, "logps/rejected": -100.20079040527344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.9803924560546875, "rewards/margins": 9.234882354736328, "rewards/rejected": -7.254489421844482, "step": 7956 }, { "epoch": 1.76, "learning_rate": 9.115310889740545e-06, "logits/chosen": -0.9476814866065979, "logits/rejected": -0.8821645975112915, "logps/chosen": -137.57711791992188, "logps/rejected": -200.00863647460938, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.4481155574321747, "rewards/margins": 7.203358173370361, "rewards/rejected": -6.755242824554443, "step": 7957 }, { "epoch": 1.76, "learning_rate": 9.114292670341222e-06, "logits/chosen": -1.1608675718307495, "logits/rejected": -1.1608675718307495, "logps/chosen": -137.59091186523438, "logps/rejected": -137.59091186523438, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -11.045969009399414, "rewards/margins": 0.0, "rewards/rejected": -11.045969009399414, "step": 7958 }, { "epoch": 1.76, "learning_rate": 9.113273922265183e-06, "logits/chosen": -1.494591474533081, "logits/rejected": -1.4469703435897827, "logps/chosen": -89.08980560302734, "logps/rejected": -176.42295837402344, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.8349632024765015, "rewards/margins": 5.389213562011719, "rewards/rejected": -7.22417688369751, "step": 7959 }, { "epoch": 1.76, "learning_rate": 9.112254645643332e-06, "logits/chosen": -0.8192040324211121, "logits/rejected": -0.8890413045883179, "logps/chosen": -278.75457763671875, "logps/rejected": -400.3433837890625, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": 0.6231719851493835, "rewards/margins": 24.659137725830078, "rewards/rejected": -24.035964965820312, "step": 7960 }, { "epoch": 1.76, "learning_rate": 9.111234840606647e-06, "logits/chosen": -1.015418529510498, "logits/rejected": -1.0212703943252563, "logps/chosen": -87.20571899414062, "logps/rejected": -91.21635437011719, "loss": 0.4719, "rewards/accuracies": 0.0, "rewards/chosen": -2.4624359607696533, "rewards/margins": -0.45029449462890625, "rewards/rejected": -2.012141466140747, "step": 7961 }, { "epoch": 1.76, "learning_rate": 9.110214507286167e-06, "logits/chosen": -1.0207703113555908, "logits/rejected": -0.7836090922355652, "logps/chosen": -128.32485961914062, "logps/rejected": -327.8897399902344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.4553146362304688, "rewards/margins": 11.828925132751465, "rewards/rejected": -10.373610496520996, "step": 7962 }, { "epoch": 1.76, "learning_rate": 9.109193645813001e-06, "logits/chosen": -1.6120609045028687, "logits/rejected": -1.544851303100586, "logps/chosen": -103.76289367675781, "logps/rejected": -173.3289794921875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.519146740436554, "rewards/margins": 5.798098564147949, "rewards/rejected": -6.3172454833984375, "step": 7963 }, { "epoch": 1.76, "learning_rate": 9.10817225631833e-06, "logits/chosen": -1.2293422222137451, "logits/rejected": -1.2237026691436768, "logps/chosen": -69.73490142822266, "logps/rejected": -101.81536865234375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 0.5192581415176392, "rewards/margins": 4.290688991546631, "rewards/rejected": -3.7714309692382812, "step": 7964 }, { "epoch": 1.76, "learning_rate": 9.107150338933403e-06, "logits/chosen": -1.3645588159561157, "logits/rejected": -1.2810487747192383, "logps/chosen": -186.7479705810547, "logps/rejected": -354.01763916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7111420035362244, "rewards/margins": 15.438215255737305, "rewards/rejected": -16.149356842041016, "step": 7965 }, { "epoch": 1.76, "learning_rate": 9.10612789378953e-06, "logits/chosen": -1.6657058000564575, "logits/rejected": -1.673207402229309, "logps/chosen": -82.56056213378906, "logps/rejected": -79.54421997070312, "loss": 0.0758, "rewards/accuracies": 1.0, "rewards/chosen": -2.607119083404541, "rewards/margins": 4.047396183013916, "rewards/rejected": -6.654515266418457, "step": 7966 }, { "epoch": 1.76, "learning_rate": 9.105104921018092e-06, "logits/chosen": -1.5413188934326172, "logits/rejected": -1.5112159252166748, "logps/chosen": -142.0792694091797, "logps/rejected": -269.0223083496094, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.435476779937744, "rewards/margins": 6.483694553375244, "rewards/rejected": -8.919171333312988, "step": 7967 }, { "epoch": 1.76, "learning_rate": 9.10408142075054e-06, "logits/chosen": -1.1921021938323975, "logits/rejected": -1.1537222862243652, "logps/chosen": -62.913787841796875, "logps/rejected": -111.63832092285156, "loss": 0.1035, "rewards/accuracies": 1.0, "rewards/chosen": -1.4667354822158813, "rewards/margins": 1.8521870374679565, "rewards/rejected": -3.318922519683838, "step": 7968 }, { "epoch": 1.76, "learning_rate": 9.103057393118392e-06, "logits/chosen": -1.5337581634521484, "logits/rejected": -1.5210591554641724, "logps/chosen": -191.70809936523438, "logps/rejected": -190.41818237304688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.273759603500366, "rewards/margins": 5.768082618713379, "rewards/rejected": -8.041842460632324, "step": 7969 }, { "epoch": 1.76, "learning_rate": 9.102032838253232e-06, "logits/chosen": -1.5089082717895508, "logits/rejected": -1.2827125787734985, "logps/chosen": -184.2449188232422, "logps/rejected": -358.87969970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.252972364425659, "rewards/margins": 15.845048904418945, "rewards/rejected": -13.592076301574707, "step": 7970 }, { "epoch": 1.76, "learning_rate": 9.101007756286713e-06, "logits/chosen": -1.6023353338241577, "logits/rejected": -1.6500661373138428, "logps/chosen": -172.61502075195312, "logps/rejected": -225.54164123535156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.2785323858261108, "rewards/margins": 7.721307754516602, "rewards/rejected": -6.442775249481201, "step": 7971 }, { "epoch": 1.76, "learning_rate": 9.099982147350558e-06, "logits/chosen": -1.7959328889846802, "logits/rejected": -1.7835676670074463, "logps/chosen": -104.21900939941406, "logps/rejected": -189.1978302001953, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.2354690581560135, "rewards/margins": 4.764114856719971, "rewards/rejected": -4.999583721160889, "step": 7972 }, { "epoch": 1.76, "learning_rate": 9.098956011576552e-06, "logits/chosen": -1.518444299697876, "logits/rejected": -1.5109103918075562, "logps/chosen": -94.76193237304688, "logps/rejected": -178.11410522460938, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.214228868484497, "rewards/margins": 3.953634023666382, "rewards/rejected": -5.167862892150879, "step": 7973 }, { "epoch": 1.76, "learning_rate": 9.097929349096551e-06, "logits/chosen": -1.34816575050354, "logits/rejected": -1.259373426437378, "logps/chosen": -171.28903198242188, "logps/rejected": -214.8749542236328, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.559741199016571, "rewards/margins": 8.905960083007812, "rewards/rejected": -9.46570110321045, "step": 7974 }, { "epoch": 1.77, "learning_rate": 9.09690216004248e-06, "logits/chosen": -1.0005401372909546, "logits/rejected": -1.0001184940338135, "logps/chosen": -181.24441528320312, "logps/rejected": -160.71701049804688, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.8609161376953125, "rewards/margins": 5.353159427642822, "rewards/rejected": -6.214075565338135, "step": 7975 }, { "epoch": 1.77, "learning_rate": 9.09587444454633e-06, "logits/chosen": -1.0456559658050537, "logits/rejected": -1.052363395690918, "logps/chosen": -247.63648986816406, "logps/rejected": -1366.89794921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.14061127603054047, "rewards/margins": 119.04420471191406, "rewards/rejected": -119.184814453125, "step": 7976 }, { "epoch": 1.77, "learning_rate": 9.094846202740162e-06, "logits/chosen": -1.570247769355774, "logits/rejected": -1.408457636833191, "logps/chosen": -95.19778442382812, "logps/rejected": -228.2745361328125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.8403892517089844, "rewards/margins": 7.642045021057129, "rewards/rejected": -9.482434272766113, "step": 7977 }, { "epoch": 1.77, "learning_rate": 9.0938174347561e-06, "logits/chosen": -1.3921008110046387, "logits/rejected": -1.3921008110046387, "logps/chosen": -202.3806915283203, "logps/rejected": -202.3806915283203, "loss": 0.3501, "rewards/accuracies": 0.0, "rewards/chosen": -16.08194923400879, "rewards/margins": 0.0, "rewards/rejected": -16.08194923400879, "step": 7978 }, { "epoch": 1.77, "learning_rate": 9.092788140726338e-06, "logits/chosen": -1.2094179391860962, "logits/rejected": -1.1608819961547852, "logps/chosen": -116.43562316894531, "logps/rejected": -92.93757629394531, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.6650619506835938, "rewards/margins": 4.787178039550781, "rewards/rejected": -6.452239990234375, "step": 7979 }, { "epoch": 1.77, "learning_rate": 9.091758320783139e-06, "logits/chosen": -1.127701997756958, "logits/rejected": -1.224116563796997, "logps/chosen": -228.47694396972656, "logps/rejected": -253.38180541992188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.716065883636475, "rewards/margins": 7.864206790924072, "rewards/rejected": -12.580272674560547, "step": 7980 }, { "epoch": 1.77, "learning_rate": 9.090727975058833e-06, "logits/chosen": -1.2822933197021484, "logits/rejected": -1.3572380542755127, "logps/chosen": -212.87774658203125, "logps/rejected": -165.4936981201172, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.6706695556640625, "rewards/margins": 8.759132385253906, "rewards/rejected": -6.088462829589844, "step": 7981 }, { "epoch": 1.77, "learning_rate": 9.089697103685815e-06, "logits/chosen": -1.1119407415390015, "logits/rejected": -1.1249635219573975, "logps/chosen": -206.8734588623047, "logps/rejected": -230.54275512695312, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.191026449203491, "rewards/margins": 7.007302284240723, "rewards/rejected": -9.198328971862793, "step": 7982 }, { "epoch": 1.77, "learning_rate": 9.08866570679655e-06, "logits/chosen": -1.554885983467102, "logits/rejected": -1.5436089038848877, "logps/chosen": -88.18621063232422, "logps/rejected": -183.31210327148438, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.2867836058139801, "rewards/margins": 6.096056938171387, "rewards/rejected": -6.382840633392334, "step": 7983 }, { "epoch": 1.77, "learning_rate": 9.087633784523574e-06, "logits/chosen": -1.6449395418167114, "logits/rejected": -2.5696158409118652, "logps/chosen": -103.06011962890625, "logps/rejected": -250.48092651367188, "loss": 0.0291, "rewards/accuracies": 1.0, "rewards/chosen": -2.2581512928009033, "rewards/margins": 16.702571868896484, "rewards/rejected": -18.960723876953125, "step": 7984 }, { "epoch": 1.77, "learning_rate": 9.08660133699948e-06, "logits/chosen": -1.0494815111160278, "logits/rejected": -1.0641793012619019, "logps/chosen": -224.15982055664062, "logps/rejected": -113.71467590332031, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.9814178943634033, "rewards/margins": 5.3415679931640625, "rewards/rejected": -8.322985649108887, "step": 7985 }, { "epoch": 1.77, "learning_rate": 9.085568364356939e-06, "logits/chosen": -1.5276563167572021, "logits/rejected": -1.5177018642425537, "logps/chosen": -161.51602172851562, "logps/rejected": -333.9698486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.805365085601807, "rewards/margins": 12.815652847290039, "rewards/rejected": -17.621017456054688, "step": 7986 }, { "epoch": 1.77, "learning_rate": 9.084534866728683e-06, "logits/chosen": -2.24760103225708, "logits/rejected": -2.4366581439971924, "logps/chosen": -251.724365234375, "logps/rejected": -301.2553405761719, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.4099029302597046, "rewards/margins": 7.825409412384033, "rewards/rejected": -9.235312461853027, "step": 7987 }, { "epoch": 1.77, "learning_rate": 9.083500844247517e-06, "logits/chosen": -1.3778001070022583, "logits/rejected": -1.3392332792282104, "logps/chosen": -123.60386657714844, "logps/rejected": -128.8128662109375, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -1.968403697013855, "rewards/margins": 3.2852425575256348, "rewards/rejected": -5.253646373748779, "step": 7988 }, { "epoch": 1.77, "learning_rate": 9.082466297046308e-06, "logits/chosen": -1.3596017360687256, "logits/rejected": -1.5660361051559448, "logps/chosen": -204.76019287109375, "logps/rejected": -93.73526000976562, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.266793817281723, "rewards/margins": 7.179125785827637, "rewards/rejected": -6.912332057952881, "step": 7989 }, { "epoch": 1.77, "learning_rate": 9.081431225257994e-06, "logits/chosen": -1.020671010017395, "logits/rejected": -1.021238088607788, "logps/chosen": -160.49005126953125, "logps/rejected": -190.56124877929688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8942169547080994, "rewards/margins": 11.1754732131958, "rewards/rejected": -12.069689750671387, "step": 7990 }, { "epoch": 1.77, "learning_rate": 9.08039562901558e-06, "logits/chosen": -0.9471273422241211, "logits/rejected": -0.9471273422241211, "logps/chosen": -102.21528625488281, "logps/rejected": -102.21528625488281, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -0.5396362543106079, "rewards/margins": 0.0, "rewards/rejected": -0.5396362543106079, "step": 7991 }, { "epoch": 1.77, "learning_rate": 9.079359508452138e-06, "logits/chosen": -1.235623836517334, "logits/rejected": -1.1164484024047852, "logps/chosen": -132.8531951904297, "logps/rejected": -158.71969604492188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.9016357660293579, "rewards/margins": 6.38921594619751, "rewards/rejected": -5.487580299377441, "step": 7992 }, { "epoch": 1.77, "learning_rate": 9.078322863700803e-06, "logits/chosen": -1.5001236200332642, "logits/rejected": -1.535629153251648, "logps/chosen": -152.49636840820312, "logps/rejected": -164.3872833251953, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.4861420392990112, "rewards/margins": 8.088622093200684, "rewards/rejected": -6.602480411529541, "step": 7993 }, { "epoch": 1.77, "learning_rate": 9.077285694894786e-06, "logits/chosen": -1.2721881866455078, "logits/rejected": -1.286876916885376, "logps/chosen": -78.30384826660156, "logps/rejected": -165.4462890625, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": -0.06270523369312286, "rewards/margins": 2.6135551929473877, "rewards/rejected": -2.676260471343994, "step": 7994 }, { "epoch": 1.77, "learning_rate": 9.076248002167357e-06, "logits/chosen": -1.0252114534378052, "logits/rejected": -1.0212346315383911, "logps/chosen": -137.54342651367188, "logps/rejected": -247.04718017578125, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": 2.543814182281494, "rewards/margins": 9.817630767822266, "rewards/rejected": -7.273816108703613, "step": 7995 }, { "epoch": 1.77, "learning_rate": 9.07520978565186e-06, "logits/chosen": -1.6057631969451904, "logits/rejected": -1.59197199344635, "logps/chosen": -91.59878540039062, "logps/rejected": -152.13072204589844, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": 0.479135125875473, "rewards/margins": 4.443795680999756, "rewards/rejected": -3.96466064453125, "step": 7996 }, { "epoch": 1.77, "learning_rate": 9.074171045481701e-06, "logits/chosen": -1.4825903177261353, "logits/rejected": -1.3964903354644775, "logps/chosen": -108.21940612792969, "logps/rejected": -235.60397338867188, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": 0.4989356994628906, "rewards/margins": 3.330883026123047, "rewards/rejected": -2.8319473266601562, "step": 7997 }, { "epoch": 1.77, "learning_rate": 9.073131781790358e-06, "logits/chosen": -1.38621187210083, "logits/rejected": -1.1996707916259766, "logps/chosen": -85.5126724243164, "logps/rejected": -300.04547119140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.8295120596885681, "rewards/margins": 8.814046859741211, "rewards/rejected": -7.984535217285156, "step": 7998 }, { "epoch": 1.77, "learning_rate": 9.072091994711372e-06, "logits/chosen": -1.477620244026184, "logits/rejected": -1.474324107170105, "logps/chosen": -184.82839965820312, "logps/rejected": -143.43148803710938, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 3.060987949371338, "rewards/margins": 8.444637298583984, "rewards/rejected": -5.383648872375488, "step": 7999 }, { "epoch": 1.77, "learning_rate": 9.071051684378352e-06, "logits/chosen": -0.9512580633163452, "logits/rejected": -0.9512580633163452, "logps/chosen": -113.02711486816406, "logps/rejected": -113.02711486816406, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.5887603759765625, "rewards/margins": 0.0, "rewards/rejected": -5.5887603759765625, "step": 8000 }, { "epoch": 1.77, "learning_rate": 9.07001085092498e-06, "logits/chosen": -1.3739964962005615, "logits/rejected": -1.5299346446990967, "logps/chosen": -146.27894592285156, "logps/rejected": -95.1804428100586, "loss": 0.1276, "rewards/accuracies": 1.0, "rewards/chosen": -0.9051254391670227, "rewards/margins": 1.2421715259552002, "rewards/rejected": -2.147296905517578, "step": 8001 }, { "epoch": 1.77, "learning_rate": 9.068969494484996e-06, "logits/chosen": -1.1169136762619019, "logits/rejected": -0.9035702347755432, "logps/chosen": -140.90521240234375, "logps/rejected": -306.2539367675781, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.19787751138210297, "rewards/margins": 9.412625312805176, "rewards/rejected": -9.214747428894043, "step": 8002 }, { "epoch": 1.77, "learning_rate": 9.067927615192214e-06, "logits/chosen": -1.3210139274597168, "logits/rejected": -0.6362788081169128, "logps/chosen": -116.99592590332031, "logps/rejected": -798.92041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.42966920137405396, "rewards/margins": 61.33695983886719, "rewards/rejected": -61.76662826538086, "step": 8003 }, { "epoch": 1.77, "learning_rate": 9.066885213180512e-06, "logits/chosen": -1.8143056631088257, "logits/rejected": -1.8430185317993164, "logps/chosen": -128.50592041015625, "logps/rejected": -98.80258178710938, "loss": 0.109, "rewards/accuracies": 1.0, "rewards/chosen": -2.7976090908050537, "rewards/margins": 1.4189374446868896, "rewards/rejected": -4.216546535491943, "step": 8004 }, { "epoch": 1.77, "learning_rate": 9.065842288583838e-06, "logits/chosen": -1.2444097995758057, "logits/rejected": -1.2104240655899048, "logps/chosen": -100.43933868408203, "logps/rejected": -149.73965454101562, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -4.166818141937256, "rewards/margins": 3.266404151916504, "rewards/rejected": -7.43322229385376, "step": 8005 }, { "epoch": 1.77, "learning_rate": 9.064798841536203e-06, "logits/chosen": -1.0726957321166992, "logits/rejected": -1.0643796920776367, "logps/chosen": -81.12654113769531, "logps/rejected": -137.20828247070312, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -4.272682189941406, "rewards/margins": 6.521821975708008, "rewards/rejected": -10.794504165649414, "step": 8006 }, { "epoch": 1.77, "learning_rate": 9.063754872171686e-06, "logits/chosen": -1.6031063795089722, "logits/rejected": -1.8574681282043457, "logps/chosen": -245.39015197753906, "logps/rejected": -199.6053466796875, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": 1.2418044805526733, "rewards/margins": 16.62911033630371, "rewards/rejected": -15.387306213378906, "step": 8007 }, { "epoch": 1.77, "learning_rate": 9.062710380624439e-06, "logits/chosen": -1.5531600713729858, "logits/rejected": -1.6457799673080444, "logps/chosen": -131.09388732910156, "logps/rejected": -198.36428833007812, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -3.0335731506347656, "rewards/margins": 13.050189971923828, "rewards/rejected": -16.083763122558594, "step": 8008 }, { "epoch": 1.77, "learning_rate": 9.061665367028676e-06, "logits/chosen": -1.5246310234069824, "logits/rejected": -1.4994463920593262, "logps/chosen": -101.25739288330078, "logps/rejected": -193.82040405273438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.779341220855713, "rewards/margins": 9.453523635864258, "rewards/rejected": -13.232864379882812, "step": 8009 }, { "epoch": 1.77, "learning_rate": 9.060619831518676e-06, "logits/chosen": -1.3304176330566406, "logits/rejected": -0.8999739289283752, "logps/chosen": -157.598388671875, "logps/rejected": -753.193603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6726624965667725, "rewards/margins": 32.31570053100586, "rewards/rejected": -34.98836135864258, "step": 8010 }, { "epoch": 1.77, "learning_rate": 9.05957377422879e-06, "logits/chosen": -1.4957488775253296, "logits/rejected": -1.5404584407806396, "logps/chosen": -270.3046875, "logps/rejected": -192.62899780273438, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.998864769935608, "rewards/margins": 4.854715347290039, "rewards/rejected": -6.853579998016357, "step": 8011 }, { "epoch": 1.77, "learning_rate": 9.058527195293431e-06, "logits/chosen": -1.3292611837387085, "logits/rejected": -1.307493805885315, "logps/chosen": -104.15428924560547, "logps/rejected": -140.88722229003906, "loss": 0.4474, "rewards/accuracies": 0.0, "rewards/chosen": -3.268317461013794, "rewards/margins": -0.30175161361694336, "rewards/rejected": -2.9665658473968506, "step": 8012 }, { "epoch": 1.77, "learning_rate": 9.057480094847085e-06, "logits/chosen": -1.2311348915100098, "logits/rejected": -1.1643092632293701, "logps/chosen": -197.40545654296875, "logps/rejected": -475.80218505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.47296142578125, "rewards/margins": 27.204303741455078, "rewards/rejected": -26.731342315673828, "step": 8013 }, { "epoch": 1.77, "learning_rate": 9.056432473024302e-06, "logits/chosen": -1.3151696920394897, "logits/rejected": -1.2678266763687134, "logps/chosen": -86.82991027832031, "logps/rejected": -123.65769958496094, "loss": 0.0505, "rewards/accuracies": 1.0, "rewards/chosen": -2.0542068481445312, "rewards/margins": 2.69801664352417, "rewards/rejected": -4.752223491668701, "step": 8014 }, { "epoch": 1.77, "learning_rate": 9.055384329959695e-06, "logits/chosen": -1.5387134552001953, "logits/rejected": -1.5338771343231201, "logps/chosen": -106.24516296386719, "logps/rejected": -217.1892547607422, "loss": 0.6992, "rewards/accuracies": 1.0, "rewards/chosen": -1.1266998052597046, "rewards/margins": 6.020977973937988, "rewards/rejected": -7.147677898406982, "step": 8015 }, { "epoch": 1.77, "learning_rate": 9.054335665787952e-06, "logits/chosen": -1.1702390909194946, "logits/rejected": -1.0478787422180176, "logps/chosen": -80.38533020019531, "logps/rejected": -202.19552612304688, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/chosen": 0.8640891909599304, "rewards/margins": 2.90995717048645, "rewards/rejected": -2.045867919921875, "step": 8016 }, { "epoch": 1.77, "learning_rate": 9.053286480643822e-06, "logits/chosen": -1.309044361114502, "logits/rejected": -1.2059062719345093, "logps/chosen": -202.35064697265625, "logps/rejected": -294.9722900390625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 3.0626296997070312, "rewards/margins": 6.991690158843994, "rewards/rejected": -3.929060459136963, "step": 8017 }, { "epoch": 1.77, "learning_rate": 9.052236774662123e-06, "logits/chosen": -1.5658644437789917, "logits/rejected": -1.628390908241272, "logps/chosen": -185.2760772705078, "logps/rejected": -257.5779724121094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.9333709478378296, "rewards/margins": 14.429049491882324, "rewards/rejected": -12.495678901672363, "step": 8018 }, { "epoch": 1.77, "learning_rate": 9.051186547977739e-06, "logits/chosen": -1.5357284545898438, "logits/rejected": -1.5099977254867554, "logps/chosen": -121.44807434082031, "logps/rejected": -170.9271240234375, "loss": 0.3095, "rewards/accuracies": 1.0, "rewards/chosen": 0.04085540771484375, "rewards/margins": 6.865731239318848, "rewards/rejected": -6.824875831604004, "step": 8019 }, { "epoch": 1.78, "learning_rate": 9.050135800725623e-06, "logits/chosen": -1.304864525794983, "logits/rejected": -1.2159794569015503, "logps/chosen": -164.01197814941406, "logps/rejected": -346.8011779785156, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -1.5459518432617188, "rewards/margins": 13.34672737121582, "rewards/rejected": -14.892679214477539, "step": 8020 }, { "epoch": 1.78, "learning_rate": 9.049084533040794e-06, "logits/chosen": -1.5431040525436401, "logits/rejected": -1.526437520980835, "logps/chosen": -92.31121826171875, "logps/rejected": -203.58612060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.086801290512085, "rewards/margins": 10.159249305725098, "rewards/rejected": -12.246050834655762, "step": 8021 }, { "epoch": 1.78, "learning_rate": 9.048032745058335e-06, "logits/chosen": -1.758842945098877, "logits/rejected": -1.699352741241455, "logps/chosen": -94.5413818359375, "logps/rejected": -136.76414489746094, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -0.17421112954616547, "rewards/margins": 3.673130989074707, "rewards/rejected": -3.847342014312744, "step": 8022 }, { "epoch": 1.78, "learning_rate": 9.0469804369134e-06, "logits/chosen": -1.6515326499938965, "logits/rejected": -1.5669002532958984, "logps/chosen": -106.38050079345703, "logps/rejected": -247.9490966796875, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 0.6588577628135681, "rewards/margins": 3.7231552600860596, "rewards/rejected": -3.0642974376678467, "step": 8023 }, { "epoch": 1.78, "learning_rate": 9.045927608741207e-06, "logits/chosen": -1.1675410270690918, "logits/rejected": -1.1882647275924683, "logps/chosen": -259.8627624511719, "logps/rejected": -203.78396606445312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.6981079578399658, "rewards/margins": 8.045720100402832, "rewards/rejected": -6.347611904144287, "step": 8024 }, { "epoch": 1.78, "learning_rate": 9.044874260677043e-06, "logits/chosen": -1.3561729192733765, "logits/rejected": -1.1938844919204712, "logps/chosen": -105.25414276123047, "logps/rejected": -245.23854064941406, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9677215814590454, "rewards/margins": 5.601274490356445, "rewards/rejected": -6.568995952606201, "step": 8025 }, { "epoch": 1.78, "learning_rate": 9.043820392856259e-06, "logits/chosen": -1.3260993957519531, "logits/rejected": -1.287480354309082, "logps/chosen": -151.2312469482422, "logps/rejected": -229.96482849121094, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": 0.8075149655342102, "rewards/margins": 4.3441314697265625, "rewards/rejected": -3.536616563796997, "step": 8026 }, { "epoch": 1.78, "learning_rate": 9.042766005414278e-06, "logits/chosen": -1.2680480480194092, "logits/rejected": -1.2713364362716675, "logps/chosen": -78.08218383789062, "logps/rejected": -89.38946533203125, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -1.8415504693984985, "rewards/margins": 3.420262336730957, "rewards/rejected": -5.261812686920166, "step": 8027 }, { "epoch": 1.78, "learning_rate": 9.041711098486583e-06, "logits/chosen": -1.180578351020813, "logits/rejected": -1.285299301147461, "logps/chosen": -225.26609802246094, "logps/rejected": -257.71917724609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8987991213798523, "rewards/margins": 7.71588134765625, "rewards/rejected": -8.614680290222168, "step": 8028 }, { "epoch": 1.78, "learning_rate": 9.040655672208727e-06, "logits/chosen": -1.4876290559768677, "logits/rejected": -1.4535506963729858, "logps/chosen": -158.34146118164062, "logps/rejected": -234.36077880859375, "loss": 0.1061, "rewards/accuracies": 1.0, "rewards/chosen": -4.106118202209473, "rewards/margins": 1.4599919319152832, "rewards/rejected": -5.566110134124756, "step": 8029 }, { "epoch": 1.78, "learning_rate": 9.03959972671633e-06, "logits/chosen": -1.183665156364441, "logits/rejected": -1.1847628355026245, "logps/chosen": -209.6068115234375, "logps/rejected": -269.7208557128906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.143675327301025, "rewards/margins": 8.177967071533203, "rewards/rejected": -13.321642875671387, "step": 8030 }, { "epoch": 1.78, "learning_rate": 9.03854326214508e-06, "logits/chosen": -1.3695179224014282, "logits/rejected": -0.9734684824943542, "logps/chosen": -164.7559051513672, "logps/rejected": -1050.51953125, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": -6.648838043212891, "rewards/margins": 90.02946472167969, "rewards/rejected": -96.67830657958984, "step": 8031 }, { "epoch": 1.78, "learning_rate": 9.037486278630729e-06, "logits/chosen": -1.324993371963501, "logits/rejected": -1.3124887943267822, "logps/chosen": -196.70167541503906, "logps/rejected": -237.5045166015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.2842956483364105, "rewards/margins": 6.164189338684082, "rewards/rejected": -6.448484897613525, "step": 8032 }, { "epoch": 1.78, "learning_rate": 9.036428776309096e-06, "logits/chosen": -1.2149922847747803, "logits/rejected": -1.1973541975021362, "logps/chosen": -134.8988037109375, "logps/rejected": -257.6724548339844, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 0.30778810381889343, "rewards/margins": 17.786706924438477, "rewards/rejected": -17.478918075561523, "step": 8033 }, { "epoch": 1.78, "learning_rate": 9.03537075531607e-06, "logits/chosen": -1.1718765497207642, "logits/rejected": -1.2301299571990967, "logps/chosen": -132.61328125, "logps/rejected": -188.69725036621094, "loss": 0.3882, "rewards/accuracies": 1.0, "rewards/chosen": -2.808502197265625, "rewards/margins": 2.4450011253356934, "rewards/rejected": -5.253503322601318, "step": 8034 }, { "epoch": 1.78, "learning_rate": 9.034312215787603e-06, "logits/chosen": -1.4271687269210815, "logits/rejected": -1.5520986318588257, "logps/chosen": -223.48756408691406, "logps/rejected": -177.59661865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.0988998413085938, "rewards/margins": 14.686662673950195, "rewards/rejected": -13.587762832641602, "step": 8035 }, { "epoch": 1.78, "learning_rate": 9.033253157859715e-06, "logits/chosen": -1.5363343954086304, "logits/rejected": -1.5144766569137573, "logps/chosen": -144.3829345703125, "logps/rejected": -152.7359619140625, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -4.596260070800781, "rewards/margins": 3.73599910736084, "rewards/rejected": -8.332259178161621, "step": 8036 }, { "epoch": 1.78, "learning_rate": 9.03219358166849e-06, "logits/chosen": -1.2647384405136108, "logits/rejected": -1.2263041734695435, "logps/chosen": -139.157958984375, "logps/rejected": -300.42523193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0433900356292725, "rewards/margins": 14.568426132202148, "rewards/rejected": -16.61181640625, "step": 8037 }, { "epoch": 1.78, "learning_rate": 9.031133487350084e-06, "logits/chosen": -1.6373505592346191, "logits/rejected": -1.6373505592346191, "logps/chosen": -208.50088500976562, "logps/rejected": -208.50088500976562, "loss": 0.3486, "rewards/accuracies": 0.0, "rewards/chosen": -9.754474639892578, "rewards/margins": 0.0, "rewards/rejected": -9.754474639892578, "step": 8038 }, { "epoch": 1.78, "learning_rate": 9.030072875040714e-06, "logits/chosen": -1.7385835647583008, "logits/rejected": -1.7764064073562622, "logps/chosen": -115.80795288085938, "logps/rejected": -132.074462890625, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.8228439092636108, "rewards/margins": 4.085017204284668, "rewards/rejected": -5.907861232757568, "step": 8039 }, { "epoch": 1.78, "learning_rate": 9.029011744876669e-06, "logits/chosen": -1.3837878704071045, "logits/rejected": -1.360300064086914, "logps/chosen": -199.0472412109375, "logps/rejected": -167.98373413085938, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 1.1094086170196533, "rewards/margins": 7.666094779968262, "rewards/rejected": -6.5566864013671875, "step": 8040 }, { "epoch": 1.78, "learning_rate": 9.027950096994299e-06, "logits/chosen": -1.239404559135437, "logits/rejected": -1.1805992126464844, "logps/chosen": -104.52696228027344, "logps/rejected": -185.02569580078125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.3024581968784332, "rewards/margins": 6.895315647125244, "rewards/rejected": -6.592857360839844, "step": 8041 }, { "epoch": 1.78, "learning_rate": 9.026887931530026e-06, "logits/chosen": -1.4440271854400635, "logits/rejected": -0.8520375490188599, "logps/chosen": -230.5380401611328, "logps/rejected": -475.2635803222656, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.7642928957939148, "rewards/margins": 24.670265197753906, "rewards/rejected": -23.90597152709961, "step": 8042 }, { "epoch": 1.78, "learning_rate": 9.025825248620332e-06, "logits/chosen": -1.0776231288909912, "logits/rejected": -1.1339055299758911, "logps/chosen": -114.68547821044922, "logps/rejected": -105.74456024169922, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -3.411640167236328, "rewards/margins": 4.820265769958496, "rewards/rejected": -8.231905937194824, "step": 8043 }, { "epoch": 1.78, "learning_rate": 9.024762048401775e-06, "logits/chosen": -1.8481258153915405, "logits/rejected": -1.895286202430725, "logps/chosen": -80.18928527832031, "logps/rejected": -118.06550598144531, "loss": 0.0471, "rewards/accuracies": 1.0, "rewards/chosen": -1.6912037134170532, "rewards/margins": 2.715573787689209, "rewards/rejected": -4.406777381896973, "step": 8044 }, { "epoch": 1.78, "learning_rate": 9.023698331010966e-06, "logits/chosen": -1.8032095432281494, "logits/rejected": -1.771366834640503, "logps/chosen": -100.53495025634766, "logps/rejected": -224.8029327392578, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -0.08258743584156036, "rewards/margins": 9.02699089050293, "rewards/rejected": -9.109578132629395, "step": 8045 }, { "epoch": 1.78, "learning_rate": 9.022634096584597e-06, "logits/chosen": -1.1292955875396729, "logits/rejected": -1.2111889123916626, "logps/chosen": -124.04424285888672, "logps/rejected": -97.0953140258789, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": 1.9991401433944702, "rewards/margins": 4.701114654541016, "rewards/rejected": -2.701974630355835, "step": 8046 }, { "epoch": 1.78, "learning_rate": 9.021569345259415e-06, "logits/chosen": -1.5790541172027588, "logits/rejected": -1.0703190565109253, "logps/chosen": -170.71078491210938, "logps/rejected": -502.0425109863281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7987090945243835, "rewards/margins": 21.900779724121094, "rewards/rejected": -21.102069854736328, "step": 8047 }, { "epoch": 1.78, "learning_rate": 9.02050407717224e-06, "logits/chosen": -1.1431288719177246, "logits/rejected": -1.1633987426757812, "logps/chosen": -141.12420654296875, "logps/rejected": -172.98435974121094, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/chosen": -3.879075765609741, "rewards/margins": 2.576312303543091, "rewards/rejected": -6.455388069152832, "step": 8048 }, { "epoch": 1.78, "learning_rate": 9.019438292459958e-06, "logits/chosen": -1.6839714050292969, "logits/rejected": -1.5723191499710083, "logps/chosen": -119.61982727050781, "logps/rejected": -174.57225036621094, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -2.5364654064178467, "rewards/margins": 9.12792682647705, "rewards/rejected": -11.664392471313477, "step": 8049 }, { "epoch": 1.78, "learning_rate": 9.018371991259516e-06, "logits/chosen": -1.0809845924377441, "logits/rejected": -0.4446260631084442, "logps/chosen": -98.61795806884766, "logps/rejected": -524.9471435546875, "loss": 0.1961, "rewards/accuracies": 1.0, "rewards/chosen": -0.8558052182197571, "rewards/margins": 41.974937438964844, "rewards/rejected": -42.83074188232422, "step": 8050 }, { "epoch": 1.78, "learning_rate": 9.017305173707932e-06, "logits/chosen": -1.4424697160720825, "logits/rejected": -1.4947563409805298, "logps/chosen": -71.87028503417969, "logps/rejected": -71.16417694091797, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -1.779160737991333, "rewards/margins": 3.0422427654266357, "rewards/rejected": -4.821403503417969, "step": 8051 }, { "epoch": 1.78, "learning_rate": 9.016237839942294e-06, "logits/chosen": -1.346968412399292, "logits/rejected": -1.3081609010696411, "logps/chosen": -113.27625274658203, "logps/rejected": -147.22128295898438, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.7668922543525696, "rewards/margins": 4.844383239746094, "rewards/rejected": -5.611275672912598, "step": 8052 }, { "epoch": 1.78, "learning_rate": 9.015169990099746e-06, "logits/chosen": -1.0882747173309326, "logits/rejected": -1.0882747173309326, "logps/chosen": -217.41737365722656, "logps/rejected": -217.41737365722656, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -9.674091339111328, "rewards/margins": 0.0, "rewards/rejected": -9.674091339111328, "step": 8053 }, { "epoch": 1.78, "learning_rate": 9.014101624317506e-06, "logits/chosen": -1.3207414150238037, "logits/rejected": -1.2815990447998047, "logps/chosen": -95.41053771972656, "logps/rejected": -122.6635971069336, "loss": 0.0627, "rewards/accuracies": 1.0, "rewards/chosen": -0.44158935546875, "rewards/margins": 4.994706153869629, "rewards/rejected": -5.436295509338379, "step": 8054 }, { "epoch": 1.78, "learning_rate": 9.013032742732858e-06, "logits/chosen": -1.5008679628372192, "logits/rejected": -1.477051019668579, "logps/chosen": -140.88833618164062, "logps/rejected": -163.7459716796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 1.573725938796997, "rewards/margins": 9.781566619873047, "rewards/rejected": -8.207840919494629, "step": 8055 }, { "epoch": 1.78, "learning_rate": 9.01196334548315e-06, "logits/chosen": -1.4208036661148071, "logits/rejected": -1.4208036661148071, "logps/chosen": -70.2724838256836, "logps/rejected": -70.2724838256836, "loss": 0.3472, "rewards/accuracies": 0.0, "rewards/chosen": -1.4274734258651733, "rewards/margins": 0.0, "rewards/rejected": -1.4274734258651733, "step": 8056 }, { "epoch": 1.78, "learning_rate": 9.010893432705796e-06, "logits/chosen": -1.327524185180664, "logits/rejected": -1.2602022886276245, "logps/chosen": -216.0154266357422, "logps/rejected": -370.7583312988281, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.35028383135795593, "rewards/margins": 15.458063125610352, "rewards/rejected": -15.80834674835205, "step": 8057 }, { "epoch": 1.78, "learning_rate": 9.009823004538278e-06, "logits/chosen": -1.4727524518966675, "logits/rejected": -1.4202100038528442, "logps/chosen": -95.37800598144531, "logps/rejected": -162.788818359375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.3264435529708862, "rewards/margins": 5.499567985534668, "rewards/rejected": -6.826011657714844, "step": 8058 }, { "epoch": 1.78, "learning_rate": 9.008752061118143e-06, "logits/chosen": -1.3369815349578857, "logits/rejected": -1.414181113243103, "logps/chosen": -283.3472900390625, "logps/rejected": -250.03347778320312, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -5.995110988616943, "rewards/margins": 5.257452487945557, "rewards/rejected": -11.2525634765625, "step": 8059 }, { "epoch": 1.78, "learning_rate": 9.007680602583005e-06, "logits/chosen": -1.363089680671692, "logits/rejected": -1.4620672464370728, "logps/chosen": -169.39747619628906, "logps/rejected": -124.67842102050781, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 2.3660476207733154, "rewards/margins": 7.628175735473633, "rewards/rejected": -5.262127876281738, "step": 8060 }, { "epoch": 1.78, "learning_rate": 9.006608629070543e-06, "logits/chosen": -1.5789391994476318, "logits/rejected": -1.4414504766464233, "logps/chosen": -133.01425170898438, "logps/rejected": -201.93807983398438, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.7782974243164062, "rewards/margins": 7.027000427246094, "rewards/rejected": -8.8052978515625, "step": 8061 }, { "epoch": 1.78, "learning_rate": 9.005536140718506e-06, "logits/chosen": -1.427714228630066, "logits/rejected": -1.3895400762557983, "logps/chosen": -137.3341064453125, "logps/rejected": -233.19967651367188, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -4.405153751373291, "rewards/margins": 5.257996082305908, "rewards/rejected": -9.6631498336792, "step": 8062 }, { "epoch": 1.78, "learning_rate": 9.004463137664701e-06, "logits/chosen": -1.221239447593689, "logits/rejected": -1.306857705116272, "logps/chosen": -197.58453369140625, "logps/rejected": -164.40780639648438, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -3.0585877895355225, "rewards/margins": 2.6152169704437256, "rewards/rejected": -5.673804759979248, "step": 8063 }, { "epoch": 1.78, "learning_rate": 9.003389620047012e-06, "logits/chosen": -1.318140983581543, "logits/rejected": -1.318140983581543, "logps/chosen": -175.0411376953125, "logps/rejected": -175.0411376953125, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.720746040344238, "rewards/margins": 0.0, "rewards/rejected": -6.720746040344238, "step": 8064 }, { "epoch": 1.79, "learning_rate": 9.002315588003378e-06, "logits/chosen": -1.1286150217056274, "logits/rejected": -1.119765281677246, "logps/chosen": -76.57820892333984, "logps/rejected": -151.00225830078125, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": 0.9553291201591492, "rewards/margins": 3.5555977821350098, "rewards/rejected": -2.600268602371216, "step": 8065 }, { "epoch": 1.79, "learning_rate": 9.001241041671814e-06, "logits/chosen": -1.011742115020752, "logits/rejected": -1.043323040008545, "logps/chosen": -229.33474731445312, "logps/rejected": -257.7342529296875, "loss": 0.1564, "rewards/accuracies": 1.0, "rewards/chosen": -9.829054832458496, "rewards/margins": 6.928839683532715, "rewards/rejected": -16.75789451599121, "step": 8066 }, { "epoch": 1.79, "learning_rate": 9.000165981190396e-06, "logits/chosen": -1.345800518989563, "logits/rejected": -1.1788179874420166, "logps/chosen": -200.23008728027344, "logps/rejected": -328.2105407714844, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": 1.6352280378341675, "rewards/margins": 8.203532218933105, "rewards/rejected": -6.568304538726807, "step": 8067 }, { "epoch": 1.79, "learning_rate": 8.999090406697263e-06, "logits/chosen": -1.3437472581863403, "logits/rejected": -1.343671441078186, "logps/chosen": -148.81399536132812, "logps/rejected": -152.27821350097656, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.5127517580986023, "rewards/margins": 7.180042743682861, "rewards/rejected": -6.667291164398193, "step": 8068 }, { "epoch": 1.79, "learning_rate": 8.998014318330627e-06, "logits/chosen": -1.3187799453735352, "logits/rejected": -1.2961852550506592, "logps/chosen": -152.8440399169922, "logps/rejected": -190.75064086914062, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.7534775733947754, "rewards/margins": 4.726644992828369, "rewards/rejected": -7.4801225662231445, "step": 8069 }, { "epoch": 1.79, "learning_rate": 8.996937716228763e-06, "logits/chosen": -1.6062984466552734, "logits/rejected": -1.5642997026443481, "logps/chosen": -147.52182006835938, "logps/rejected": -212.9211883544922, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.204986572265625, "rewards/margins": 7.204073905944824, "rewards/rejected": -9.40906047821045, "step": 8070 }, { "epoch": 1.79, "learning_rate": 8.99586060053001e-06, "logits/chosen": -1.446772575378418, "logits/rejected": -1.4565651416778564, "logps/chosen": -101.41039276123047, "logps/rejected": -136.73239135742188, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.08479537814855576, "rewards/margins": 4.971013069152832, "rewards/rejected": -5.0558085441589355, "step": 8071 }, { "epoch": 1.79, "learning_rate": 8.994782971372776e-06, "logits/chosen": -1.5052834749221802, "logits/rejected": -1.3665728569030762, "logps/chosen": -192.7064208984375, "logps/rejected": -331.20416259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.015402317047119, "rewards/margins": 11.800580978393555, "rewards/rejected": -9.785179138183594, "step": 8072 }, { "epoch": 1.79, "learning_rate": 8.993704828895533e-06, "logits/chosen": -1.6813750267028809, "logits/rejected": -1.7517104148864746, "logps/chosen": -157.50738525390625, "logps/rejected": -123.7620849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.074426293373108, "rewards/margins": 9.527920722961426, "rewards/rejected": -8.45349407196045, "step": 8073 }, { "epoch": 1.79, "learning_rate": 8.99262617323682e-06, "logits/chosen": -1.342305064201355, "logits/rejected": -1.2791413068771362, "logps/chosen": -98.27536010742188, "logps/rejected": -113.22406005859375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.0796966552734375, "rewards/margins": 6.30091667175293, "rewards/rejected": -8.380613327026367, "step": 8074 }, { "epoch": 1.79, "learning_rate": 8.991547004535244e-06, "logits/chosen": -1.5422687530517578, "logits/rejected": -1.4234259128570557, "logps/chosen": -166.69924926757812, "logps/rejected": -360.8353271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6269699335098267, "rewards/margins": 10.84430980682373, "rewards/rejected": -12.471280097961426, "step": 8075 }, { "epoch": 1.79, "learning_rate": 8.99046732292947e-06, "logits/chosen": -1.0442394018173218, "logits/rejected": -1.051263689994812, "logps/chosen": -68.63790893554688, "logps/rejected": -122.69346618652344, "loss": 0.064, "rewards/accuracies": 1.0, "rewards/chosen": -1.8226569890975952, "rewards/margins": 1.9917291402816772, "rewards/rejected": -3.8143861293792725, "step": 8076 }, { "epoch": 1.79, "learning_rate": 8.98938712855824e-06, "logits/chosen": -1.1225043535232544, "logits/rejected": -1.1225043535232544, "logps/chosen": -134.76315307617188, "logps/rejected": -134.76315307617188, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.0352396965026855, "rewards/margins": 0.0, "rewards/rejected": -6.0352396965026855, "step": 8077 }, { "epoch": 1.79, "learning_rate": 8.988306421560354e-06, "logits/chosen": -1.324711799621582, "logits/rejected": -1.2660493850708008, "logps/chosen": -141.2059326171875, "logps/rejected": -258.58038330078125, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -3.4643142223358154, "rewards/margins": 3.953678846359253, "rewards/rejected": -7.417993068695068, "step": 8078 }, { "epoch": 1.79, "learning_rate": 8.98722520207468e-06, "logits/chosen": -1.511606216430664, "logits/rejected": -1.5494905710220337, "logps/chosen": -184.8656005859375, "logps/rejected": -182.7744903564453, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.096994161605835, "rewards/margins": 9.532511711120605, "rewards/rejected": -7.435517311096191, "step": 8079 }, { "epoch": 1.79, "learning_rate": 8.986143470240152e-06, "logits/chosen": -1.3792020082473755, "logits/rejected": -1.3285081386566162, "logps/chosen": -202.5355682373047, "logps/rejected": -234.0063018798828, "loss": 0.1077, "rewards/accuracies": 1.0, "rewards/chosen": 2.3001861572265625, "rewards/margins": 1.4258224964141846, "rewards/rejected": 0.8743637204170227, "step": 8080 }, { "epoch": 1.79, "learning_rate": 8.98506122619577e-06, "logits/chosen": -1.6097458600997925, "logits/rejected": -1.7585922479629517, "logps/chosen": -167.26565551757812, "logps/rejected": -96.23727416992188, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -3.9011733531951904, "rewards/margins": 3.3195273876190186, "rewards/rejected": -7.220700740814209, "step": 8081 }, { "epoch": 1.79, "learning_rate": 8.983978470080603e-06, "logits/chosen": -1.7379333972930908, "logits/rejected": -1.787754774093628, "logps/chosen": -119.29644775390625, "logps/rejected": -145.41708374023438, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.853410482406616, "rewards/margins": 6.423795700073242, "rewards/rejected": -10.277206420898438, "step": 8082 }, { "epoch": 1.79, "learning_rate": 8.982895202033776e-06, "logits/chosen": -1.0679988861083984, "logits/rejected": -0.6302286386489868, "logps/chosen": -189.46975708007812, "logps/rejected": -456.7270812988281, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5366775393486023, "rewards/margins": 23.818891525268555, "rewards/rejected": -24.35556983947754, "step": 8083 }, { "epoch": 1.79, "learning_rate": 8.981811422194493e-06, "logits/chosen": -1.8494818210601807, "logits/rejected": -1.709591031074524, "logps/chosen": -89.09812927246094, "logps/rejected": -226.1431884765625, "loss": 0.1643, "rewards/accuracies": 1.0, "rewards/chosen": -3.4441075325012207, "rewards/margins": 1.0896143913269043, "rewards/rejected": -4.533721923828125, "step": 8084 }, { "epoch": 1.79, "learning_rate": 8.980727130702014e-06, "logits/chosen": -1.1440811157226562, "logits/rejected": -1.111676812171936, "logps/chosen": -104.74221801757812, "logps/rejected": -105.54689025878906, "loss": 0.149, "rewards/accuracies": 1.0, "rewards/chosen": -2.41275954246521, "rewards/margins": 1.0579404830932617, "rewards/rejected": -3.4707000255584717, "step": 8085 }, { "epoch": 1.79, "learning_rate": 8.979642327695668e-06, "logits/chosen": -1.7383625507354736, "logits/rejected": -1.8456884622573853, "logps/chosen": -149.01165771484375, "logps/rejected": -136.1375732421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.8663665652275085, "rewards/margins": 9.08752727508545, "rewards/rejected": -8.221160888671875, "step": 8086 }, { "epoch": 1.79, "learning_rate": 8.978557013314848e-06, "logits/chosen": -1.4484353065490723, "logits/rejected": -0.7146199345588684, "logps/chosen": -113.36857604980469, "logps/rejected": -889.2763061523438, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -3.415484666824341, "rewards/margins": 76.13380432128906, "rewards/rejected": -79.54928588867188, "step": 8087 }, { "epoch": 1.79, "learning_rate": 8.977471187699019e-06, "logits/chosen": -1.6598149538040161, "logits/rejected": -1.6006752252578735, "logps/chosen": -78.25167846679688, "logps/rejected": -216.2930450439453, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.7400528192520142, "rewards/margins": 5.217886447906494, "rewards/rejected": -4.4778337478637695, "step": 8088 }, { "epoch": 1.79, "learning_rate": 8.976384850987702e-06, "logits/chosen": -1.5242974758148193, "logits/rejected": -1.4848580360412598, "logps/chosen": -89.88978576660156, "logps/rejected": -171.89231872558594, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.794532060623169, "rewards/margins": 5.143489837646484, "rewards/rejected": -6.938022136688232, "step": 8089 }, { "epoch": 1.79, "learning_rate": 8.97529800332049e-06, "logits/chosen": -1.193861722946167, "logits/rejected": -1.193861722946167, "logps/chosen": -163.08331298828125, "logps/rejected": -163.08331298828125, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -8.538795471191406, "rewards/margins": 0.0, "rewards/rejected": -8.538795471191406, "step": 8090 }, { "epoch": 1.79, "learning_rate": 8.974210644837042e-06, "logits/chosen": -1.5047507286071777, "logits/rejected": -1.4637892246246338, "logps/chosen": -80.34030151367188, "logps/rejected": -251.47486877441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.15547791123390198, "rewards/margins": 14.567719459533691, "rewards/rejected": -14.41224193572998, "step": 8091 }, { "epoch": 1.79, "learning_rate": 8.973122775677078e-06, "logits/chosen": -1.2386906147003174, "logits/rejected": -1.2445957660675049, "logps/chosen": -134.36013793945312, "logps/rejected": -159.5986328125, "loss": 0.1009, "rewards/accuracies": 1.0, "rewards/chosen": -5.212230682373047, "rewards/margins": 1.497748851776123, "rewards/rejected": -6.70997953414917, "step": 8092 }, { "epoch": 1.79, "learning_rate": 8.97203439598039e-06, "logits/chosen": -1.885875940322876, "logits/rejected": -1.8373053073883057, "logps/chosen": -133.6754150390625, "logps/rejected": -259.1224365234375, "loss": 0.5618, "rewards/accuracies": 1.0, "rewards/chosen": -2.1073760986328125, "rewards/margins": 10.931675910949707, "rewards/rejected": -13.03905200958252, "step": 8093 }, { "epoch": 1.79, "learning_rate": 8.970945505886832e-06, "logits/chosen": -1.7292187213897705, "logits/rejected": -1.6615279912948608, "logps/chosen": -153.940673828125, "logps/rejected": -258.1023254394531, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.560189962387085, "rewards/margins": 5.302093505859375, "rewards/rejected": -7.862283229827881, "step": 8094 }, { "epoch": 1.79, "learning_rate": 8.96985610553632e-06, "logits/chosen": -1.4061496257781982, "logits/rejected": -1.382058024406433, "logps/chosen": -218.86888122558594, "logps/rejected": -242.90145874023438, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 1.6894394159317017, "rewards/margins": 5.186650276184082, "rewards/rejected": -3.497210741043091, "step": 8095 }, { "epoch": 1.79, "learning_rate": 8.968766195068845e-06, "logits/chosen": -1.6365952491760254, "logits/rejected": -1.6446083784103394, "logps/chosen": -212.5828857421875, "logps/rejected": -173.00924682617188, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.0891586542129517, "rewards/margins": 6.727180480957031, "rewards/rejected": -7.816339015960693, "step": 8096 }, { "epoch": 1.79, "learning_rate": 8.967675774624451e-06, "logits/chosen": -1.4175384044647217, "logits/rejected": -1.3905144929885864, "logps/chosen": -210.89639282226562, "logps/rejected": -318.6916198730469, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.7138122916221619, "rewards/margins": 6.994595527648926, "rewards/rejected": -7.708407878875732, "step": 8097 }, { "epoch": 1.79, "learning_rate": 8.96658484434326e-06, "logits/chosen": -1.8217535018920898, "logits/rejected": -1.7110787630081177, "logps/chosen": -82.45237731933594, "logps/rejected": -166.8333282470703, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.4670097827911377, "rewards/margins": 3.682342767715454, "rewards/rejected": -5.149352550506592, "step": 8098 }, { "epoch": 1.79, "learning_rate": 8.96549340436545e-06, "logits/chosen": -1.6353379487991333, "logits/rejected": -1.5839320421218872, "logps/chosen": -135.27017211914062, "logps/rejected": -218.22967529296875, "loss": 1.0656, "rewards/accuracies": 1.0, "rewards/chosen": -4.116201877593994, "rewards/margins": 6.722163677215576, "rewards/rejected": -10.83836555480957, "step": 8099 }, { "epoch": 1.79, "learning_rate": 8.964401454831273e-06, "logits/chosen": -1.5656695365905762, "logits/rejected": -1.5337976217269897, "logps/chosen": -75.2033462524414, "logps/rejected": -172.31829833984375, "loss": 0.3475, "rewards/accuracies": 1.0, "rewards/chosen": -0.9957351684570312, "rewards/margins": 6.317704677581787, "rewards/rejected": -7.313439846038818, "step": 8100 }, { "epoch": 1.79, "learning_rate": 8.963308995881037e-06, "logits/chosen": -1.36863374710083, "logits/rejected": -1.3587614297866821, "logps/chosen": -59.59837341308594, "logps/rejected": -81.2177734375, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": 0.8345596194267273, "rewards/margins": 3.3268754482269287, "rewards/rejected": -2.4923157691955566, "step": 8101 }, { "epoch": 1.79, "learning_rate": 8.962216027655123e-06, "logits/chosen": -1.5988887548446655, "logits/rejected": -1.6013222932815552, "logps/chosen": -107.9736328125, "logps/rejected": -151.54714965820312, "loss": 0.2389, "rewards/accuracies": 1.0, "rewards/chosen": -2.994877576828003, "rewards/margins": 0.49009251594543457, "rewards/rejected": -3.4849700927734375, "step": 8102 }, { "epoch": 1.79, "learning_rate": 8.961122550293975e-06, "logits/chosen": -1.2641299962997437, "logits/rejected": -1.2680269479751587, "logps/chosen": -80.16423797607422, "logps/rejected": -76.56396484375, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": -0.36208420991897583, "rewards/margins": 2.012890338897705, "rewards/rejected": -2.374974489212036, "step": 8103 }, { "epoch": 1.79, "learning_rate": 8.960028563938101e-06, "logits/chosen": -0.9722312092781067, "logits/rejected": -0.9823701977729797, "logps/chosen": -249.71266174316406, "logps/rejected": -396.4208984375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 2.485368490219116, "rewards/margins": 11.646391868591309, "rewards/rejected": -9.161023139953613, "step": 8104 }, { "epoch": 1.79, "learning_rate": 8.958934068728078e-06, "logits/chosen": -1.5856181383132935, "logits/rejected": -1.6061787605285645, "logps/chosen": -264.88116455078125, "logps/rejected": -228.8685760498047, "loss": 0.4724, "rewards/accuracies": 0.0, "rewards/chosen": -2.0590851306915283, "rewards/margins": -0.4415924549102783, "rewards/rejected": -1.61749267578125, "step": 8105 }, { "epoch": 1.79, "learning_rate": 8.957839064804542e-06, "logits/chosen": -1.228168249130249, "logits/rejected": -1.228168249130249, "logps/chosen": -228.03201293945312, "logps/rejected": -228.03201293945312, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -10.6264009475708, "rewards/margins": 0.0, "rewards/rejected": -10.6264009475708, "step": 8106 }, { "epoch": 1.79, "learning_rate": 8.9567435523082e-06, "logits/chosen": -1.323228359222412, "logits/rejected": -1.300930380821228, "logps/chosen": -223.93463134765625, "logps/rejected": -314.63214111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.177459716796875, "rewards/margins": 10.450515747070312, "rewards/rejected": -10.273056030273438, "step": 8107 }, { "epoch": 1.79, "learning_rate": 8.955647531379826e-06, "logits/chosen": -1.0722196102142334, "logits/rejected": -1.0129733085632324, "logps/chosen": -177.64266967773438, "logps/rejected": -191.7801971435547, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.06638503074646, "rewards/margins": 5.946195602416992, "rewards/rejected": -8.012580871582031, "step": 8108 }, { "epoch": 1.79, "learning_rate": 8.954551002160252e-06, "logits/chosen": -1.7468341588974, "logits/rejected": -1.711386799812317, "logps/chosen": -163.18466186523438, "logps/rejected": -176.9932861328125, "loss": 0.3841, "rewards/accuracies": 1.0, "rewards/chosen": -6.422351360321045, "rewards/margins": 0.049530029296875, "rewards/rejected": -6.47188138961792, "step": 8109 }, { "epoch": 1.8, "learning_rate": 8.95345396479038e-06, "logits/chosen": -1.511807918548584, "logits/rejected": -1.4799003601074219, "logps/chosen": -62.546600341796875, "logps/rejected": -154.49478149414062, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.3684498071670532, "rewards/margins": 5.6718549728393555, "rewards/rejected": -7.040304660797119, "step": 8110 }, { "epoch": 1.8, "learning_rate": 8.952356419411177e-06, "logits/chosen": -1.419000267982483, "logits/rejected": -1.419000267982483, "logps/chosen": -262.5789794921875, "logps/rejected": -262.5789794921875, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.428491115570068, "rewards/margins": 0.0, "rewards/rejected": -4.428491115570068, "step": 8111 }, { "epoch": 1.8, "learning_rate": 8.951258366163677e-06, "logits/chosen": -1.171072006225586, "logits/rejected": -1.2583290338516235, "logps/chosen": -185.0946044921875, "logps/rejected": -113.39876556396484, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": -5.516888618469238, "rewards/margins": 3.5750322341918945, "rewards/rejected": -9.091920852661133, "step": 8112 }, { "epoch": 1.8, "learning_rate": 8.950159805188973e-06, "logits/chosen": -1.4447555541992188, "logits/rejected": -1.4290730953216553, "logps/chosen": -113.23497772216797, "logps/rejected": -136.77333068847656, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -0.5092369318008423, "rewards/margins": 3.2407140731811523, "rewards/rejected": -3.749951124191284, "step": 8113 }, { "epoch": 1.8, "learning_rate": 8.949060736628233e-06, "logits/chosen": -1.1245945692062378, "logits/rejected": -1.1245945692062378, "logps/chosen": -84.60803985595703, "logps/rejected": -84.60803985595703, "loss": 0.3571, "rewards/accuracies": 0.0, "rewards/chosen": -1.8879082202911377, "rewards/margins": 0.0, "rewards/rejected": -1.8879082202911377, "step": 8114 }, { "epoch": 1.8, "learning_rate": 8.94796116062268e-06, "logits/chosen": -1.1638094186782837, "logits/rejected": -1.0272014141082764, "logps/chosen": -181.7113037109375, "logps/rejected": -312.18768310546875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.22077636420726776, "rewards/margins": 6.687469482421875, "rewards/rejected": -6.908246040344238, "step": 8115 }, { "epoch": 1.8, "learning_rate": 8.946861077313609e-06, "logits/chosen": -1.9734368324279785, "logits/rejected": -1.9725842475891113, "logps/chosen": -113.07987213134766, "logps/rejected": -180.41136169433594, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 1.3220252990722656, "rewards/margins": 4.464785099029541, "rewards/rejected": -3.1427597999572754, "step": 8116 }, { "epoch": 1.8, "learning_rate": 8.945760486842377e-06, "logits/chosen": -1.333552360534668, "logits/rejected": -1.4674822092056274, "logps/chosen": -224.91139221191406, "logps/rejected": -168.073486328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 1.4205093383789062, "rewards/margins": 9.998614311218262, "rewards/rejected": -8.578104972839355, "step": 8117 }, { "epoch": 1.8, "learning_rate": 8.944659389350409e-06, "logits/chosen": -1.7175060510635376, "logits/rejected": -1.7503069639205933, "logps/chosen": -266.39111328125, "logps/rejected": -189.706787109375, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": 0.90771484375, "rewards/margins": 2.8772385120391846, "rewards/rejected": -1.9695236682891846, "step": 8118 }, { "epoch": 1.8, "learning_rate": 8.94355778497919e-06, "logits/chosen": -1.5339832305908203, "logits/rejected": -1.5339832305908203, "logps/chosen": -186.83706665039062, "logps/rejected": -186.83706665039062, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -7.207064151763916, "rewards/margins": 0.0, "rewards/rejected": -7.207064151763916, "step": 8119 }, { "epoch": 1.8, "learning_rate": 8.942455673870278e-06, "logits/chosen": -1.76966392993927, "logits/rejected": -1.6848390102386475, "logps/chosen": -163.74659729003906, "logps/rejected": -270.1512451171875, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.9305709600448608, "rewards/margins": 7.991784572601318, "rewards/rejected": -9.922355651855469, "step": 8120 }, { "epoch": 1.8, "learning_rate": 8.941353056165288e-06, "logits/chosen": -1.4032039642333984, "logits/rejected": -1.3818265199661255, "logps/chosen": -132.20248413085938, "logps/rejected": -263.5409851074219, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2063965797424316, "rewards/margins": 11.127338409423828, "rewards/rejected": -13.333734512329102, "step": 8121 }, { "epoch": 1.8, "learning_rate": 8.940249932005904e-06, "logits/chosen": -1.5205587148666382, "logits/rejected": -1.4662214517593384, "logps/chosen": -106.93086242675781, "logps/rejected": -214.3368682861328, "loss": 0.7255, "rewards/accuracies": 1.0, "rewards/chosen": -2.7619941234588623, "rewards/margins": 4.937183380126953, "rewards/rejected": -7.6991777420043945, "step": 8122 }, { "epoch": 1.8, "learning_rate": 8.939146301533878e-06, "logits/chosen": -1.562371850013733, "logits/rejected": -1.5214927196502686, "logps/chosen": -147.93995666503906, "logps/rejected": -215.5831298828125, "loss": 1.2538, "rewards/accuracies": 1.0, "rewards/chosen": -2.8555359840393066, "rewards/margins": 2.6180052757263184, "rewards/rejected": -5.473541259765625, "step": 8123 }, { "epoch": 1.8, "learning_rate": 8.938042164891021e-06, "logits/chosen": -1.5378516912460327, "logits/rejected": -1.6794980764389038, "logps/chosen": -171.59490966796875, "logps/rejected": -176.46339416503906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.596945285797119, "rewards/margins": 12.846986770629883, "rewards/rejected": -9.250041007995605, "step": 8124 }, { "epoch": 1.8, "learning_rate": 8.936937522219212e-06, "logits/chosen": -1.2376710176467896, "logits/rejected": -1.236435890197754, "logps/chosen": -151.9130096435547, "logps/rejected": -273.047119140625, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -1.2737350463867188, "rewards/margins": 10.375941276550293, "rewards/rejected": -11.649676322937012, "step": 8125 }, { "epoch": 1.8, "learning_rate": 8.935832373660397e-06, "logits/chosen": -1.4038554430007935, "logits/rejected": -1.3562507629394531, "logps/chosen": -138.00146484375, "logps/rejected": -231.0159149169922, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.287634253501892, "rewards/margins": 7.261871814727783, "rewards/rejected": -8.549506187438965, "step": 8126 }, { "epoch": 1.8, "learning_rate": 8.934726719356582e-06, "logits/chosen": -1.3647780418395996, "logits/rejected": -1.3454523086547852, "logps/chosen": -135.462890625, "logps/rejected": -190.9697265625, "loss": 0.3418, "rewards/accuracies": 1.0, "rewards/chosen": -5.308021068572998, "rewards/margins": 0.019344329833984375, "rewards/rejected": -5.327365398406982, "step": 8127 }, { "epoch": 1.8, "learning_rate": 8.933620559449842e-06, "logits/chosen": -1.6572214365005493, "logits/rejected": -1.655822992324829, "logps/chosen": -110.95850372314453, "logps/rejected": -166.0883026123047, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.0980477333068848, "rewards/margins": 6.19036340713501, "rewards/rejected": -8.288411140441895, "step": 8128 }, { "epoch": 1.8, "learning_rate": 8.932513894082317e-06, "logits/chosen": -1.5840764045715332, "logits/rejected": -1.4834450483322144, "logps/chosen": -158.04815673828125, "logps/rejected": -316.692138671875, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -3.024557590484619, "rewards/margins": 4.262956142425537, "rewards/rejected": -7.287513732910156, "step": 8129 }, { "epoch": 1.8, "learning_rate": 8.93140672339621e-06, "logits/chosen": -1.4284108877182007, "logits/rejected": -1.3553452491760254, "logps/chosen": -190.95590209960938, "logps/rejected": -356.3648376464844, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.404763787984848, "rewards/margins": 6.335812568664551, "rewards/rejected": -5.93104887008667, "step": 8130 }, { "epoch": 1.8, "learning_rate": 8.930299047533792e-06, "logits/chosen": -1.3531060218811035, "logits/rejected": -1.3531060218811035, "logps/chosen": -126.90196228027344, "logps/rejected": -126.90196228027344, "loss": 0.3528, "rewards/accuracies": 0.0, "rewards/chosen": -5.636941432952881, "rewards/margins": 0.0, "rewards/rejected": -5.636941432952881, "step": 8131 }, { "epoch": 1.8, "learning_rate": 8.929190866637391e-06, "logits/chosen": -1.3813247680664062, "logits/rejected": -1.3420474529266357, "logps/chosen": -68.62789916992188, "logps/rejected": -100.17161560058594, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": 0.38774871826171875, "rewards/margins": 3.3762269020080566, "rewards/rejected": -2.988478183746338, "step": 8132 }, { "epoch": 1.8, "learning_rate": 8.92808218084941e-06, "logits/chosen": -1.4591411352157593, "logits/rejected": -1.300734281539917, "logps/chosen": -159.16258239746094, "logps/rejected": -269.8373718261719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.1532424688339233, "rewards/margins": 9.032567024230957, "rewards/rejected": -7.879324436187744, "step": 8133 }, { "epoch": 1.8, "learning_rate": 8.926972990312314e-06, "logits/chosen": -1.8036489486694336, "logits/rejected": -1.6076833009719849, "logps/chosen": -114.93905639648438, "logps/rejected": -249.8457794189453, "loss": 0.14, "rewards/accuracies": 1.0, "rewards/chosen": -2.9236435890197754, "rewards/margins": 1.1300568580627441, "rewards/rejected": -4.0537004470825195, "step": 8134 }, { "epoch": 1.8, "learning_rate": 8.925863295168628e-06, "logits/chosen": -1.571245789527893, "logits/rejected": -1.7578973770141602, "logps/chosen": -220.06210327148438, "logps/rejected": -169.11204528808594, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 1.86651611328125, "rewards/margins": 14.43595027923584, "rewards/rejected": -12.56943416595459, "step": 8135 }, { "epoch": 1.8, "learning_rate": 8.924753095560945e-06, "logits/chosen": -1.4556509256362915, "logits/rejected": -1.5004862546920776, "logps/chosen": -120.4107666015625, "logps/rejected": -122.21434783935547, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -4.789704322814941, "rewards/margins": 3.5096559524536133, "rewards/rejected": -8.299360275268555, "step": 8136 }, { "epoch": 1.8, "learning_rate": 8.923642391631924e-06, "logits/chosen": -1.5788476467132568, "logits/rejected": -1.5743306875228882, "logps/chosen": -124.1020278930664, "logps/rejected": -166.87725830078125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.297014594078064, "rewards/margins": 8.280282974243164, "rewards/rejected": -6.9832682609558105, "step": 8137 }, { "epoch": 1.8, "learning_rate": 8.922531183524287e-06, "logits/chosen": -1.4708104133605957, "logits/rejected": -1.4948545694351196, "logps/chosen": -222.97140502929688, "logps/rejected": -379.14239501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.395315557718277, "rewards/margins": 11.540047645568848, "rewards/rejected": -11.144732475280762, "step": 8138 }, { "epoch": 1.8, "learning_rate": 8.921419471380826e-06, "logits/chosen": -1.6855531930923462, "logits/rejected": -1.731691837310791, "logps/chosen": -173.5919189453125, "logps/rejected": -281.200927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.3062515258789062, "rewards/margins": 13.145392417907715, "rewards/rejected": -11.839140892028809, "step": 8139 }, { "epoch": 1.8, "learning_rate": 8.920307255344386e-06, "logits/chosen": -1.4628379344940186, "logits/rejected": -1.2141202688217163, "logps/chosen": -186.81417846679688, "logps/rejected": -427.24713134765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7258316278457642, "rewards/margins": 9.145048141479492, "rewards/rejected": -9.870880126953125, "step": 8140 }, { "epoch": 1.8, "learning_rate": 8.91919453555789e-06, "logits/chosen": -1.8119322061538696, "logits/rejected": -1.850867509841919, "logps/chosen": -84.09062194824219, "logps/rejected": -117.0284423828125, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.7948371767997742, "rewards/margins": 4.106006145477295, "rewards/rejected": -4.900843143463135, "step": 8141 }, { "epoch": 1.8, "learning_rate": 8.918081312164318e-06, "logits/chosen": -1.5920137166976929, "logits/rejected": -1.572220802307129, "logps/chosen": -144.13510131835938, "logps/rejected": -180.95941162109375, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.782814860343933, "rewards/margins": 4.35828161239624, "rewards/rejected": -6.141096591949463, "step": 8142 }, { "epoch": 1.8, "learning_rate": 8.916967585306715e-06, "logits/chosen": -1.6208196878433228, "logits/rejected": -1.3803168535232544, "logps/chosen": -127.75205993652344, "logps/rejected": -349.66650390625, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -3.1311416625976562, "rewards/margins": 4.876469612121582, "rewards/rejected": -8.007611274719238, "step": 8143 }, { "epoch": 1.8, "learning_rate": 8.915853355128192e-06, "logits/chosen": -1.7455778121948242, "logits/rejected": -1.860613226890564, "logps/chosen": -94.35853576660156, "logps/rejected": -123.22174072265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.32772141695022583, "rewards/margins": 9.349334716796875, "rewards/rejected": -9.021613121032715, "step": 8144 }, { "epoch": 1.8, "learning_rate": 8.91473862177193e-06, "logits/chosen": -1.3930788040161133, "logits/rejected": -1.4268686771392822, "logps/chosen": -86.64027404785156, "logps/rejected": -50.13803482055664, "loss": 0.6226, "rewards/accuracies": 0.0, "rewards/chosen": -0.44789430499076843, "rewards/margins": -0.5863083004951477, "rewards/rejected": 0.13841401040554047, "step": 8145 }, { "epoch": 1.8, "learning_rate": 8.913623385381163e-06, "logits/chosen": -1.4794278144836426, "logits/rejected": -1.3077771663665771, "logps/chosen": -117.5047607421875, "logps/rejected": -244.81045532226562, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": 1.1073760986328125, "rewards/margins": 6.305746555328369, "rewards/rejected": -5.198370456695557, "step": 8146 }, { "epoch": 1.8, "learning_rate": 8.9125076460992e-06, "logits/chosen": -1.1167511940002441, "logits/rejected": -1.1148675680160522, "logps/chosen": -89.94227600097656, "logps/rejected": -108.641357421875, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": 1.0891433954238892, "rewards/margins": 6.3057451248168945, "rewards/rejected": -5.216601848602295, "step": 8147 }, { "epoch": 1.8, "learning_rate": 8.91139140406941e-06, "logits/chosen": -1.078434705734253, "logits/rejected": -1.0510870218276978, "logps/chosen": -210.76272583007812, "logps/rejected": -272.7562561035156, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.5390915274620056, "rewards/margins": 6.119633674621582, "rewards/rejected": -5.580542087554932, "step": 8148 }, { "epoch": 1.8, "learning_rate": 8.910274659435226e-06, "logits/chosen": -1.2914788722991943, "logits/rejected": -1.2829850912094116, "logps/chosen": -122.1676025390625, "logps/rejected": -265.3170166015625, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": -0.44196778535842896, "rewards/margins": 2.9114701747894287, "rewards/rejected": -3.353437900543213, "step": 8149 }, { "epoch": 1.8, "learning_rate": 8.90915741234015e-06, "logits/chosen": -1.3350646495819092, "logits/rejected": -1.364675521850586, "logps/chosen": -260.48638916015625, "logps/rejected": -247.62315368652344, "loss": 0.5735, "rewards/accuracies": 1.0, "rewards/chosen": -4.323922634124756, "rewards/margins": 0.5542407035827637, "rewards/rejected": -4.8781633377075195, "step": 8150 }, { "epoch": 1.8, "learning_rate": 8.908039662927743e-06, "logits/chosen": -1.8968348503112793, "logits/rejected": -1.903414249420166, "logps/chosen": -108.06436157226562, "logps/rejected": -124.01564025878906, "loss": 0.2194, "rewards/accuracies": 1.0, "rewards/chosen": -0.2627304196357727, "rewards/margins": 1.0657150745391846, "rewards/rejected": -1.3284454345703125, "step": 8151 }, { "epoch": 1.8, "learning_rate": 8.906921411341634e-06, "logits/chosen": -1.3348428010940552, "logits/rejected": -1.314638376235962, "logps/chosen": -152.71011352539062, "logps/rejected": -231.92222595214844, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.28895875811576843, "rewards/margins": 4.044694900512695, "rewards/rejected": -4.333653450012207, "step": 8152 }, { "epoch": 1.8, "learning_rate": 8.905802657725516e-06, "logits/chosen": -1.3116402626037598, "logits/rejected": -1.2538737058639526, "logps/chosen": -45.1749153137207, "logps/rejected": -100.944580078125, "loss": 0.4047, "rewards/accuracies": 0.0, "rewards/chosen": -0.10754738003015518, "rewards/margins": -0.20580178499221802, "rewards/rejected": 0.09825439751148224, "step": 8153 }, { "epoch": 1.8, "learning_rate": 8.904683402223146e-06, "logits/chosen": -1.3661279678344727, "logits/rejected": -1.3385449647903442, "logps/chosen": -129.7202911376953, "logps/rejected": -167.6156768798828, "loss": 0.1178, "rewards/accuracies": 1.0, "rewards/chosen": -3.240940809249878, "rewards/margins": 4.632388114929199, "rewards/rejected": -7.873329162597656, "step": 8154 }, { "epoch": 1.81, "learning_rate": 8.903563644978346e-06, "logits/chosen": -1.698504090309143, "logits/rejected": -1.6432002782821655, "logps/chosen": -106.03829956054688, "logps/rejected": -233.0431671142578, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.936572313308716, "rewards/margins": 7.490614891052246, "rewards/rejected": -10.427186965942383, "step": 8155 }, { "epoch": 1.81, "learning_rate": 8.902443386135e-06, "logits/chosen": -1.5280011892318726, "logits/rejected": -1.4591258764266968, "logps/chosen": -109.29931640625, "logps/rejected": -197.95166015625, "loss": 0.5036, "rewards/accuracies": 0.0, "rewards/chosen": -0.6146149039268494, "rewards/margins": -0.5516220331192017, "rewards/rejected": -0.06299286335706711, "step": 8156 }, { "epoch": 1.81, "learning_rate": 8.90132262583706e-06, "logits/chosen": -1.652635097503662, "logits/rejected": -1.6540112495422363, "logps/chosen": -106.391845703125, "logps/rejected": -137.9077911376953, "loss": 0.1318, "rewards/accuracies": 1.0, "rewards/chosen": -3.0993964672088623, "rewards/margins": 1.1993892192840576, "rewards/rejected": -4.29878568649292, "step": 8157 }, { "epoch": 1.81, "learning_rate": 8.900201364228542e-06, "logits/chosen": -1.3415287733078003, "logits/rejected": -1.2764341831207275, "logps/chosen": -132.90618896484375, "logps/rejected": -214.24293518066406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.0814437866210938, "rewards/margins": 7.86873722076416, "rewards/rejected": -8.950181007385254, "step": 8158 }, { "epoch": 1.81, "learning_rate": 8.899079601453524e-06, "logits/chosen": -1.5248894691467285, "logits/rejected": -1.556895136833191, "logps/chosen": -208.67352294921875, "logps/rejected": -185.76992797851562, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.01807861402630806, "rewards/margins": 5.6000165939331055, "rewards/rejected": -5.618095397949219, "step": 8159 }, { "epoch": 1.81, "learning_rate": 8.897957337656151e-06, "logits/chosen": -1.2665973901748657, "logits/rejected": -1.3087526559829712, "logps/chosen": -182.38583374023438, "logps/rejected": -151.98025512695312, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.32354736328125, "rewards/margins": 5.1991167068481445, "rewards/rejected": -5.5226640701293945, "step": 8160 }, { "epoch": 1.81, "learning_rate": 8.89683457298063e-06, "logits/chosen": -1.5337533950805664, "logits/rejected": -1.5395666360855103, "logps/chosen": -89.7269287109375, "logps/rejected": -70.2944564819336, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -2.7351155281066895, "rewards/margins": 2.6161603927612305, "rewards/rejected": -5.35127592086792, "step": 8161 }, { "epoch": 1.81, "learning_rate": 8.895711307571235e-06, "logits/chosen": -1.2351552248001099, "logits/rejected": -1.2351552248001099, "logps/chosen": -222.81585693359375, "logps/rejected": -222.81585693359375, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -6.948190212249756, "rewards/margins": 0.0, "rewards/rejected": -6.948190212249756, "step": 8162 }, { "epoch": 1.81, "learning_rate": 8.894587541572301e-06, "logits/chosen": -1.282153606414795, "logits/rejected": -1.3866890668869019, "logps/chosen": -103.28692626953125, "logps/rejected": -154.18719482421875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.3166444301605225, "rewards/margins": 6.1173505783081055, "rewards/rejected": -8.433995246887207, "step": 8163 }, { "epoch": 1.81, "learning_rate": 8.89346327512823e-06, "logits/chosen": -1.2778940200805664, "logits/rejected": -1.281005620956421, "logps/chosen": -98.85028076171875, "logps/rejected": -176.0381622314453, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.11950836330652237, "rewards/margins": 5.9559125900268555, "rewards/rejected": -5.836404323577881, "step": 8164 }, { "epoch": 1.81, "learning_rate": 8.89233850838349e-06, "logits/chosen": -1.2954940795898438, "logits/rejected": -1.3095260858535767, "logps/chosen": -107.88162231445312, "logps/rejected": -157.22705078125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 0.22643433511257172, "rewards/margins": 5.076767921447754, "rewards/rejected": -4.8503336906433105, "step": 8165 }, { "epoch": 1.81, "learning_rate": 8.891213241482606e-06, "logits/chosen": -1.2750120162963867, "logits/rejected": -1.1867552995681763, "logps/chosen": -59.799373626708984, "logps/rejected": -180.75408935546875, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.43669816851615906, "rewards/margins": 4.18488883972168, "rewards/rejected": -4.621586799621582, "step": 8166 }, { "epoch": 1.81, "learning_rate": 8.890087474570174e-06, "logits/chosen": -1.2748571634292603, "logits/rejected": -1.2956879138946533, "logps/chosen": -116.25740051269531, "logps/rejected": -166.6300048828125, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -3.3014893531799316, "rewards/margins": 3.744797706604004, "rewards/rejected": -7.0462870597839355, "step": 8167 }, { "epoch": 1.81, "learning_rate": 8.888961207790856e-06, "logits/chosen": -1.3708034753799438, "logits/rejected": -1.353155255317688, "logps/chosen": -153.91639709472656, "logps/rejected": -320.2379150390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 2.221688985824585, "rewards/margins": 12.692806243896484, "rewards/rejected": -10.47111701965332, "step": 8168 }, { "epoch": 1.81, "learning_rate": 8.887834441289369e-06, "logits/chosen": -1.4167766571044922, "logits/rejected": -1.439630150794983, "logps/chosen": -242.9458465576172, "logps/rejected": -271.66070556640625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.093946933746338, "rewards/margins": 10.850658416748047, "rewards/rejected": -12.944604873657227, "step": 8169 }, { "epoch": 1.81, "learning_rate": 8.886707175210503e-06, "logits/chosen": -1.443278431892395, "logits/rejected": -1.6258482933044434, "logps/chosen": -289.9561767578125, "logps/rejected": -163.26690673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.674163818359375, "rewards/margins": 10.80044174194336, "rewards/rejected": -10.126277923583984, "step": 8170 }, { "epoch": 1.81, "learning_rate": 8.88557940969911e-06, "logits/chosen": -1.3598536252975464, "logits/rejected": -1.3341293334960938, "logps/chosen": -68.60859680175781, "logps/rejected": -100.09915161132812, "loss": 0.1187, "rewards/accuracies": 1.0, "rewards/chosen": -1.446144461631775, "rewards/margins": 1.3170026540756226, "rewards/rejected": -2.7631471157073975, "step": 8171 }, { "epoch": 1.81, "learning_rate": 8.884451144900104e-06, "logits/chosen": -1.4162092208862305, "logits/rejected": -1.551477074623108, "logps/chosen": -166.33164978027344, "logps/rejected": -208.6888885498047, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.9573654532432556, "rewards/margins": 8.08099365234375, "rewards/rejected": -7.12362813949585, "step": 8172 }, { "epoch": 1.81, "learning_rate": 8.88332238095846e-06, "logits/chosen": -1.6066255569458008, "logits/rejected": -1.041526436805725, "logps/chosen": -207.84971618652344, "logps/rejected": -832.0911865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.5560898184776306, "rewards/margins": 73.26984405517578, "rewards/rejected": -72.71375274658203, "step": 8173 }, { "epoch": 1.81, "learning_rate": 8.882193118019229e-06, "logits/chosen": -1.274096131324768, "logits/rejected": -1.244804859161377, "logps/chosen": -87.27832794189453, "logps/rejected": -115.81233215332031, "loss": 0.1113, "rewards/accuracies": 1.0, "rewards/chosen": -1.6889435052871704, "rewards/margins": 2.125391960144043, "rewards/rejected": -3.814335584640503, "step": 8174 }, { "epoch": 1.81, "learning_rate": 8.881063356227513e-06, "logits/chosen": -1.8760111331939697, "logits/rejected": -1.8786587715148926, "logps/chosen": -126.77110290527344, "logps/rejected": -172.7759552001953, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -2.253188371658325, "rewards/margins": 3.694638967514038, "rewards/rejected": -5.947827339172363, "step": 8175 }, { "epoch": 1.81, "learning_rate": 8.879933095728485e-06, "logits/chosen": -1.394066333770752, "logits/rejected": -1.3885074853897095, "logps/chosen": -115.62649536132812, "logps/rejected": -161.77056884765625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.027474213391542435, "rewards/margins": 5.085412502288818, "rewards/rejected": -5.112886905670166, "step": 8176 }, { "epoch": 1.81, "learning_rate": 8.878802336667384e-06, "logits/chosen": -1.3465479612350464, "logits/rejected": -1.4313443899154663, "logps/chosen": -116.68333435058594, "logps/rejected": -102.15885162353516, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -2.194936513900757, "rewards/margins": 5.434320449829102, "rewards/rejected": -7.629256725311279, "step": 8177 }, { "epoch": 1.81, "learning_rate": 8.877671079189505e-06, "logits/chosen": -0.9343790411949158, "logits/rejected": -0.8380773663520813, "logps/chosen": -75.79457092285156, "logps/rejected": -173.82777404785156, "loss": 0.3842, "rewards/accuracies": 1.0, "rewards/chosen": -1.0619186162948608, "rewards/margins": 6.348936557769775, "rewards/rejected": -7.410855293273926, "step": 8178 }, { "epoch": 1.81, "learning_rate": 8.876539323440214e-06, "logits/chosen": -1.841070294380188, "logits/rejected": -1.832924485206604, "logps/chosen": -125.966064453125, "logps/rejected": -200.205078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3558311462402344, "rewards/margins": 8.527579307556152, "rewards/rejected": -9.883410453796387, "step": 8179 }, { "epoch": 1.81, "learning_rate": 8.87540706956494e-06, "logits/chosen": -1.0662082433700562, "logits/rejected": -1.0662082433700562, "logps/chosen": -214.87612915039062, "logps/rejected": -214.87612915039062, "loss": 0.3476, "rewards/accuracies": 0.0, "rewards/chosen": -4.761465549468994, "rewards/margins": 0.0, "rewards/rejected": -4.761465549468994, "step": 8180 }, { "epoch": 1.81, "learning_rate": 8.874274317709173e-06, "logits/chosen": -1.2899880409240723, "logits/rejected": -1.275351881980896, "logps/chosen": -97.89309692382812, "logps/rejected": -147.29824829101562, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 0.0012985229259356856, "rewards/margins": 5.286235809326172, "rewards/rejected": -5.284937381744385, "step": 8181 }, { "epoch": 1.81, "learning_rate": 8.873141068018469e-06, "logits/chosen": -1.462191104888916, "logits/rejected": -1.5141746997833252, "logps/chosen": -168.72396850585938, "logps/rejected": -129.2732391357422, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.4808928966522217, "rewards/margins": 5.443143844604492, "rewards/rejected": -7.924036502838135, "step": 8182 }, { "epoch": 1.81, "learning_rate": 8.872007320638449e-06, "logits/chosen": -1.4279199838638306, "logits/rejected": -1.4391090869903564, "logps/chosen": -61.03318405151367, "logps/rejected": -75.68550872802734, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.5200966000556946, "rewards/margins": 5.549563407897949, "rewards/rejected": -6.069660186767578, "step": 8183 }, { "epoch": 1.81, "learning_rate": 8.870873075714797e-06, "logits/chosen": -1.623198390007019, "logits/rejected": -1.4650110006332397, "logps/chosen": -142.224365234375, "logps/rejected": -308.7528076171875, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": 0.9620422720909119, "rewards/margins": 8.065325736999512, "rewards/rejected": -7.103283882141113, "step": 8184 }, { "epoch": 1.81, "learning_rate": 8.86973833339326e-06, "logits/chosen": -1.4505805969238281, "logits/rejected": -1.4249067306518555, "logps/chosen": -91.210205078125, "logps/rejected": -215.91184997558594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.7274407148361206, "rewards/margins": 9.17495346069336, "rewards/rejected": -10.90239429473877, "step": 8185 }, { "epoch": 1.81, "learning_rate": 8.86860309381965e-06, "logits/chosen": -1.1567002534866333, "logits/rejected": -1.1936955451965332, "logps/chosen": -226.26177978515625, "logps/rejected": -264.82763671875, "loss": 0.0647, "rewards/accuracies": 1.0, "rewards/chosen": -4.997483730316162, "rewards/margins": 1.9801654815673828, "rewards/rejected": -6.977649211883545, "step": 8186 }, { "epoch": 1.81, "learning_rate": 8.867467357139842e-06, "logits/chosen": -1.348171353340149, "logits/rejected": -1.2273845672607422, "logps/chosen": -247.737548828125, "logps/rejected": -213.4117889404297, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9439254999160767, "rewards/margins": 7.972697734832764, "rewards/rejected": -8.91662311553955, "step": 8187 }, { "epoch": 1.81, "learning_rate": 8.866331123499775e-06, "logits/chosen": -1.2632719278335571, "logits/rejected": -1.2555463314056396, "logps/chosen": -111.32794189453125, "logps/rejected": -203.44146728515625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -3.635796308517456, "rewards/margins": 4.275631904602051, "rewards/rejected": -7.911427974700928, "step": 8188 }, { "epoch": 1.81, "learning_rate": 8.865194393045452e-06, "logits/chosen": -1.3517650365829468, "logits/rejected": -1.2703914642333984, "logps/chosen": -293.7088623046875, "logps/rejected": -228.88494873046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.261909484863281, "rewards/margins": 8.370402336120605, "rewards/rejected": -13.632311820983887, "step": 8189 }, { "epoch": 1.81, "learning_rate": 8.864057165922944e-06, "logits/chosen": -1.6405564546585083, "logits/rejected": -1.6113990545272827, "logps/chosen": -152.3447723388672, "logps/rejected": -185.2750701904297, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.145159959793091, "rewards/margins": 5.918946266174316, "rewards/rejected": -9.064105987548828, "step": 8190 }, { "epoch": 1.81, "learning_rate": 8.862919442278379e-06, "logits/chosen": -1.4804438352584839, "logits/rejected": -1.4627225399017334, "logps/chosen": -125.92300415039062, "logps/rejected": -223.70135498046875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -3.0934860706329346, "rewards/margins": 5.089082717895508, "rewards/rejected": -8.182568550109863, "step": 8191 }, { "epoch": 1.81, "learning_rate": 8.86178122225795e-06, "logits/chosen": -1.768198847770691, "logits/rejected": -1.7669720649719238, "logps/chosen": -109.6705093383789, "logps/rejected": -152.62905883789062, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -3.5897774696350098, "rewards/margins": 5.619663715362549, "rewards/rejected": -9.209441184997559, "step": 8192 }, { "epoch": 1.81, "learning_rate": 8.860642506007919e-06, "logits/chosen": -1.2927039861679077, "logits/rejected": -1.1832841634750366, "logps/chosen": -69.3115234375, "logps/rejected": -260.146728515625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.467337042093277, "rewards/margins": 10.656232833862305, "rewards/rejected": -10.188896179199219, "step": 8193 }, { "epoch": 1.81, "learning_rate": 8.859503293674605e-06, "logits/chosen": -1.1982043981552124, "logits/rejected": -1.2439014911651611, "logps/chosen": -227.5230712890625, "logps/rejected": -203.8527374267578, "loss": 2.3069, "rewards/accuracies": 0.0, "rewards/chosen": -2.841665744781494, "rewards/margins": -4.6027116775512695, "rewards/rejected": 1.7610458135604858, "step": 8194 }, { "epoch": 1.81, "learning_rate": 8.858363585404397e-06, "logits/chosen": -1.8948734998703003, "logits/rejected": -1.8654836416244507, "logps/chosen": -110.83292388916016, "logps/rejected": -95.3724365234375, "loss": 0.7646, "rewards/accuracies": 0.0, "rewards/chosen": -3.2732346057891846, "rewards/margins": -1.2846390008926392, "rewards/rejected": -1.9885956048965454, "step": 8195 }, { "epoch": 1.81, "learning_rate": 8.857223381343742e-06, "logits/chosen": -1.302769422531128, "logits/rejected": -1.4653048515319824, "logps/chosen": -178.01425170898438, "logps/rejected": -126.68614196777344, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 4.885022163391113, "rewards/margins": 7.2880353927612305, "rewards/rejected": -2.403012990951538, "step": 8196 }, { "epoch": 1.81, "learning_rate": 8.856082681639158e-06, "logits/chosen": -1.5153518915176392, "logits/rejected": -1.4055275917053223, "logps/chosen": -209.53741455078125, "logps/rejected": -274.81976318359375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 0.07907714694738388, "rewards/margins": 5.1078996658325195, "rewards/rejected": -5.028822422027588, "step": 8197 }, { "epoch": 1.81, "learning_rate": 8.854941486437216e-06, "logits/chosen": -1.1125562191009521, "logits/rejected": -1.1125562191009521, "logps/chosen": -152.72384643554688, "logps/rejected": -152.72384643554688, "loss": 0.3479, "rewards/accuracies": 0.0, "rewards/chosen": -3.1180412769317627, "rewards/margins": 0.0, "rewards/rejected": -3.1180412769317627, "step": 8198 }, { "epoch": 1.81, "learning_rate": 8.853799795884562e-06, "logits/chosen": -1.2265912294387817, "logits/rejected": -1.2265912294387817, "logps/chosen": -188.5770263671875, "logps/rejected": -188.5770263671875, "loss": 0.3472, "rewards/accuracies": 0.0, "rewards/chosen": -7.429290771484375, "rewards/margins": 0.0, "rewards/rejected": -7.429290771484375, "step": 8199 }, { "epoch": 1.81, "learning_rate": 8.852657610127898e-06, "logits/chosen": -1.2829803228378296, "logits/rejected": -1.241672158241272, "logps/chosen": -67.07131958007812, "logps/rejected": -151.0065155029297, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.1876785308122635, "rewards/margins": 9.36983585357666, "rewards/rejected": -9.557514190673828, "step": 8200 }, { "epoch": 1.82, "learning_rate": 8.851514929313992e-06, "logits/chosen": -1.3517305850982666, "logits/rejected": -1.3182387351989746, "logps/chosen": -111.16429901123047, "logps/rejected": -155.77882385253906, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.370237112045288, "rewards/margins": 6.929905891418457, "rewards/rejected": -9.300143241882324, "step": 8201 }, { "epoch": 1.82, "learning_rate": 8.850371753589677e-06, "logits/chosen": -1.5903147459030151, "logits/rejected": -1.5904555320739746, "logps/chosen": -92.13179016113281, "logps/rejected": -100.8702392578125, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": -0.8074378967285156, "rewards/margins": 1.608618974685669, "rewards/rejected": -2.4160568714141846, "step": 8202 }, { "epoch": 1.82, "learning_rate": 8.849228083101847e-06, "logits/chosen": -1.479422926902771, "logits/rejected": -1.590207576751709, "logps/chosen": -150.2642822265625, "logps/rejected": -162.61431884765625, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": -3.829876661300659, "rewards/margins": 7.3020477294921875, "rewards/rejected": -11.131924629211426, "step": 8203 }, { "epoch": 1.82, "learning_rate": 8.848083917997463e-06, "logits/chosen": -1.7515331506729126, "logits/rejected": -1.761260747909546, "logps/chosen": -107.84989929199219, "logps/rejected": -101.17088317871094, "loss": 0.2269, "rewards/accuracies": 1.0, "rewards/chosen": -2.0260283946990967, "rewards/margins": 0.6373977661132812, "rewards/rejected": -2.663426160812378, "step": 8204 }, { "epoch": 1.82, "learning_rate": 8.846939258423545e-06, "logits/chosen": -1.5417135953903198, "logits/rejected": -1.5695979595184326, "logps/chosen": -107.19217681884766, "logps/rejected": -144.47036743164062, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -1.9290817975997925, "rewards/margins": 9.605890274047852, "rewards/rejected": -11.534972190856934, "step": 8205 }, { "epoch": 1.82, "learning_rate": 8.84579410452718e-06, "logits/chosen": -1.3631631135940552, "logits/rejected": -1.2168207168579102, "logps/chosen": -171.6982421875, "logps/rejected": -464.74835205078125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.27597352862358093, "rewards/margins": 19.237699508666992, "rewards/rejected": -18.96172523498535, "step": 8206 }, { "epoch": 1.82, "learning_rate": 8.844648456455518e-06, "logits/chosen": -1.4133843183517456, "logits/rejected": -1.4229602813720703, "logps/chosen": -93.64468383789062, "logps/rejected": -89.20465087890625, "loss": 0.5492, "rewards/accuracies": 0.0, "rewards/chosen": -1.976806640625, "rewards/margins": -0.6689498424530029, "rewards/rejected": -1.307856798171997, "step": 8207 }, { "epoch": 1.82, "learning_rate": 8.843502314355771e-06, "logits/chosen": -1.024551510810852, "logits/rejected": -1.0856329202651978, "logps/chosen": -328.57928466796875, "logps/rejected": -196.985595703125, "loss": 0.67, "rewards/accuracies": 0.0, "rewards/chosen": -8.534621238708496, "rewards/margins": -0.9305362701416016, "rewards/rejected": -7.6040849685668945, "step": 8208 }, { "epoch": 1.82, "learning_rate": 8.842355678375217e-06, "logits/chosen": -1.1676676273345947, "logits/rejected": -1.244490385055542, "logps/chosen": -135.40101623535156, "logps/rejected": -118.69622802734375, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -4.827517032623291, "rewards/margins": 4.570655345916748, "rewards/rejected": -9.398172378540039, "step": 8209 }, { "epoch": 1.82, "learning_rate": 8.841208548661195e-06, "logits/chosen": -1.263696312904358, "logits/rejected": -1.2030291557312012, "logps/chosen": -235.629150390625, "logps/rejected": -362.9212341308594, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -4.311654567718506, "rewards/margins": 4.847250461578369, "rewards/rejected": -9.158905029296875, "step": 8210 }, { "epoch": 1.82, "learning_rate": 8.840060925361109e-06, "logits/chosen": -1.5767420530319214, "logits/rejected": -1.5767420530319214, "logps/chosen": -181.5297088623047, "logps/rejected": -181.5297088623047, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -12.329301834106445, "rewards/margins": 0.0, "rewards/rejected": -12.329301834106445, "step": 8211 }, { "epoch": 1.82, "learning_rate": 8.838912808622424e-06, "logits/chosen": -1.282415509223938, "logits/rejected": -1.3154675960540771, "logps/chosen": -103.26828002929688, "logps/rejected": -152.3495635986328, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.7073478698730469, "rewards/margins": 4.516794681549072, "rewards/rejected": -6.224142551422119, "step": 8212 }, { "epoch": 1.82, "learning_rate": 8.837764198592672e-06, "logits/chosen": -1.1407783031463623, "logits/rejected": -1.0994620323181152, "logps/chosen": -85.77455139160156, "logps/rejected": -302.8052062988281, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -2.044210195541382, "rewards/margins": 13.755316734313965, "rewards/rejected": -15.799527168273926, "step": 8213 }, { "epoch": 1.82, "learning_rate": 8.836615095419448e-06, "logits/chosen": -1.0976063013076782, "logits/rejected": -1.0470976829528809, "logps/chosen": -207.01124572753906, "logps/rejected": -275.00628662109375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -5.883174419403076, "rewards/margins": 5.409837245941162, "rewards/rejected": -11.293011665344238, "step": 8214 }, { "epoch": 1.82, "learning_rate": 8.835465499250404e-06, "logits/chosen": -1.400341510772705, "logits/rejected": -1.2148561477661133, "logps/chosen": -110.50898742675781, "logps/rejected": -276.38409423828125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.1676727533340454, "rewards/margins": 5.476828098297119, "rewards/rejected": -6.644500732421875, "step": 8215 }, { "epoch": 1.82, "learning_rate": 8.834315410233264e-06, "logits/chosen": -1.704435110092163, "logits/rejected": -2.0242340564727783, "logps/chosen": -265.4053649902344, "logps/rejected": -107.63058471679688, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": -4.505502223968506, "rewards/margins": 2.141632080078125, "rewards/rejected": -6.647134304046631, "step": 8216 }, { "epoch": 1.82, "learning_rate": 8.833164828515815e-06, "logits/chosen": -1.419965386390686, "logits/rejected": -1.3983314037322998, "logps/chosen": -87.84059143066406, "logps/rejected": -204.33807373046875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.103327989578247, "rewards/margins": 4.935299873352051, "rewards/rejected": -6.038627624511719, "step": 8217 }, { "epoch": 1.82, "learning_rate": 8.832013754245895e-06, "logits/chosen": -1.2281160354614258, "logits/rejected": -1.1935323476791382, "logps/chosen": -212.7436065673828, "logps/rejected": -341.28009033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5751785635948181, "rewards/margins": 9.925446510314941, "rewards/rejected": -10.500624656677246, "step": 8218 }, { "epoch": 1.82, "learning_rate": 8.830862187571423e-06, "logits/chosen": -1.6053197383880615, "logits/rejected": -1.5626640319824219, "logps/chosen": -96.96862030029297, "logps/rejected": -223.40553283691406, "loss": 0.6529, "rewards/accuracies": 1.0, "rewards/chosen": 0.1547798216342926, "rewards/margins": 12.387300491333008, "rewards/rejected": -12.232521057128906, "step": 8219 }, { "epoch": 1.82, "learning_rate": 8.829710128640368e-06, "logits/chosen": -1.1227543354034424, "logits/rejected": -1.090348243713379, "logps/chosen": -66.02810668945312, "logps/rejected": -95.81863403320312, "loss": 0.1623, "rewards/accuracies": 1.0, "rewards/chosen": -3.297616958618164, "rewards/margins": 0.9598293304443359, "rewards/rejected": -4.2574462890625, "step": 8220 }, { "epoch": 1.82, "learning_rate": 8.828557577600769e-06, "logits/chosen": -1.558879017829895, "logits/rejected": -1.6803189516067505, "logps/chosen": -125.21217346191406, "logps/rejected": -121.93803405761719, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": -1.5491241216659546, "rewards/margins": 1.9867936372756958, "rewards/rejected": -3.5359177589416504, "step": 8221 }, { "epoch": 1.82, "learning_rate": 8.827404534600723e-06, "logits/chosen": -1.5302486419677734, "logits/rejected": -1.6398934125900269, "logps/chosen": -187.234375, "logps/rejected": -168.17098999023438, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.10091095417737961, "rewards/margins": 6.248207092285156, "rewards/rejected": -6.349118232727051, "step": 8222 }, { "epoch": 1.82, "learning_rate": 8.826250999788397e-06, "logits/chosen": -1.7886905670166016, "logits/rejected": -1.9284919500350952, "logps/chosen": -152.5446319580078, "logps/rejected": -111.5262451171875, "loss": 0.0802, "rewards/accuracies": 1.0, "rewards/chosen": -0.7438156008720398, "rewards/margins": 1.8077378273010254, "rewards/rejected": -2.55155348777771, "step": 8223 }, { "epoch": 1.82, "learning_rate": 8.825096973312014e-06, "logits/chosen": -1.2686116695404053, "logits/rejected": -1.2881075143814087, "logps/chosen": -121.00398254394531, "logps/rejected": -168.81582641601562, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.6855576038360596, "rewards/margins": 3.838083505630493, "rewards/rejected": -5.523641109466553, "step": 8224 }, { "epoch": 1.82, "learning_rate": 8.823942455319866e-06, "logits/chosen": -1.5857210159301758, "logits/rejected": -1.5857210159301758, "logps/chosen": -154.89031982421875, "logps/rejected": -154.89031982421875, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -7.634468078613281, "rewards/margins": 0.0, "rewards/rejected": -7.634468078613281, "step": 8225 }, { "epoch": 1.82, "learning_rate": 8.822787445960303e-06, "logits/chosen": -1.370864748954773, "logits/rejected": -1.388481616973877, "logps/chosen": -98.50527954101562, "logps/rejected": -188.11732482910156, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.5392410159111023, "rewards/margins": 8.841351509094238, "rewards/rejected": -9.380592346191406, "step": 8226 }, { "epoch": 1.82, "learning_rate": 8.821631945381746e-06, "logits/chosen": -1.2192742824554443, "logits/rejected": -0.8626450300216675, "logps/chosen": -135.384521484375, "logps/rejected": -969.527099609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.696430206298828, "rewards/margins": 86.76068115234375, "rewards/rejected": -89.45711517333984, "step": 8227 }, { "epoch": 1.82, "learning_rate": 8.82047595373267e-06, "logits/chosen": -1.0735697746276855, "logits/rejected": -1.0716012716293335, "logps/chosen": -249.95425415039062, "logps/rejected": -288.71484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.1886260509490967, "rewards/margins": 7.459652900695801, "rewards/rejected": -10.648279190063477, "step": 8228 }, { "epoch": 1.82, "learning_rate": 8.819319471161617e-06, "logits/chosen": -1.5050873756408691, "logits/rejected": -1.4465745687484741, "logps/chosen": -113.18934631347656, "logps/rejected": -241.024658203125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -3.023266553878784, "rewards/margins": 7.255666732788086, "rewards/rejected": -10.27893352508545, "step": 8229 }, { "epoch": 1.82, "learning_rate": 8.818162497817195e-06, "logits/chosen": -1.639377474784851, "logits/rejected": -1.615310549736023, "logps/chosen": -84.04234313964844, "logps/rejected": -140.57498168945312, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 0.3305923640727997, "rewards/margins": 4.030414581298828, "rewards/rejected": -3.699822187423706, "step": 8230 }, { "epoch": 1.82, "learning_rate": 8.81700503384807e-06, "logits/chosen": -1.3580509424209595, "logits/rejected": -1.3591662645339966, "logps/chosen": -58.85686111450195, "logps/rejected": -85.3692398071289, "loss": 0.4025, "rewards/accuracies": 1.0, "rewards/chosen": -1.0373715162277222, "rewards/margins": 2.13503360748291, "rewards/rejected": -3.172405242919922, "step": 8231 }, { "epoch": 1.82, "learning_rate": 8.815847079402972e-06, "logits/chosen": -1.6759989261627197, "logits/rejected": -1.7047439813613892, "logps/chosen": -242.40484619140625, "logps/rejected": -467.67095947265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.8484513759613037, "rewards/margins": 25.699832916259766, "rewards/rejected": -28.54828453063965, "step": 8232 }, { "epoch": 1.82, "learning_rate": 8.814688634630699e-06, "logits/chosen": -1.391277551651001, "logits/rejected": -1.353830099105835, "logps/chosen": -246.9533233642578, "logps/rejected": -246.145751953125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.5887085199356079, "rewards/margins": 5.063074111938477, "rewards/rejected": -5.651782512664795, "step": 8233 }, { "epoch": 1.82, "learning_rate": 8.813529699680108e-06, "logits/chosen": -1.4434367418289185, "logits/rejected": -1.4434367418289185, "logps/chosen": -196.15545654296875, "logps/rejected": -196.15545654296875, "loss": 0.3718, "rewards/accuracies": 0.0, "rewards/chosen": -9.845860481262207, "rewards/margins": 0.0, "rewards/rejected": -9.845860481262207, "step": 8234 }, { "epoch": 1.82, "learning_rate": 8.812370274700117e-06, "logits/chosen": -1.4696307182312012, "logits/rejected": -1.4628654718399048, "logps/chosen": -115.26433563232422, "logps/rejected": -132.74603271484375, "loss": 0.0947, "rewards/accuracies": 1.0, "rewards/chosen": -1.716731309890747, "rewards/margins": 1.8328826427459717, "rewards/rejected": -3.5496139526367188, "step": 8235 }, { "epoch": 1.82, "learning_rate": 8.81121035983971e-06, "logits/chosen": -1.2896381616592407, "logits/rejected": -0.8554388284683228, "logps/chosen": -188.6851348876953, "logps/rejected": -563.397705078125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 1.9799728393554688, "rewards/margins": 43.3647575378418, "rewards/rejected": -41.38478469848633, "step": 8236 }, { "epoch": 1.82, "learning_rate": 8.810049955247933e-06, "logits/chosen": -1.242336630821228, "logits/rejected": -0.8566718697547913, "logps/chosen": -234.38720703125, "logps/rejected": -657.014892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.943981885910034, "rewards/margins": 55.6473503112793, "rewards/rejected": -51.703369140625, "step": 8237 }, { "epoch": 1.82, "learning_rate": 8.808889061073897e-06, "logits/chosen": -1.1308519840240479, "logits/rejected": -1.1381992101669312, "logps/chosen": -73.60519409179688, "logps/rejected": -120.14705657958984, "loss": 0.1199, "rewards/accuracies": 1.0, "rewards/chosen": -1.4560215473175049, "rewards/margins": 1.4775707721710205, "rewards/rejected": -2.9335923194885254, "step": 8238 }, { "epoch": 1.82, "learning_rate": 8.807727677466773e-06, "logits/chosen": -1.4000873565673828, "logits/rejected": -1.525291085243225, "logps/chosen": -244.17105102539062, "logps/rejected": -254.19520568847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.7865692377090454, "rewards/margins": 13.777070999145508, "rewards/rejected": -11.990501403808594, "step": 8239 }, { "epoch": 1.82, "learning_rate": 8.806565804575796e-06, "logits/chosen": -1.8139238357543945, "logits/rejected": -1.7756446599960327, "logps/chosen": -79.47860717773438, "logps/rejected": -220.4990692138672, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.007374573033303022, "rewards/margins": 7.311474323272705, "rewards/rejected": -7.318849086761475, "step": 8240 }, { "epoch": 1.82, "learning_rate": 8.805403442550261e-06, "logits/chosen": -1.0924830436706543, "logits/rejected": -0.9092076420783997, "logps/chosen": -191.12513732910156, "logps/rejected": -427.2078857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.6427658200263977, "rewards/margins": 12.705644607543945, "rewards/rejected": -12.062878608703613, "step": 8241 }, { "epoch": 1.82, "learning_rate": 8.804240591539537e-06, "logits/chosen": -1.1771037578582764, "logits/rejected": -1.2190310955047607, "logps/chosen": -258.6504821777344, "logps/rejected": -196.88987731933594, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": 0.5219787955284119, "rewards/margins": 4.256033420562744, "rewards/rejected": -3.7340545654296875, "step": 8242 }, { "epoch": 1.82, "learning_rate": 8.80307725169304e-06, "logits/chosen": -1.0821865797042847, "logits/rejected": -1.135006070137024, "logps/chosen": -103.91946411132812, "logps/rejected": -94.5235595703125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.7178550958633423, "rewards/margins": 4.874119758605957, "rewards/rejected": -6.59197473526001, "step": 8243 }, { "epoch": 1.82, "learning_rate": 8.801913423160256e-06, "logits/chosen": -1.2894576787948608, "logits/rejected": -1.3240643739700317, "logps/chosen": -120.45310974121094, "logps/rejected": -146.35882568359375, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -3.3999404907226562, "rewards/margins": 2.614330291748047, "rewards/rejected": -6.014270782470703, "step": 8244 }, { "epoch": 1.82, "learning_rate": 8.800749106090739e-06, "logits/chosen": -1.0822423696517944, "logits/rejected": -1.0274473428726196, "logps/chosen": -189.35916137695312, "logps/rejected": -247.21075439453125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.7929900884628296, "rewards/margins": 7.908090114593506, "rewards/rejected": -9.701080322265625, "step": 8245 }, { "epoch": 1.83, "learning_rate": 8.799584300634096e-06, "logits/chosen": -1.1245633363723755, "logits/rejected": -1.201429843902588, "logps/chosen": -317.31207275390625, "logps/rejected": -117.68788146972656, "loss": 1.5957, "rewards/accuracies": 0.0, "rewards/chosen": -8.289275169372559, "rewards/margins": -3.1487202644348145, "rewards/rejected": -5.140554904937744, "step": 8246 }, { "epoch": 1.83, "learning_rate": 8.798419006940008e-06, "logits/chosen": -1.3711413145065308, "logits/rejected": -1.3390806913375854, "logps/chosen": -229.514404296875, "logps/rejected": -213.2484130859375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 0.05527038499712944, "rewards/margins": 4.5810089111328125, "rewards/rejected": -4.525738716125488, "step": 8247 }, { "epoch": 1.83, "learning_rate": 8.797253225158206e-06, "logits/chosen": -1.3276891708374023, "logits/rejected": -1.3422876596450806, "logps/chosen": -111.17228698730469, "logps/rejected": -195.35240173339844, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": -1.4261726140975952, "rewards/margins": 2.7234292030334473, "rewards/rejected": -4.149601936340332, "step": 8248 }, { "epoch": 1.83, "learning_rate": 8.796086955438494e-06, "logits/chosen": -1.9317076206207275, "logits/rejected": -1.8723379373550415, "logps/chosen": -131.6209716796875, "logps/rejected": -205.997314453125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -4.551274299621582, "rewards/margins": 4.556881904602051, "rewards/rejected": -9.108156204223633, "step": 8249 }, { "epoch": 1.83, "learning_rate": 8.794920197930735e-06, "logits/chosen": -1.1877318620681763, "logits/rejected": -1.2380359172821045, "logps/chosen": -262.7276611328125, "logps/rejected": -246.8385009765625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -3.349810838699341, "rewards/margins": 4.558496475219727, "rewards/rejected": -7.908307075500488, "step": 8250 }, { "epoch": 1.83, "learning_rate": 8.79375295278485e-06, "logits/chosen": -1.2046948671340942, "logits/rejected": -1.4716012477874756, "logps/chosen": -373.3892517089844, "logps/rejected": -153.98451232910156, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.391876220703125, "rewards/margins": 5.369324684143066, "rewards/rejected": -8.761200904846191, "step": 8251 }, { "epoch": 1.83, "learning_rate": 8.792585220150834e-06, "logits/chosen": -1.358799695968628, "logits/rejected": -0.819210946559906, "logps/chosen": -89.32838439941406, "logps/rejected": -602.9100341796875, "loss": 0.4688, "rewards/accuracies": 1.0, "rewards/chosen": -0.847913384437561, "rewards/margins": 50.1195182800293, "rewards/rejected": -50.967430114746094, "step": 8252 }, { "epoch": 1.83, "learning_rate": 8.791417000178732e-06, "logits/chosen": -1.4828153848648071, "logits/rejected": -1.5286140441894531, "logps/chosen": -158.43048095703125, "logps/rejected": -207.46075439453125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.738970935344696, "rewards/margins": 4.761578559875488, "rewards/rejected": -5.50054931640625, "step": 8253 }, { "epoch": 1.83, "learning_rate": 8.790248293018662e-06, "logits/chosen": -1.524320363998413, "logits/rejected": -1.513561725616455, "logps/chosen": -95.51214599609375, "logps/rejected": -193.55638122558594, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.4101318418979645, "rewards/margins": 6.068934440612793, "rewards/rejected": -6.479066371917725, "step": 8254 }, { "epoch": 1.83, "learning_rate": 8.789079098820796e-06, "logits/chosen": -0.9941174387931824, "logits/rejected": -0.9001979827880859, "logps/chosen": -153.19677734375, "logps/rejected": -244.14195251464844, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5149078369140625, "rewards/margins": 3.5580062866210938, "rewards/rejected": -4.072914123535156, "step": 8255 }, { "epoch": 1.83, "learning_rate": 8.787909417735374e-06, "logits/chosen": -1.5487895011901855, "logits/rejected": -0.605620801448822, "logps/chosen": -128.35984802246094, "logps/rejected": -709.7136840820312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9256042838096619, "rewards/margins": 59.66037368774414, "rewards/rejected": -60.58597946166992, "step": 8256 }, { "epoch": 1.83, "learning_rate": 8.7867392499127e-06, "logits/chosen": -1.5941119194030762, "logits/rejected": -1.6385047435760498, "logps/chosen": -87.34544372558594, "logps/rejected": -71.05936431884766, "loss": 0.1599, "rewards/accuracies": 1.0, "rewards/chosen": -1.8980194330215454, "rewards/margins": 0.9760497808456421, "rewards/rejected": -2.8740692138671875, "step": 8257 }, { "epoch": 1.83, "learning_rate": 8.785568595503134e-06, "logits/chosen": -1.5061618089675903, "logits/rejected": -1.4939597845077515, "logps/chosen": -138.9488983154297, "logps/rejected": -154.5697479248047, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.6484893560409546, "rewards/margins": 5.7720537185668945, "rewards/rejected": -7.420543193817139, "step": 8258 }, { "epoch": 1.83, "learning_rate": 8.784397454657103e-06, "logits/chosen": -1.5737261772155762, "logits/rejected": -1.4238916635513306, "logps/chosen": -151.85614013671875, "logps/rejected": -370.1090087890625, "loss": 0.1092, "rewards/accuracies": 1.0, "rewards/chosen": -5.611827373504639, "rewards/margins": 1.4180922508239746, "rewards/rejected": -7.029919624328613, "step": 8259 }, { "epoch": 1.83, "learning_rate": 8.783225827525098e-06, "logits/chosen": -1.2316319942474365, "logits/rejected": -1.2888672351837158, "logps/chosen": -185.17276000976562, "logps/rejected": -171.39849853515625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 1.029902696609497, "rewards/margins": 5.262651443481445, "rewards/rejected": -4.232748508453369, "step": 8260 }, { "epoch": 1.83, "learning_rate": 8.782053714257668e-06, "logits/chosen": -1.4746038913726807, "logits/rejected": -1.5742524862289429, "logps/chosen": -124.94664764404297, "logps/rejected": -159.17617797851562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6935852766036987, "rewards/margins": 10.931992530822754, "rewards/rejected": -12.625577926635742, "step": 8261 }, { "epoch": 1.83, "learning_rate": 8.780881115005428e-06, "logits/chosen": -1.5800131559371948, "logits/rejected": -0.8675884008407593, "logps/chosen": -121.54997253417969, "logps/rejected": -1111.3662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.8530426025390625, "rewards/margins": 102.8217544555664, "rewards/rejected": -101.96871185302734, "step": 8262 }, { "epoch": 1.83, "learning_rate": 8.779708029919054e-06, "logits/chosen": -1.4647297859191895, "logits/rejected": -1.473191499710083, "logps/chosen": -166.42701721191406, "logps/rejected": -139.099609375, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -2.4214751720428467, "rewards/margins": 2.836442708969116, "rewards/rejected": -5.257917881011963, "step": 8263 }, { "epoch": 1.83, "learning_rate": 8.778534459149283e-06, "logits/chosen": -1.5902276039123535, "logits/rejected": -1.5741101503372192, "logps/chosen": -90.6904525756836, "logps/rejected": -155.63844299316406, "loss": 0.0351, "rewards/accuracies": 1.0, "rewards/chosen": -3.0141029357910156, "rewards/margins": 2.621598720550537, "rewards/rejected": -5.635701656341553, "step": 8264 }, { "epoch": 1.83, "learning_rate": 8.777360402846919e-06, "logits/chosen": -1.2322640419006348, "logits/rejected": -1.3133885860443115, "logps/chosen": -252.64646911621094, "logps/rejected": -123.93856811523438, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": -2.0768189430236816, "rewards/margins": 3.2489380836486816, "rewards/rejected": -5.325757026672363, "step": 8265 }, { "epoch": 1.83, "learning_rate": 8.776185861162822e-06, "logits/chosen": -1.5000368356704712, "logits/rejected": -1.4042093753814697, "logps/chosen": -109.11094665527344, "logps/rejected": -259.3999938964844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.5701401233673096, "rewards/margins": 9.10948371887207, "rewards/rejected": -6.539343357086182, "step": 8266 }, { "epoch": 1.83, "learning_rate": 8.77501083424792e-06, "logits/chosen": -1.337242841720581, "logits/rejected": -1.349911093711853, "logps/chosen": -162.18736267089844, "logps/rejected": -135.36614990234375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 1.0612961053848267, "rewards/margins": 6.25177526473999, "rewards/rejected": -5.190479278564453, "step": 8267 }, { "epoch": 1.83, "learning_rate": 8.773835322253202e-06, "logits/chosen": -1.6963776350021362, "logits/rejected": -1.7569812536239624, "logps/chosen": -196.5876007080078, "logps/rejected": -172.06781005859375, "loss": 0.0649, "rewards/accuracies": 1.0, "rewards/chosen": -5.224494934082031, "rewards/margins": 2.7903451919555664, "rewards/rejected": -8.014840126037598, "step": 8268 }, { "epoch": 1.83, "learning_rate": 8.772659325329717e-06, "logits/chosen": -1.79879629611969, "logits/rejected": -1.837225317955017, "logps/chosen": -103.68461608886719, "logps/rejected": -123.20306396484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.3642410337924957, "rewards/margins": 9.107194900512695, "rewards/rejected": -9.471435546875, "step": 8269 }, { "epoch": 1.83, "learning_rate": 8.771482843628576e-06, "logits/chosen": -1.5254355669021606, "logits/rejected": -1.4131453037261963, "logps/chosen": -153.88970947265625, "logps/rejected": -312.6206359863281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.907543182373047, "rewards/margins": 7.325658798217773, "rewards/rejected": -10.23320198059082, "step": 8270 }, { "epoch": 1.83, "learning_rate": 8.770305877300958e-06, "logits/chosen": -1.630692958831787, "logits/rejected": -1.698978304862976, "logps/chosen": -106.7406997680664, "logps/rejected": -110.5525894165039, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.285792589187622, "rewards/margins": 5.163734436035156, "rewards/rejected": -6.449527263641357, "step": 8271 }, { "epoch": 1.83, "learning_rate": 8.769128426498098e-06, "logits/chosen": -1.652691125869751, "logits/rejected": -1.6140978336334229, "logps/chosen": -130.9846649169922, "logps/rejected": -259.79095458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2709670960903168, "rewards/margins": 11.447954177856445, "rewards/rejected": -11.176986694335938, "step": 8272 }, { "epoch": 1.83, "learning_rate": 8.767950491371295e-06, "logits/chosen": -1.3312760591506958, "logits/rejected": -1.2788035869598389, "logps/chosen": -101.50161743164062, "logps/rejected": -147.01307678222656, "loss": 0.0669, "rewards/accuracies": 1.0, "rewards/chosen": -1.9660576581954956, "rewards/margins": 3.2128195762634277, "rewards/rejected": -5.178877353668213, "step": 8273 }, { "epoch": 1.83, "learning_rate": 8.766772072071911e-06, "logits/chosen": -1.7826263904571533, "logits/rejected": -1.7612141370773315, "logps/chosen": -115.24005126953125, "logps/rejected": -209.4785614013672, "loss": 0.1644, "rewards/accuracies": 1.0, "rewards/chosen": -2.0302207469940186, "rewards/margins": 3.081920862197876, "rewards/rejected": -5.1121416091918945, "step": 8274 }, { "epoch": 1.83, "learning_rate": 8.765593168751373e-06, "logits/chosen": -1.2792481184005737, "logits/rejected": -1.3836880922317505, "logps/chosen": -267.552978515625, "logps/rejected": -275.8175354003906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.5178406238555908, "rewards/margins": 10.089457511901855, "rewards/rejected": -8.571617126464844, "step": 8275 }, { "epoch": 1.83, "learning_rate": 8.764413781561164e-06, "logits/chosen": -1.8681485652923584, "logits/rejected": -1.8234432935714722, "logps/chosen": -127.62150573730469, "logps/rejected": -234.53773498535156, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.9980942010879517, "rewards/margins": 7.481701850891113, "rewards/rejected": -6.483607769012451, "step": 8276 }, { "epoch": 1.83, "learning_rate": 8.763233910652833e-06, "logits/chosen": -1.1723251342773438, "logits/rejected": -1.1859859228134155, "logps/chosen": -106.69879150390625, "logps/rejected": -150.55471801757812, "loss": 0.0638, "rewards/accuracies": 1.0, "rewards/chosen": -1.2775627374649048, "rewards/margins": 8.394997596740723, "rewards/rejected": -9.672560691833496, "step": 8277 }, { "epoch": 1.83, "learning_rate": 8.762053556177991e-06, "logits/chosen": -1.6721794605255127, "logits/rejected": -1.6448736190795898, "logps/chosen": -179.84429931640625, "logps/rejected": -193.03773498535156, "loss": 0.3473, "rewards/accuracies": 1.0, "rewards/chosen": -7.219429016113281, "rewards/margins": 0.004246711730957031, "rewards/rejected": -7.223675727844238, "step": 8278 }, { "epoch": 1.83, "learning_rate": 8.760872718288311e-06, "logits/chosen": -1.5551379919052124, "logits/rejected": -1.5635427236557007, "logps/chosen": -114.60983276367188, "logps/rejected": -127.21068572998047, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": -0.6997711062431335, "rewards/margins": 2.1061408519744873, "rewards/rejected": -2.8059120178222656, "step": 8279 }, { "epoch": 1.83, "learning_rate": 8.759691397135528e-06, "logits/chosen": -1.3982131481170654, "logits/rejected": -1.237794280052185, "logps/chosen": -83.17727661132812, "logps/rejected": -224.57754516601562, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.04283447191119194, "rewards/margins": 7.892611980438232, "rewards/rejected": -7.935446262359619, "step": 8280 }, { "epoch": 1.83, "learning_rate": 8.758509592871439e-06, "logits/chosen": -1.5416128635406494, "logits/rejected": -1.5614099502563477, "logps/chosen": -123.99803924560547, "logps/rejected": -136.7757110595703, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.996999502182007, "rewards/margins": 7.339534759521484, "rewards/rejected": -10.33653450012207, "step": 8281 }, { "epoch": 1.83, "learning_rate": 8.7573273056479e-06, "logits/chosen": -1.3674079179763794, "logits/rejected": -1.3674079179763794, "logps/chosen": -167.45542907714844, "logps/rejected": -167.45542907714844, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -8.896089553833008, "rewards/margins": 0.0, "rewards/rejected": -8.896089553833008, "step": 8282 }, { "epoch": 1.83, "learning_rate": 8.756144535616838e-06, "logits/chosen": -1.2416419982910156, "logits/rejected": -1.2184734344482422, "logps/chosen": -170.06802368164062, "logps/rejected": -200.6300048828125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0108550786972046, "rewards/margins": 7.043529987335205, "rewards/rejected": -8.0543851852417, "step": 8283 }, { "epoch": 1.83, "learning_rate": 8.754961282930231e-06, "logits/chosen": -1.3820908069610596, "logits/rejected": -1.3827531337738037, "logps/chosen": -58.736000061035156, "logps/rejected": -108.70426940917969, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -0.8380317687988281, "rewards/margins": 3.1083145141601562, "rewards/rejected": -3.9463462829589844, "step": 8284 }, { "epoch": 1.83, "learning_rate": 8.753777547740126e-06, "logits/chosen": -1.5520204305648804, "logits/rejected": -1.4919254779815674, "logps/chosen": -118.06871032714844, "logps/rejected": -167.5335693359375, "loss": 0.3771, "rewards/accuracies": 0.0, "rewards/chosen": -3.8022186756134033, "rewards/margins": -0.1169891357421875, "rewards/rejected": -3.685229539871216, "step": 8285 }, { "epoch": 1.83, "learning_rate": 8.752593330198631e-06, "logits/chosen": -1.6620111465454102, "logits/rejected": -1.5706539154052734, "logps/chosen": -210.1939697265625, "logps/rejected": -239.88641357421875, "loss": 0.0534, "rewards/accuracies": 1.0, "rewards/chosen": 3.787602186203003, "rewards/margins": 7.823277473449707, "rewards/rejected": -4.035675048828125, "step": 8286 }, { "epoch": 1.83, "learning_rate": 8.751408630457911e-06, "logits/chosen": -1.1123547554016113, "logits/rejected": -1.0969667434692383, "logps/chosen": -113.76908874511719, "logps/rejected": -224.13702392578125, "loss": 0.085, "rewards/accuracies": 1.0, "rewards/chosen": -1.0688354969024658, "rewards/margins": 2.1881346702575684, "rewards/rejected": -3.256970167160034, "step": 8287 }, { "epoch": 1.83, "learning_rate": 8.750223448670204e-06, "logits/chosen": -1.2730501890182495, "logits/rejected": -1.2679500579833984, "logps/chosen": -75.27074432373047, "logps/rejected": -144.046875, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -0.8911186456680298, "rewards/margins": 4.051697731018066, "rewards/rejected": -4.942816257476807, "step": 8288 }, { "epoch": 1.83, "learning_rate": 8.749037784987797e-06, "logits/chosen": -1.583478331565857, "logits/rejected": -1.696106195449829, "logps/chosen": -239.69818115234375, "logps/rejected": -183.4721221923828, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.1108887195587158, "rewards/margins": 4.170306205749512, "rewards/rejected": -5.281195163726807, "step": 8289 }, { "epoch": 1.83, "learning_rate": 8.747851639563048e-06, "logits/chosen": -1.517729640007019, "logits/rejected": -1.4415273666381836, "logps/chosen": -181.0438232421875, "logps/rejected": -339.59661865234375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.2947326600551605, "rewards/margins": 9.2557373046875, "rewards/rejected": -9.550470352172852, "step": 8290 }, { "epoch": 1.84, "learning_rate": 8.746665012548373e-06, "logits/chosen": -1.4356542825698853, "logits/rejected": -0.7590798139572144, "logps/chosen": -78.4198226928711, "logps/rejected": -660.4371337890625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7903160452842712, "rewards/margins": 46.373313903808594, "rewards/rejected": -47.163631439208984, "step": 8291 }, { "epoch": 1.84, "learning_rate": 8.745477904096247e-06, "logits/chosen": -1.5519559383392334, "logits/rejected": -1.5519559383392334, "logps/chosen": -97.12649536132812, "logps/rejected": -97.12649536132812, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -0.5714096426963806, "rewards/margins": 0.0, "rewards/rejected": -0.5714096426963806, "step": 8292 }, { "epoch": 1.84, "learning_rate": 8.744290314359219e-06, "logits/chosen": -1.970335602760315, "logits/rejected": -1.907518744468689, "logps/chosen": -85.4717025756836, "logps/rejected": -139.4829864501953, "loss": 0.0853, "rewards/accuracies": 1.0, "rewards/chosen": -2.2812232971191406, "rewards/margins": 1.687410831451416, "rewards/rejected": -3.9686341285705566, "step": 8293 }, { "epoch": 1.84, "learning_rate": 8.743102243489885e-06, "logits/chosen": -1.2201417684555054, "logits/rejected": -1.4136223793029785, "logps/chosen": -250.9473876953125, "logps/rejected": -208.03704833984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.04941711574792862, "rewards/margins": 9.126604080200195, "rewards/rejected": -9.077186584472656, "step": 8294 }, { "epoch": 1.84, "learning_rate": 8.74191369164091e-06, "logits/chosen": -1.5450540781021118, "logits/rejected": -1.4161179065704346, "logps/chosen": -96.6285171508789, "logps/rejected": -283.62261962890625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.1852028369903564, "rewards/margins": 5.053689002990723, "rewards/rejected": -6.2388916015625, "step": 8295 }, { "epoch": 1.84, "learning_rate": 8.74072465896502e-06, "logits/chosen": -1.6212351322174072, "logits/rejected": -1.0750614404678345, "logps/chosen": -195.74118041992188, "logps/rejected": -1047.2396240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8010238409042358, "rewards/margins": 90.79217529296875, "rewards/rejected": -92.59320068359375, "step": 8296 }, { "epoch": 1.84, "learning_rate": 8.739535145615005e-06, "logits/chosen": -1.4102375507354736, "logits/rejected": -1.4141008853912354, "logps/chosen": -227.79989624023438, "logps/rejected": -317.64422607421875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.09807739406824112, "rewards/margins": 6.545098781585693, "rewards/rejected": -6.447021484375, "step": 8297 }, { "epoch": 1.84, "learning_rate": 8.738345151743715e-06, "logits/chosen": -1.3710837364196777, "logits/rejected": -0.7001084685325623, "logps/chosen": -215.1788787841797, "logps/rejected": -967.6861572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.24928437173366547, "rewards/margins": 81.99893951416016, "rewards/rejected": -82.24822235107422, "step": 8298 }, { "epoch": 1.84, "learning_rate": 8.737154677504059e-06, "logits/chosen": -1.5952781438827515, "logits/rejected": -1.6373218297958374, "logps/chosen": -119.91111755371094, "logps/rejected": -114.58848571777344, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.5865676403045654, "rewards/margins": 6.0069169998168945, "rewards/rejected": -8.593484878540039, "step": 8299 }, { "epoch": 1.84, "learning_rate": 8.73596372304901e-06, "logits/chosen": -1.6292616128921509, "logits/rejected": -1.559755563735962, "logps/chosen": -132.96755981445312, "logps/rejected": -239.8576202392578, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.2501144409179688, "rewards/margins": 7.214454650878906, "rewards/rejected": -10.464569091796875, "step": 8300 }, { "epoch": 1.84, "learning_rate": 8.734772288531604e-06, "logits/chosen": -1.6123228073120117, "logits/rejected": -1.582486867904663, "logps/chosen": -183.02899169921875, "logps/rejected": -166.64749145507812, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": 0.008921814151108265, "rewards/margins": 2.9017045497894287, "rewards/rejected": -2.892782688140869, "step": 8301 }, { "epoch": 1.84, "learning_rate": 8.733580374104936e-06, "logits/chosen": -1.9119908809661865, "logits/rejected": -1.8591128587722778, "logps/chosen": -135.87310791015625, "logps/rejected": -250.86636352539062, "loss": 0.3684, "rewards/accuracies": 1.0, "rewards/chosen": -2.2922089099884033, "rewards/margins": 3.107318162918091, "rewards/rejected": -5.399527072906494, "step": 8302 }, { "epoch": 1.84, "learning_rate": 8.732387979922167e-06, "logits/chosen": -1.0073546171188354, "logits/rejected": -1.0052980184555054, "logps/chosen": -87.88996887207031, "logps/rejected": -238.379150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1230742931365967, "rewards/margins": 12.034062385559082, "rewards/rejected": -14.157136917114258, "step": 8303 }, { "epoch": 1.84, "learning_rate": 8.731195106136515e-06, "logits/chosen": -1.4646940231323242, "logits/rejected": -1.4260987043380737, "logps/chosen": -138.4842071533203, "logps/rejected": -177.93252563476562, "loss": 0.6599, "rewards/accuracies": 0.0, "rewards/chosen": -1.6196205615997314, "rewards/margins": -1.0079751014709473, "rewards/rejected": -0.611645519733429, "step": 8304 }, { "epoch": 1.84, "learning_rate": 8.730001752901258e-06, "logits/chosen": -1.565448522567749, "logits/rejected": -1.5740220546722412, "logps/chosen": -141.6667022705078, "logps/rejected": -226.62188720703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.377294898033142, "rewards/margins": 8.859421730041504, "rewards/rejected": -10.236716270446777, "step": 8305 }, { "epoch": 1.84, "learning_rate": 8.728807920369747e-06, "logits/chosen": -1.465312123298645, "logits/rejected": -1.4290974140167236, "logps/chosen": -168.9388885498047, "logps/rejected": -290.9873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8589798212051392, "rewards/margins": 10.975695610046387, "rewards/rejected": -11.834675788879395, "step": 8306 }, { "epoch": 1.84, "learning_rate": 8.727613608695379e-06, "logits/chosen": -1.8035770654678345, "logits/rejected": -1.894750952720642, "logps/chosen": -118.89585876464844, "logps/rejected": -140.15911865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.25483474135398865, "rewards/margins": 9.422274589538574, "rewards/rejected": -9.167439460754395, "step": 8307 }, { "epoch": 1.84, "learning_rate": 8.726418818031623e-06, "logits/chosen": -1.5127047300338745, "logits/rejected": -1.4854111671447754, "logps/chosen": -71.03108215332031, "logps/rejected": -167.90945434570312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.142276406288147, "rewards/margins": 6.117754936218262, "rewards/rejected": -7.260031223297119, "step": 8308 }, { "epoch": 1.84, "learning_rate": 8.72522354853201e-06, "logits/chosen": -1.2529418468475342, "logits/rejected": -1.1558839082717896, "logps/chosen": -130.19482421875, "logps/rejected": -341.4150695800781, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": 0.8232681155204773, "rewards/margins": 17.17668342590332, "rewards/rejected": -16.35341453552246, "step": 8309 }, { "epoch": 1.84, "learning_rate": 8.724027800350123e-06, "logits/chosen": -1.6338005065917969, "logits/rejected": -1.4479411840438843, "logps/chosen": -113.34748077392578, "logps/rejected": -364.6054382324219, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.452983856201172, "rewards/margins": 5.575486183166504, "rewards/rejected": -8.028470039367676, "step": 8310 }, { "epoch": 1.84, "learning_rate": 8.722831573639618e-06, "logits/chosen": -1.5773119926452637, "logits/rejected": -1.7190732955932617, "logps/chosen": -208.6968994140625, "logps/rejected": -163.0524444580078, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 3.0527374744415283, "rewards/margins": 9.90122127532959, "rewards/rejected": -6.848484039306641, "step": 8311 }, { "epoch": 1.84, "learning_rate": 8.721634868554204e-06, "logits/chosen": -1.2708061933517456, "logits/rejected": -1.2574304342269897, "logps/chosen": -159.413818359375, "logps/rejected": -205.53900146484375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 4.160138130187988, "rewards/margins": 6.237373352050781, "rewards/rejected": -2.077235460281372, "step": 8312 }, { "epoch": 1.84, "learning_rate": 8.720437685247657e-06, "logits/chosen": -1.7774507999420166, "logits/rejected": -1.7807000875473022, "logps/chosen": -77.28761291503906, "logps/rejected": -168.31150817871094, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -1.5288165807724, "rewards/margins": 6.067954063415527, "rewards/rejected": -7.596770763397217, "step": 8313 }, { "epoch": 1.84, "learning_rate": 8.719240023873809e-06, "logits/chosen": -1.37381911277771, "logits/rejected": -1.4194128513336182, "logps/chosen": -155.21807861328125, "logps/rejected": -222.55120849609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.105908155441284, "rewards/margins": 10.478857040405273, "rewards/rejected": -12.584765434265137, "step": 8314 }, { "epoch": 1.84, "learning_rate": 8.71804188458656e-06, "logits/chosen": -1.220262885093689, "logits/rejected": -1.2155747413635254, "logps/chosen": -377.668212890625, "logps/rejected": -388.954833984375, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": -4.964745998382568, "rewards/margins": 3.051663875579834, "rewards/rejected": -8.016409873962402, "step": 8315 }, { "epoch": 1.84, "learning_rate": 8.716843267539868e-06, "logits/chosen": -1.4840952157974243, "logits/rejected": -1.492822289466858, "logps/chosen": -100.64886474609375, "logps/rejected": -181.25466918945312, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -4.813551902770996, "rewards/margins": 5.097904205322266, "rewards/rejected": -9.911456108093262, "step": 8316 }, { "epoch": 1.84, "learning_rate": 8.715644172887751e-06, "logits/chosen": -1.3040111064910889, "logits/rejected": -1.3257787227630615, "logps/chosen": -227.84042358398438, "logps/rejected": -230.69700622558594, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.385278344154358, "rewards/margins": 6.434234619140625, "rewards/rejected": -7.819512844085693, "step": 8317 }, { "epoch": 1.84, "learning_rate": 8.714444600784289e-06, "logits/chosen": -1.725459098815918, "logits/rejected": -1.6965168714523315, "logps/chosen": -83.17131042480469, "logps/rejected": -123.12124633789062, "loss": 0.184, "rewards/accuracies": 1.0, "rewards/chosen": -1.1207504272460938, "rewards/margins": 0.8097488880157471, "rewards/rejected": -1.9304993152618408, "step": 8318 }, { "epoch": 1.84, "learning_rate": 8.713244551383626e-06, "logits/chosen": -1.3740789890289307, "logits/rejected": -1.3219202756881714, "logps/chosen": -101.66011047363281, "logps/rejected": -196.26222229003906, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.9845672845840454, "rewards/margins": 9.677284240722656, "rewards/rejected": -10.66185188293457, "step": 8319 }, { "epoch": 1.84, "learning_rate": 8.712044024839962e-06, "logits/chosen": -1.5832915306091309, "logits/rejected": -1.6878567934036255, "logps/chosen": -143.74497985839844, "logps/rejected": -149.89505004882812, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -2.063507080078125, "rewards/margins": 9.08043384552002, "rewards/rejected": -11.143940925598145, "step": 8320 }, { "epoch": 1.84, "learning_rate": 8.710843021307567e-06, "logits/chosen": -1.4635287523269653, "logits/rejected": -1.786041259765625, "logps/chosen": -171.35910034179688, "logps/rejected": -117.9572525024414, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.29317018389701843, "rewards/margins": 8.781227111816406, "rewards/rejected": -9.074397087097168, "step": 8321 }, { "epoch": 1.84, "learning_rate": 8.709641540940764e-06, "logits/chosen": -1.4301130771636963, "logits/rejected": -1.4091827869415283, "logps/chosen": -111.86326599121094, "logps/rejected": -205.94467163085938, "loss": 0.2712, "rewards/accuracies": 1.0, "rewards/chosen": -1.4018363952636719, "rewards/margins": 0.3287849426269531, "rewards/rejected": -1.730621337890625, "step": 8322 }, { "epoch": 1.84, "learning_rate": 8.70843958389394e-06, "logits/chosen": -1.6829936504364014, "logits/rejected": -1.6313644647598267, "logps/chosen": -86.21690368652344, "logps/rejected": -168.994140625, "loss": 0.0475, "rewards/accuracies": 1.0, "rewards/chosen": -0.8167724609375, "rewards/margins": 4.054792881011963, "rewards/rejected": -4.871565341949463, "step": 8323 }, { "epoch": 1.84, "learning_rate": 8.707237150321544e-06, "logits/chosen": -1.3682094812393188, "logits/rejected": -1.3246405124664307, "logps/chosen": -139.41915893554688, "logps/rejected": -229.51181030273438, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.0274932384490967, "rewards/margins": 5.693384170532227, "rewards/rejected": -7.720877170562744, "step": 8324 }, { "epoch": 1.84, "learning_rate": 8.706034240378087e-06, "logits/chosen": -1.680016279220581, "logits/rejected": -1.7278250455856323, "logps/chosen": -167.60549926757812, "logps/rejected": -148.24331665039062, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.3981781005859375, "rewards/margins": 6.473658084869385, "rewards/rejected": -6.871836185455322, "step": 8325 }, { "epoch": 1.84, "learning_rate": 8.704830854218138e-06, "logits/chosen": -1.0808079242706299, "logits/rejected": -1.0872629880905151, "logps/chosen": -210.9933319091797, "logps/rejected": -186.88284301757812, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -2.583613634109497, "rewards/margins": 4.31633186340332, "rewards/rejected": -6.899945259094238, "step": 8326 }, { "epoch": 1.84, "learning_rate": 8.703626991996333e-06, "logits/chosen": -1.2105690240859985, "logits/rejected": -1.1500318050384521, "logps/chosen": -67.91304779052734, "logps/rejected": -189.04083251953125, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.8604549765586853, "rewards/margins": 6.595452308654785, "rewards/rejected": -7.455907344818115, "step": 8327 }, { "epoch": 1.84, "learning_rate": 8.70242265386736e-06, "logits/chosen": -1.3457585573196411, "logits/rejected": -1.3565353155136108, "logps/chosen": -86.27337646484375, "logps/rejected": -82.18667602539062, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.056941270828247, "rewards/margins": 5.20390510559082, "rewards/rejected": -6.2608466148376465, "step": 8328 }, { "epoch": 1.84, "learning_rate": 8.701217839985978e-06, "logits/chosen": -1.3276854753494263, "logits/rejected": -1.302551031112671, "logps/chosen": -97.63130187988281, "logps/rejected": -150.32247924804688, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": -1.5408912897109985, "rewards/margins": 2.7225875854492188, "rewards/rejected": -4.263478755950928, "step": 8329 }, { "epoch": 1.84, "learning_rate": 8.700012550507e-06, "logits/chosen": -1.3353477716445923, "logits/rejected": -1.32949697971344, "logps/chosen": -192.8491973876953, "logps/rejected": -199.91036987304688, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.4823044538497925, "rewards/margins": 7.632497787475586, "rewards/rejected": -9.114802360534668, "step": 8330 }, { "epoch": 1.84, "learning_rate": 8.698806785585305e-06, "logits/chosen": -1.632003903388977, "logits/rejected": -1.6422147750854492, "logps/chosen": -102.32666015625, "logps/rejected": -141.3295135498047, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -0.4019355773925781, "rewards/margins": 3.3307945728302, "rewards/rejected": -3.7327301502227783, "step": 8331 }, { "epoch": 1.84, "learning_rate": 8.697600545375829e-06, "logits/chosen": -1.1372357606887817, "logits/rejected": -0.9075424075126648, "logps/chosen": -200.62445068359375, "logps/rejected": -1262.559814453125, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -1.2305939197540283, "rewards/margins": 97.18138122558594, "rewards/rejected": -98.41197204589844, "step": 8332 }, { "epoch": 1.84, "learning_rate": 8.696393830033571e-06, "logits/chosen": -1.2929213047027588, "logits/rejected": -1.3285174369812012, "logps/chosen": -88.18684387207031, "logps/rejected": -121.79258728027344, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -1.307154893875122, "rewards/margins": 3.798548936843872, "rewards/rejected": -5.105703830718994, "step": 8333 }, { "epoch": 1.84, "learning_rate": 8.695186639713593e-06, "logits/chosen": -1.5328385829925537, "logits/rejected": -1.5669429302215576, "logps/chosen": -114.7396011352539, "logps/rejected": -98.63351440429688, "loss": 0.1204, "rewards/accuracies": 1.0, "rewards/chosen": -1.1623772382736206, "rewards/margins": 1.3035484552383423, "rewards/rejected": -2.465925693511963, "step": 8334 }, { "epoch": 1.84, "learning_rate": 8.693978974571013e-06, "logits/chosen": -1.3142904043197632, "logits/rejected": -1.3334683179855347, "logps/chosen": -243.33218383789062, "logps/rejected": -277.5414733886719, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 1.2913239002227783, "rewards/margins": 5.385106086730957, "rewards/rejected": -4.0937819480896, "step": 8335 }, { "epoch": 1.85, "learning_rate": 8.692770834761017e-06, "logits/chosen": -1.3564549684524536, "logits/rejected": -1.4298784732818604, "logps/chosen": -223.14907836914062, "logps/rejected": -148.572265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.4503463804721832, "rewards/margins": 7.810417175292969, "rewards/rejected": -7.360070705413818, "step": 8336 }, { "epoch": 1.85, "learning_rate": 8.691562220438845e-06, "logits/chosen": -1.35188627243042, "logits/rejected": -1.3461347818374634, "logps/chosen": -210.168212890625, "logps/rejected": -268.1268005371094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 3.613638401031494, "rewards/margins": 7.265301704406738, "rewards/rejected": -3.651663303375244, "step": 8337 }, { "epoch": 1.85, "learning_rate": 8.690353131759802e-06, "logits/chosen": -1.5189111232757568, "logits/rejected": -1.5843541622161865, "logps/chosen": -122.91348266601562, "logps/rejected": -144.86001586914062, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -1.8140060901641846, "rewards/margins": 3.3243448734283447, "rewards/rejected": -5.138350963592529, "step": 8338 }, { "epoch": 1.85, "learning_rate": 8.689143568879252e-06, "logits/chosen": -1.5280671119689941, "logits/rejected": -1.4811346530914307, "logps/chosen": -147.3223876953125, "logps/rejected": -212.49728393554688, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": -1.1281861066818237, "rewards/margins": 2.99749755859375, "rewards/rejected": -4.125683784484863, "step": 8339 }, { "epoch": 1.85, "learning_rate": 8.687933531952624e-06, "logits/chosen": -1.2365261316299438, "logits/rejected": -1.203444004058838, "logps/chosen": -63.84807586669922, "logps/rejected": -104.99698638916016, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9217765927314758, "rewards/margins": 7.068932056427002, "rewards/rejected": -7.990708827972412, "step": 8340 }, { "epoch": 1.85, "learning_rate": 8.686723021135402e-06, "logits/chosen": -1.8699328899383545, "logits/rejected": -1.7549995183944702, "logps/chosen": -100.1333236694336, "logps/rejected": -191.83892822265625, "loss": 0.3595, "rewards/accuracies": 1.0, "rewards/chosen": 0.16106338798999786, "rewards/margins": 3.643631935119629, "rewards/rejected": -3.4825685024261475, "step": 8341 }, { "epoch": 1.85, "learning_rate": 8.685512036583132e-06, "logits/chosen": -1.7224385738372803, "logits/rejected": -1.6828609704971313, "logps/chosen": -133.93202209472656, "logps/rejected": -176.88145446777344, "loss": 0.8196, "rewards/accuracies": 0.0, "rewards/chosen": -2.44366455078125, "rewards/margins": -1.4231597185134888, "rewards/rejected": -1.0205048322677612, "step": 8342 }, { "epoch": 1.85, "learning_rate": 8.684300578451428e-06, "logits/chosen": -1.264235496520996, "logits/rejected": -1.000101089477539, "logps/chosen": -173.83810424804688, "logps/rejected": -361.45721435546875, "loss": 0.0782, "rewards/accuracies": 1.0, "rewards/chosen": -5.7811198234558105, "rewards/margins": 1.9351277351379395, "rewards/rejected": -7.71624755859375, "step": 8343 }, { "epoch": 1.85, "learning_rate": 8.683088646895955e-06, "logits/chosen": -1.6283926963806152, "logits/rejected": -1.447486162185669, "logps/chosen": -107.8486099243164, "logps/rejected": -263.07513427734375, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -3.9271461963653564, "rewards/margins": 3.427358388900757, "rewards/rejected": -7.354504585266113, "step": 8344 }, { "epoch": 1.85, "learning_rate": 8.681876242072445e-06, "logits/chosen": -1.300492763519287, "logits/rejected": -1.299293041229248, "logps/chosen": -113.56388092041016, "logps/rejected": -103.08335876464844, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.0258209705352783, "rewards/margins": 4.62352180480957, "rewards/rejected": -5.6493425369262695, "step": 8345 }, { "epoch": 1.85, "learning_rate": 8.68066336413669e-06, "logits/chosen": -1.4074740409851074, "logits/rejected": -1.496138572692871, "logps/chosen": -153.4912567138672, "logps/rejected": -176.70855712890625, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -2.461747884750366, "rewards/margins": 4.03718376159668, "rewards/rejected": -6.498931884765625, "step": 8346 }, { "epoch": 1.85, "learning_rate": 8.67945001324454e-06, "logits/chosen": -1.4951452016830444, "logits/rejected": -1.452871322631836, "logps/chosen": -88.40494537353516, "logps/rejected": -142.19625854492188, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": -0.9817299246788025, "rewards/margins": 2.584057569503784, "rewards/rejected": -3.5657875537872314, "step": 8347 }, { "epoch": 1.85, "learning_rate": 8.678236189551907e-06, "logits/chosen": -1.5361716747283936, "logits/rejected": -1.470574975013733, "logps/chosen": -164.6962127685547, "logps/rejected": -368.89727783203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.3968398571014404, "rewards/margins": 7.867143630981445, "rewards/rejected": -10.263983726501465, "step": 8348 }, { "epoch": 1.85, "learning_rate": 8.677021893214768e-06, "logits/chosen": -1.6128554344177246, "logits/rejected": -1.5901669263839722, "logps/chosen": -111.26774597167969, "logps/rejected": -154.05105590820312, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.74434894323349, "rewards/margins": 3.9457082748413086, "rewards/rejected": -4.690057277679443, "step": 8349 }, { "epoch": 1.85, "learning_rate": 8.675807124389153e-06, "logits/chosen": -1.3810676336288452, "logits/rejected": -1.3367359638214111, "logps/chosen": -134.23822021484375, "logps/rejected": -194.5247802734375, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -3.233105421066284, "rewards/margins": 6.294747352600098, "rewards/rejected": -9.527853012084961, "step": 8350 }, { "epoch": 1.85, "learning_rate": 8.67459188323116e-06, "logits/chosen": -1.5821651220321655, "logits/rejected": -1.5821651220321655, "logps/chosen": -161.0790557861328, "logps/rejected": -161.0790557861328, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -6.664150238037109, "rewards/margins": 0.0, "rewards/rejected": -6.664150238037109, "step": 8351 }, { "epoch": 1.85, "learning_rate": 8.673376169896944e-06, "logits/chosen": -1.0849796533584595, "logits/rejected": -1.231184959411621, "logps/chosen": -236.25228881835938, "logps/rejected": -121.923583984375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.5486007928848267, "rewards/margins": 7.92938756942749, "rewards/rejected": -9.477988243103027, "step": 8352 }, { "epoch": 1.85, "learning_rate": 8.672159984542721e-06, "logits/chosen": -2.030384063720703, "logits/rejected": -1.924472689628601, "logps/chosen": -93.90686798095703, "logps/rejected": -196.91600036621094, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 0.6502365469932556, "rewards/margins": 6.3511552810668945, "rewards/rejected": -5.700918674468994, "step": 8353 }, { "epoch": 1.85, "learning_rate": 8.670943327324767e-06, "logits/chosen": -1.2808817625045776, "logits/rejected": -1.2507392168045044, "logps/chosen": -103.49813842773438, "logps/rejected": -135.68252563476562, "loss": 0.3651, "rewards/accuracies": 1.0, "rewards/chosen": -1.7982208728790283, "rewards/margins": 1.457822322845459, "rewards/rejected": -3.2560431957244873, "step": 8354 }, { "epoch": 1.85, "learning_rate": 8.66972619839942e-06, "logits/chosen": -1.6436243057250977, "logits/rejected": -1.6428303718566895, "logps/chosen": -222.1425323486328, "logps/rejected": -209.12530517578125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.6613449454307556, "rewards/margins": 7.2072038650512695, "rewards/rejected": -7.86854887008667, "step": 8355 }, { "epoch": 1.85, "learning_rate": 8.668508597923077e-06, "logits/chosen": -1.3951258659362793, "logits/rejected": -1.4236243963241577, "logps/chosen": -137.6327667236328, "logps/rejected": -93.34391021728516, "loss": 0.0777, "rewards/accuracies": 1.0, "rewards/chosen": -5.054619789123535, "rewards/margins": 1.7839570045471191, "rewards/rejected": -6.838576793670654, "step": 8356 }, { "epoch": 1.85, "learning_rate": 8.6672905260522e-06, "logits/chosen": -1.3805408477783203, "logits/rejected": -1.3397612571716309, "logps/chosen": -93.78555297851562, "logps/rejected": -150.7135467529297, "loss": 0.0375, "rewards/accuracies": 1.0, "rewards/chosen": -3.1919095516204834, "rewards/margins": 2.5624196529388428, "rewards/rejected": -5.754329204559326, "step": 8357 }, { "epoch": 1.85, "learning_rate": 8.666071982943306e-06, "logits/chosen": -1.2491573095321655, "logits/rejected": -1.2256213426589966, "logps/chosen": -161.31863403320312, "logps/rejected": -213.45645141601562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.5432876944541931, "rewards/margins": 10.985325813293457, "rewards/rejected": -10.442038536071777, "step": 8358 }, { "epoch": 1.85, "learning_rate": 8.664852968752975e-06, "logits/chosen": -1.6363717317581177, "logits/rejected": -1.5772043466567993, "logps/chosen": -49.1151123046875, "logps/rejected": -146.57479858398438, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": 0.5738994479179382, "rewards/margins": 3.9816906452178955, "rewards/rejected": -3.4077911376953125, "step": 8359 }, { "epoch": 1.85, "learning_rate": 8.663633483637847e-06, "logits/chosen": -1.4117001295089722, "logits/rejected": -1.4734772443771362, "logps/chosen": -235.5684356689453, "logps/rejected": -227.48243713378906, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 1.7170120477676392, "rewards/margins": 4.947325229644775, "rewards/rejected": -3.2303130626678467, "step": 8360 }, { "epoch": 1.85, "learning_rate": 8.662413527754624e-06, "logits/chosen": -1.4622557163238525, "logits/rejected": -1.352157711982727, "logps/chosen": -164.60232543945312, "logps/rejected": -369.3515930175781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.40619203448295593, "rewards/margins": 14.384379386901855, "rewards/rejected": -13.978187561035156, "step": 8361 }, { "epoch": 1.85, "learning_rate": 8.661193101260067e-06, "logits/chosen": -1.3505213260650635, "logits/rejected": -1.4489176273345947, "logps/chosen": -154.0908966064453, "logps/rejected": -162.47689819335938, "loss": 0.0651, "rewards/accuracies": 1.0, "rewards/chosen": -1.532313585281372, "rewards/margins": 8.570464134216309, "rewards/rejected": -10.102777481079102, "step": 8362 }, { "epoch": 1.85, "learning_rate": 8.659972204310998e-06, "logits/chosen": -1.7451496124267578, "logits/rejected": -1.8052045106887817, "logps/chosen": -91.81468200683594, "logps/rejected": -119.26409912109375, "loss": 0.1108, "rewards/accuracies": 1.0, "rewards/chosen": 0.6152817010879517, "rewards/margins": 5.939227104187012, "rewards/rejected": -5.32394552230835, "step": 8363 }, { "epoch": 1.85, "learning_rate": 8.658750837064299e-06, "logits/chosen": -1.1517956256866455, "logits/rejected": -1.1606348752975464, "logps/chosen": -121.07698059082031, "logps/rejected": -106.13583374023438, "loss": 0.2392, "rewards/accuracies": 1.0, "rewards/chosen": -3.6333673000335693, "rewards/margins": 0.4910714626312256, "rewards/rejected": -4.124438762664795, "step": 8364 }, { "epoch": 1.85, "learning_rate": 8.657528999676912e-06, "logits/chosen": -1.1903334856033325, "logits/rejected": -1.139705777168274, "logps/chosen": -256.0250549316406, "logps/rejected": -294.0815734863281, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.40188905596733093, "rewards/margins": 7.0368194580078125, "rewards/rejected": -6.634930610656738, "step": 8365 }, { "epoch": 1.85, "learning_rate": 8.65630669230584e-06, "logits/chosen": -1.802746295928955, "logits/rejected": -1.6773600578308105, "logps/chosen": -132.6753692626953, "logps/rejected": -262.4452819824219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.0500717163085938, "rewards/margins": 10.028203010559082, "rewards/rejected": -7.978131294250488, "step": 8366 }, { "epoch": 1.85, "learning_rate": 8.65508391510815e-06, "logits/chosen": -1.281742811203003, "logits/rejected": -1.3121283054351807, "logps/chosen": -90.45712280273438, "logps/rejected": -157.23654174804688, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.4298500120639801, "rewards/margins": 7.613179683685303, "rewards/rejected": -8.04302978515625, "step": 8367 }, { "epoch": 1.85, "learning_rate": 8.653860668240963e-06, "logits/chosen": -1.34293794631958, "logits/rejected": -1.3194469213485718, "logps/chosen": -82.13594055175781, "logps/rejected": -274.57952880859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.5567229986190796, "rewards/margins": 8.392257690429688, "rewards/rejected": -6.835534572601318, "step": 8368 }, { "epoch": 1.85, "learning_rate": 8.652636951861463e-06, "logits/chosen": -1.3421111106872559, "logits/rejected": -1.23130202293396, "logps/chosen": -196.79864501953125, "logps/rejected": -349.30426025390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.082073926925659, "rewards/margins": 6.825213432312012, "rewards/rejected": -9.90728759765625, "step": 8369 }, { "epoch": 1.85, "learning_rate": 8.651412766126896e-06, "logits/chosen": -1.7616064548492432, "logits/rejected": -1.753217339515686, "logps/chosen": -161.27398681640625, "logps/rejected": -203.3731689453125, "loss": 0.0304, "rewards/accuracies": 1.0, "rewards/chosen": -5.6360015869140625, "rewards/margins": 4.478874206542969, "rewards/rejected": -10.114875793457031, "step": 8370 }, { "epoch": 1.85, "learning_rate": 8.650188111194565e-06, "logits/chosen": -1.6421301364898682, "logits/rejected": -1.7115373611450195, "logps/chosen": -120.40038299560547, "logps/rejected": -166.23443603515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4872932434082031, "rewards/margins": 8.70832347869873, "rewards/rejected": -10.195616722106934, "step": 8371 }, { "epoch": 1.85, "learning_rate": 8.648962987221837e-06, "logits/chosen": -1.655074954032898, "logits/rejected": -1.655074954032898, "logps/chosen": -197.7325897216797, "logps/rejected": -197.7325897216797, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.065312147140503, "rewards/margins": 0.0, "rewards/rejected": -2.065312147140503, "step": 8372 }, { "epoch": 1.85, "learning_rate": 8.647737394366138e-06, "logits/chosen": -1.2264039516448975, "logits/rejected": -1.2252761125564575, "logps/chosen": -177.58010864257812, "logps/rejected": -166.6707763671875, "loss": 0.0675, "rewards/accuracies": 1.0, "rewards/chosen": -3.1304900646209717, "rewards/margins": 5.976163864135742, "rewards/rejected": -9.106654167175293, "step": 8373 }, { "epoch": 1.85, "learning_rate": 8.646511332784953e-06, "logits/chosen": -1.384579062461853, "logits/rejected": -1.3350205421447754, "logps/chosen": -92.95687866210938, "logps/rejected": -290.906494140625, "loss": 0.4706, "rewards/accuracies": 1.0, "rewards/chosen": -3.0069377422332764, "rewards/margins": 11.971905708312988, "rewards/rejected": -14.978843688964844, "step": 8374 }, { "epoch": 1.85, "learning_rate": 8.645284802635827e-06, "logits/chosen": -1.4322534799575806, "logits/rejected": -1.3535606861114502, "logps/chosen": -129.42672729492188, "logps/rejected": -256.60906982421875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.1041665077209473, "rewards/margins": 7.637166500091553, "rewards/rejected": -10.7413330078125, "step": 8375 }, { "epoch": 1.85, "learning_rate": 8.644057804076367e-06, "logits/chosen": -1.2009567022323608, "logits/rejected": -1.2909663915634155, "logps/chosen": -179.703857421875, "logps/rejected": -216.87808227539062, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 1.6596710681915283, "rewards/margins": 5.233819961547852, "rewards/rejected": -3.574148654937744, "step": 8376 }, { "epoch": 1.85, "learning_rate": 8.642830337264239e-06, "logits/chosen": -1.1926347017288208, "logits/rejected": -0.7808157801628113, "logps/chosen": -188.37649536132812, "logps/rejected": -599.9112548828125, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -2.6601808071136475, "rewards/margins": 31.81553077697754, "rewards/rejected": -34.475711822509766, "step": 8377 }, { "epoch": 1.85, "learning_rate": 8.641602402357168e-06, "logits/chosen": -1.4348390102386475, "logits/rejected": -1.381103754043579, "logps/chosen": -211.62881469726562, "logps/rejected": -302.96319580078125, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": 3.2757935523986816, "rewards/margins": 11.76771354675293, "rewards/rejected": -8.49191951751709, "step": 8378 }, { "epoch": 1.85, "learning_rate": 8.640373999512946e-06, "logits/chosen": -1.3815940618515015, "logits/rejected": -1.5040961503982544, "logps/chosen": -172.53460693359375, "logps/rejected": -230.98385620117188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.811848521232605, "rewards/margins": 9.77000617980957, "rewards/rejected": -11.581854820251465, "step": 8379 }, { "epoch": 1.85, "learning_rate": 8.639145128889415e-06, "logits/chosen": -1.5420118570327759, "logits/rejected": -1.4589884281158447, "logps/chosen": -151.84970092773438, "logps/rejected": -197.5219268798828, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8348129391670227, "rewards/margins": 7.0343017578125, "rewards/rejected": -7.869114875793457, "step": 8380 }, { "epoch": 1.86, "learning_rate": 8.637915790644482e-06, "logits/chosen": -1.5289827585220337, "logits/rejected": -1.4187411069869995, "logps/chosen": -153.33087158203125, "logps/rejected": -273.17333984375, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": -2.7994041442871094, "rewards/margins": 6.119284629821777, "rewards/rejected": -8.918688774108887, "step": 8381 }, { "epoch": 1.86, "learning_rate": 8.636685984936115e-06, "logits/chosen": -1.30404794216156, "logits/rejected": -1.3486133813858032, "logps/chosen": -305.0312194824219, "logps/rejected": -230.8483428955078, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0020599365234375, "rewards/margins": 11.956748008728027, "rewards/rejected": -14.958807945251465, "step": 8382 }, { "epoch": 1.86, "learning_rate": 8.635455711922343e-06, "logits/chosen": -1.2513588666915894, "logits/rejected": -1.1960798501968384, "logps/chosen": -155.7471923828125, "logps/rejected": -237.4628143310547, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": 0.138285830616951, "rewards/margins": 5.687303066253662, "rewards/rejected": -5.549017429351807, "step": 8383 }, { "epoch": 1.86, "learning_rate": 8.634224971761251e-06, "logits/chosen": -1.2459661960601807, "logits/rejected": -0.7374427318572998, "logps/chosen": -184.02366638183594, "logps/rejected": -818.9603881835938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4195511043071747, "rewards/margins": 68.21546173095703, "rewards/rejected": -68.635009765625, "step": 8384 }, { "epoch": 1.86, "learning_rate": 8.632993764610986e-06, "logits/chosen": -1.6713374853134155, "logits/rejected": -1.1492834091186523, "logps/chosen": -71.13334655761719, "logps/rejected": -711.740234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.7448623776435852, "rewards/margins": 55.76341247558594, "rewards/rejected": -55.018550872802734, "step": 8385 }, { "epoch": 1.86, "learning_rate": 8.631762090629756e-06, "logits/chosen": -1.7295401096343994, "logits/rejected": -1.7249503135681152, "logps/chosen": -113.44441223144531, "logps/rejected": -122.10540771484375, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 0.4455520808696747, "rewards/margins": 4.160125732421875, "rewards/rejected": -3.714573621749878, "step": 8386 }, { "epoch": 1.86, "learning_rate": 8.630529949975828e-06, "logits/chosen": -1.6060445308685303, "logits/rejected": -1.662088394165039, "logps/chosen": -135.75625610351562, "logps/rejected": -114.09053039550781, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.9903160333633423, "rewards/margins": 4.573369979858398, "rewards/rejected": -5.563685894012451, "step": 8387 }, { "epoch": 1.86, "learning_rate": 8.629297342807528e-06, "logits/chosen": -1.0107113122940063, "logits/rejected": -0.30687084794044495, "logps/chosen": -61.75210952758789, "logps/rejected": -492.2888488769531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.395398736000061, "rewards/margins": 39.62502670288086, "rewards/rejected": -41.020423889160156, "step": 8388 }, { "epoch": 1.86, "learning_rate": 8.628064269283246e-06, "logits/chosen": -1.6164324283599854, "logits/rejected": -1.433862566947937, "logps/chosen": -149.9456787109375, "logps/rejected": -293.2431945800781, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.7860794067382812, "rewards/margins": 9.996684074401855, "rewards/rejected": -11.782763481140137, "step": 8389 }, { "epoch": 1.86, "learning_rate": 8.626830729561426e-06, "logits/chosen": -1.9583784341812134, "logits/rejected": -2.0055062770843506, "logps/chosen": -91.94728088378906, "logps/rejected": -95.15872192382812, "loss": 0.2998, "rewards/accuracies": 1.0, "rewards/chosen": -2.019662618637085, "rewards/margins": 0.2422943115234375, "rewards/rejected": -2.2619569301605225, "step": 8390 }, { "epoch": 1.86, "learning_rate": 8.625596723800575e-06, "logits/chosen": -1.556545615196228, "logits/rejected": -1.5201566219329834, "logps/chosen": -135.0806884765625, "logps/rejected": -292.46630859375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -3.637310028076172, "rewards/margins": 10.161245346069336, "rewards/rejected": -13.798555374145508, "step": 8391 }, { "epoch": 1.86, "learning_rate": 8.624362252159262e-06, "logits/chosen": -1.7680696249008179, "logits/rejected": -1.7895219326019287, "logps/chosen": -83.674560546875, "logps/rejected": -71.42543029785156, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -1.6832627058029175, "rewards/margins": 3.481379508972168, "rewards/rejected": -5.164642333984375, "step": 8392 }, { "epoch": 1.86, "learning_rate": 8.623127314796111e-06, "logits/chosen": -1.3518368005752563, "logits/rejected": -1.3518368005752563, "logps/chosen": -229.9778594970703, "logps/rejected": -229.9778594970703, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.550023078918457, "rewards/margins": 0.0, "rewards/rejected": -6.550023078918457, "step": 8393 }, { "epoch": 1.86, "learning_rate": 8.621891911869811e-06, "logits/chosen": -1.7188035249710083, "logits/rejected": -1.6489149332046509, "logps/chosen": -84.67997741699219, "logps/rejected": -141.88131713867188, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": -0.391012579202652, "rewards/margins": 2.472512722015381, "rewards/rejected": -2.863525390625, "step": 8394 }, { "epoch": 1.86, "learning_rate": 8.620656043539106e-06, "logits/chosen": -1.2742849588394165, "logits/rejected": -1.2742849588394165, "logps/chosen": -234.07354736328125, "logps/rejected": -234.07354736328125, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.293542385101318, "rewards/margins": 0.0, "rewards/rejected": -5.293542385101318, "step": 8395 }, { "epoch": 1.86, "learning_rate": 8.619419709962804e-06, "logits/chosen": -1.3288359642028809, "logits/rejected": -1.255852222442627, "logps/chosen": -198.70428466796875, "logps/rejected": -357.38385009765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.5421554446220398, "rewards/margins": 7.020625591278076, "rewards/rejected": -7.562780857086182, "step": 8396 }, { "epoch": 1.86, "learning_rate": 8.61818291129977e-06, "logits/chosen": -1.2283364534378052, "logits/rejected": -1.1695225238800049, "logps/chosen": -91.45854187011719, "logps/rejected": -142.33233642578125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.3674468994140625, "rewards/margins": 10.954221725463867, "rewards/rejected": -11.32166862487793, "step": 8397 }, { "epoch": 1.86, "learning_rate": 8.61694564770893e-06, "logits/chosen": -1.1442070007324219, "logits/rejected": -1.1799566745758057, "logps/chosen": -185.38775634765625, "logps/rejected": -218.78878784179688, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.6874817609786987, "rewards/margins": 8.425363540649414, "rewards/rejected": -10.112845420837402, "step": 8398 }, { "epoch": 1.86, "learning_rate": 8.61570791934927e-06, "logits/chosen": -0.9762265682220459, "logits/rejected": -0.9477365016937256, "logps/chosen": -138.5491943359375, "logps/rejected": -114.53898620605469, "loss": 0.4579, "rewards/accuracies": 0.0, "rewards/chosen": -4.012465953826904, "rewards/margins": -0.20279717445373535, "rewards/rejected": -3.809668779373169, "step": 8399 }, { "epoch": 1.86, "learning_rate": 8.614469726379833e-06, "logits/chosen": -0.9767031669616699, "logits/rejected": -1.0515393018722534, "logps/chosen": -281.6656188964844, "logps/rejected": -184.49319458007812, "loss": 0.1868, "rewards/accuracies": 1.0, "rewards/chosen": -6.419467449188232, "rewards/margins": 0.8395400047302246, "rewards/rejected": -7.259007453918457, "step": 8400 }, { "epoch": 1.86, "learning_rate": 8.613231068959726e-06, "logits/chosen": -1.5594080686569214, "logits/rejected": -1.586675763130188, "logps/chosen": -106.40010833740234, "logps/rejected": -134.49288940429688, "loss": 0.0899, "rewards/accuracies": 1.0, "rewards/chosen": -1.704293131828308, "rewards/margins": 1.6242705583572388, "rewards/rejected": -3.328563690185547, "step": 8401 }, { "epoch": 1.86, "learning_rate": 8.61199194724811e-06, "logits/chosen": -1.016113519668579, "logits/rejected": -1.016113519668579, "logps/chosen": -86.59526824951172, "logps/rejected": -86.59526824951172, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -6.761651515960693, "rewards/margins": 0.0, "rewards/rejected": -6.761651515960693, "step": 8402 }, { "epoch": 1.86, "learning_rate": 8.610752361404216e-06, "logits/chosen": -1.201552391052246, "logits/rejected": -1.184950351715088, "logps/chosen": -106.66690063476562, "logps/rejected": -121.3943099975586, "loss": 0.1588, "rewards/accuracies": 1.0, "rewards/chosen": -2.400315999984741, "rewards/margins": 0.9897353649139404, "rewards/rejected": -3.3900513648986816, "step": 8403 }, { "epoch": 1.86, "learning_rate": 8.60951231158732e-06, "logits/chosen": -1.5007134675979614, "logits/rejected": -1.5007134675979614, "logps/chosen": -294.39935302734375, "logps/rejected": -294.39935302734375, "loss": 0.3548, "rewards/accuracies": 0.0, "rewards/chosen": -11.628085136413574, "rewards/margins": 0.0, "rewards/rejected": -11.628085136413574, "step": 8404 }, { "epoch": 1.86, "learning_rate": 8.60827179795677e-06, "logits/chosen": -0.8442478775978088, "logits/rejected": -0.9199594855308533, "logps/chosen": -190.12954711914062, "logps/rejected": -150.2525634765625, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -0.47304078936576843, "rewards/margins": 3.449429988861084, "rewards/rejected": -3.922470808029175, "step": 8405 }, { "epoch": 1.86, "learning_rate": 8.607030820671969e-06, "logits/chosen": -1.374602198600769, "logits/rejected": -1.3560036420822144, "logps/chosen": -101.34236145019531, "logps/rejected": -92.49815368652344, "loss": 0.7753, "rewards/accuracies": 0.0, "rewards/chosen": -4.679393768310547, "rewards/margins": -1.3122210502624512, "rewards/rejected": -3.3671727180480957, "step": 8406 }, { "epoch": 1.86, "learning_rate": 8.605789379892378e-06, "logits/chosen": -1.6075801849365234, "logits/rejected": -1.5773773193359375, "logps/chosen": -97.89912414550781, "logps/rejected": -182.55960083007812, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -0.06040039286017418, "rewards/margins": 4.083343505859375, "rewards/rejected": -4.143743991851807, "step": 8407 }, { "epoch": 1.86, "learning_rate": 8.60454747577752e-06, "logits/chosen": -1.2765308618545532, "logits/rejected": -1.1648736000061035, "logps/chosen": -246.39962768554688, "logps/rejected": -361.8655700683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.6615936756134033, "rewards/margins": 12.578378677368164, "rewards/rejected": -9.91678524017334, "step": 8408 }, { "epoch": 1.86, "learning_rate": 8.603305108486975e-06, "logits/chosen": -1.2479578256607056, "logits/rejected": -1.237418532371521, "logps/chosen": -134.81350708007812, "logps/rejected": -154.50279235839844, "loss": 0.1482, "rewards/accuracies": 1.0, "rewards/chosen": -1.3923546075820923, "rewards/margins": 2.112860679626465, "rewards/rejected": -3.5052154064178467, "step": 8409 }, { "epoch": 1.86, "learning_rate": 8.602062278180388e-06, "logits/chosen": -1.575628638267517, "logits/rejected": -1.559907078742981, "logps/chosen": -136.3383331298828, "logps/rejected": -171.46774291992188, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.013055443763733, "rewards/margins": 3.9121484756469727, "rewards/rejected": -4.925203800201416, "step": 8410 }, { "epoch": 1.86, "learning_rate": 8.600818985017457e-06, "logits/chosen": -1.5903937816619873, "logits/rejected": -1.6247339248657227, "logps/chosen": -78.94470977783203, "logps/rejected": -129.06277465820312, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.4238990843296051, "rewards/margins": 4.902297019958496, "rewards/rejected": -5.326196193695068, "step": 8411 }, { "epoch": 1.86, "learning_rate": 8.59957522915794e-06, "logits/chosen": -1.4177528619766235, "logits/rejected": -1.4863613843917847, "logps/chosen": -203.23129272460938, "logps/rejected": -206.33175659179688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8366608023643494, "rewards/margins": 8.611983299255371, "rewards/rejected": -9.448643684387207, "step": 8412 }, { "epoch": 1.86, "learning_rate": 8.598331010761662e-06, "logits/chosen": -1.533136248588562, "logits/rejected": -1.5771018266677856, "logps/chosen": -177.66305541992188, "logps/rejected": -300.5271911621094, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 2.198230028152466, "rewards/margins": 16.972143173217773, "rewards/rejected": -14.773913383483887, "step": 8413 }, { "epoch": 1.86, "learning_rate": 8.597086329988498e-06, "logits/chosen": -1.4196391105651855, "logits/rejected": -1.309124231338501, "logps/chosen": -154.0603790283203, "logps/rejected": -256.78070068359375, "loss": 0.1016, "rewards/accuracies": 1.0, "rewards/chosen": -3.2956788539886475, "rewards/margins": 3.6767561435699463, "rewards/rejected": -6.972434997558594, "step": 8414 }, { "epoch": 1.86, "learning_rate": 8.595841186998388e-06, "logits/chosen": -1.6118203401565552, "logits/rejected": -1.6059092283248901, "logps/chosen": -109.48735046386719, "logps/rejected": -183.3613739013672, "loss": 0.3998, "rewards/accuracies": 1.0, "rewards/chosen": -3.88669753074646, "rewards/margins": 3.4739701747894287, "rewards/rejected": -7.360667705535889, "step": 8415 }, { "epoch": 1.86, "learning_rate": 8.594595581951329e-06, "logits/chosen": -1.3375195264816284, "logits/rejected": -1.3375195264816284, "logps/chosen": -90.04202270507812, "logps/rejected": -90.04202270507812, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -1.7255417108535767, "rewards/margins": 0.0, "rewards/rejected": -1.7255417108535767, "step": 8416 }, { "epoch": 1.86, "learning_rate": 8.593349515007379e-06, "logits/chosen": -1.4566516876220703, "logits/rejected": -1.318537950515747, "logps/chosen": -185.44931030273438, "logps/rejected": -339.48138427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.16784973442554474, "rewards/margins": 9.606024742126465, "rewards/rejected": -9.773874282836914, "step": 8417 }, { "epoch": 1.86, "learning_rate": 8.592102986326656e-06, "logits/chosen": -1.7691940069198608, "logits/rejected": -1.6806364059448242, "logps/chosen": -97.64251708984375, "logps/rejected": -259.8630065917969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.8629310727119446, "rewards/margins": 9.686213493347168, "rewards/rejected": -8.823282241821289, "step": 8418 }, { "epoch": 1.86, "learning_rate": 8.590855996069334e-06, "logits/chosen": -1.8700051307678223, "logits/rejected": -1.9141170978546143, "logps/chosen": -200.94122314453125, "logps/rejected": -180.6062774658203, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.21290922164917, "rewards/margins": 5.6356282234191895, "rewards/rejected": -10.84853744506836, "step": 8419 }, { "epoch": 1.86, "learning_rate": 8.589608544395646e-06, "logits/chosen": -1.3969073295593262, "logits/rejected": -1.4299830198287964, "logps/chosen": -172.21951293945312, "logps/rejected": -185.73776245117188, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.9879699945449829, "rewards/margins": 9.717653274536133, "rewards/rejected": -8.729682922363281, "step": 8420 }, { "epoch": 1.86, "learning_rate": 8.588360631465893e-06, "logits/chosen": -1.2726454734802246, "logits/rejected": -1.2315523624420166, "logps/chosen": -101.954345703125, "logps/rejected": -203.0283660888672, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.2935958802700043, "rewards/margins": 10.015305519104004, "rewards/rejected": -9.721709251403809, "step": 8421 }, { "epoch": 1.86, "learning_rate": 8.587112257440422e-06, "logits/chosen": -1.7541358470916748, "logits/rejected": -1.7361053228378296, "logps/chosen": -140.0308837890625, "logps/rejected": -222.48455810546875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.0883407592773438, "rewards/margins": 6.526883125305176, "rewards/rejected": -9.61522388458252, "step": 8422 }, { "epoch": 1.86, "learning_rate": 8.585863422479652e-06, "logits/chosen": -1.4547863006591797, "logits/rejected": -1.5055090188980103, "logps/chosen": -94.28085327148438, "logps/rejected": -93.61890411376953, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.04232330247759819, "rewards/margins": 7.477750778198242, "rewards/rejected": -7.520073890686035, "step": 8423 }, { "epoch": 1.86, "learning_rate": 8.584614126744051e-06, "logits/chosen": -1.3247365951538086, "logits/rejected": -1.3139020204544067, "logps/chosen": -58.55944061279297, "logps/rejected": -82.28012084960938, "loss": 0.2827, "rewards/accuracies": 1.0, "rewards/chosen": -2.333158493041992, "rewards/margins": 0.27442145347595215, "rewards/rejected": -2.6075799465179443, "step": 8424 }, { "epoch": 1.86, "learning_rate": 8.583364370394152e-06, "logits/chosen": -1.4759387969970703, "logits/rejected": -1.4703465700149536, "logps/chosen": -227.47158813476562, "logps/rejected": -277.1031188964844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.891156196594238, "rewards/margins": 8.273386001586914, "rewards/rejected": -14.164542198181152, "step": 8425 }, { "epoch": 1.86, "learning_rate": 8.582114153590543e-06, "logits/chosen": -1.4000953435897827, "logits/rejected": -1.384139895439148, "logps/chosen": -124.27464294433594, "logps/rejected": -172.44497680664062, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -5.653436183929443, "rewards/margins": 4.119566440582275, "rewards/rejected": -9.773002624511719, "step": 8426 }, { "epoch": 1.87, "learning_rate": 8.58086347649388e-06, "logits/chosen": -1.648733377456665, "logits/rejected": -1.6103215217590332, "logps/chosen": -114.86239624023438, "logps/rejected": -294.29156494140625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.6391022205352783, "rewards/margins": 6.657009124755859, "rewards/rejected": -9.296111106872559, "step": 8427 }, { "epoch": 1.87, "learning_rate": 8.579612339264867e-06, "logits/chosen": -1.7322529554367065, "logits/rejected": -1.836126685142517, "logps/chosen": -197.5661163330078, "logps/rejected": -168.02635192871094, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -2.9140031337738037, "rewards/margins": 4.038208961486816, "rewards/rejected": -6.952211856842041, "step": 8428 }, { "epoch": 1.87, "learning_rate": 8.578360742064274e-06, "logits/chosen": -1.1169894933700562, "logits/rejected": -0.6157119274139404, "logps/chosen": -179.51126098632812, "logps/rejected": -753.0739135742188, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -3.158825635910034, "rewards/margins": 54.40754699707031, "rewards/rejected": -57.56637191772461, "step": 8429 }, { "epoch": 1.87, "learning_rate": 8.577108685052927e-06, "logits/chosen": -1.8903915882110596, "logits/rejected": -1.8916548490524292, "logps/chosen": -109.21961975097656, "logps/rejected": -107.3638687133789, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.5697076320648193, "rewards/margins": 5.323253631591797, "rewards/rejected": -7.892961025238037, "step": 8430 }, { "epoch": 1.87, "learning_rate": 8.575856168391714e-06, "logits/chosen": -1.8235960006713867, "logits/rejected": -1.8034213781356812, "logps/chosen": -123.91120910644531, "logps/rejected": -165.95376586914062, "loss": 0.4102, "rewards/accuracies": 0.0, "rewards/chosen": -3.3170578479766846, "rewards/margins": -0.23815155029296875, "rewards/rejected": -3.078906297683716, "step": 8431 }, { "epoch": 1.87, "learning_rate": 8.57460319224158e-06, "logits/chosen": -1.448243260383606, "logits/rejected": -1.4772080183029175, "logps/chosen": -138.96978759765625, "logps/rejected": -248.66958618164062, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.0605392456054688, "rewards/margins": 11.342740058898926, "rewards/rejected": -13.403279304504395, "step": 8432 }, { "epoch": 1.87, "learning_rate": 8.573349756763527e-06, "logits/chosen": -1.5893802642822266, "logits/rejected": -1.5314664840698242, "logps/chosen": -119.87503051757812, "logps/rejected": -197.46456909179688, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.5316827297210693, "rewards/margins": 6.777010917663574, "rewards/rejected": -9.308693885803223, "step": 8433 }, { "epoch": 1.87, "learning_rate": 8.572095862118621e-06, "logits/chosen": -1.4276636838912964, "logits/rejected": -1.3494040966033936, "logps/chosen": -69.34880065917969, "logps/rejected": -237.988037109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1599597930908203, "rewards/margins": 8.531174659729004, "rewards/rejected": -9.691134452819824, "step": 8434 }, { "epoch": 1.87, "learning_rate": 8.570841508467984e-06, "logits/chosen": -1.8640515804290771, "logits/rejected": -1.8514853715896606, "logps/chosen": -135.9099578857422, "logps/rejected": -119.8474349975586, "loss": 0.0342, "rewards/accuracies": 1.0, "rewards/chosen": -0.5669296383857727, "rewards/margins": 2.6725990772247314, "rewards/rejected": -3.2395286560058594, "step": 8435 }, { "epoch": 1.87, "learning_rate": 8.569586695972798e-06, "logits/chosen": -1.5114903450012207, "logits/rejected": -1.5114903450012207, "logps/chosen": -80.60282897949219, "logps/rejected": -80.60282897949219, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -1.5202438831329346, "rewards/margins": 0.0, "rewards/rejected": -1.5202438831329346, "step": 8436 }, { "epoch": 1.87, "learning_rate": 8.568331424794301e-06, "logits/chosen": -1.8333497047424316, "logits/rejected": -1.7507660388946533, "logps/chosen": -133.30401611328125, "logps/rejected": -306.3118591308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.3834335505962372, "rewards/margins": 15.794000625610352, "rewards/rejected": -15.410567283630371, "step": 8437 }, { "epoch": 1.87, "learning_rate": 8.567075695093796e-06, "logits/chosen": -1.6274983882904053, "logits/rejected": -1.572519063949585, "logps/chosen": -133.65243530273438, "logps/rejected": -178.47181701660156, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -1.6797096729278564, "rewards/margins": 3.0067880153656006, "rewards/rejected": -4.686497688293457, "step": 8438 }, { "epoch": 1.87, "learning_rate": 8.565819507032637e-06, "logits/chosen": -1.9160873889923096, "logits/rejected": -1.9008216857910156, "logps/chosen": -165.87530517578125, "logps/rejected": -228.11859130859375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -8.1246976852417, "rewards/margins": 5.09135627746582, "rewards/rejected": -13.21605396270752, "step": 8439 }, { "epoch": 1.87, "learning_rate": 8.564562860772246e-06, "logits/chosen": -1.7344129085540771, "logits/rejected": -1.7607824802398682, "logps/chosen": -212.03195190429688, "logps/rejected": -201.12525939941406, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -3.4390761852264404, "rewards/margins": 3.3317201137542725, "rewards/rejected": -6.770796298980713, "step": 8440 }, { "epoch": 1.87, "learning_rate": 8.563305756474094e-06, "logits/chosen": -1.4170554876327515, "logits/rejected": -1.4369456768035889, "logps/chosen": -112.27369689941406, "logps/rejected": -112.32782745361328, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.27651748061180115, "rewards/margins": 8.644341468811035, "rewards/rejected": -8.920859336853027, "step": 8441 }, { "epoch": 1.87, "learning_rate": 8.562048194299719e-06, "logits/chosen": -1.7180683612823486, "logits/rejected": -1.7164644002914429, "logps/chosen": -255.982177734375, "logps/rejected": -292.1787414550781, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -3.342089891433716, "rewards/margins": 3.1785905361175537, "rewards/rejected": -6.5206804275512695, "step": 8442 }, { "epoch": 1.87, "learning_rate": 8.560790174410713e-06, "logits/chosen": -1.6594362258911133, "logits/rejected": -1.5687992572784424, "logps/chosen": -93.94145965576172, "logps/rejected": -169.0413818359375, "loss": 0.0928, "rewards/accuracies": 1.0, "rewards/chosen": -2.1894187927246094, "rewards/margins": 1.589994192123413, "rewards/rejected": -3.7794129848480225, "step": 8443 }, { "epoch": 1.87, "learning_rate": 8.559531696968733e-06, "logits/chosen": -1.9918466806411743, "logits/rejected": -1.8917129039764404, "logps/chosen": -71.65685272216797, "logps/rejected": -143.89816284179688, "loss": 0.1664, "rewards/accuracies": 1.0, "rewards/chosen": -4.268449306488037, "rewards/margins": 0.9290971755981445, "rewards/rejected": -5.197546482086182, "step": 8444 }, { "epoch": 1.87, "learning_rate": 8.558272762135483e-06, "logits/chosen": -1.6248154640197754, "logits/rejected": -1.5904110670089722, "logps/chosen": -129.92767333984375, "logps/rejected": -270.53521728515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2829269468784332, "rewards/margins": 8.22810173034668, "rewards/rejected": -7.9451751708984375, "step": 8445 }, { "epoch": 1.87, "learning_rate": 8.557013370072737e-06, "logits/chosen": -1.6660420894622803, "logits/rejected": -1.8876394033432007, "logps/chosen": -201.36480712890625, "logps/rejected": -120.4894790649414, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.17717590928077698, "rewards/margins": 9.123883247375488, "rewards/rejected": -9.301058769226074, "step": 8446 }, { "epoch": 1.87, "learning_rate": 8.555753520942327e-06, "logits/chosen": -1.3804430961608887, "logits/rejected": -1.4613133668899536, "logps/chosen": -169.83413696289062, "logps/rejected": -184.7649688720703, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.713604748249054, "rewards/margins": 9.962326049804688, "rewards/rejected": -10.675930976867676, "step": 8447 }, { "epoch": 1.87, "learning_rate": 8.554493214906135e-06, "logits/chosen": -1.420161485671997, "logits/rejected": -1.3718461990356445, "logps/chosen": -160.41925048828125, "logps/rejected": -253.8136749267578, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.0543991327285767, "rewards/margins": 13.669150352478027, "rewards/rejected": -12.614750862121582, "step": 8448 }, { "epoch": 1.87, "learning_rate": 8.55323245212611e-06, "logits/chosen": -1.1042934656143188, "logits/rejected": -1.2351033687591553, "logps/chosen": -331.304931640625, "logps/rejected": -171.83889770507812, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7049835324287415, "rewards/margins": 6.2743377685546875, "rewards/rejected": -6.979321479797363, "step": 8449 }, { "epoch": 1.87, "learning_rate": 8.551971232764255e-06, "logits/chosen": -1.4581823348999023, "logits/rejected": -1.4837783575057983, "logps/chosen": -150.126708984375, "logps/rejected": -188.3540496826172, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 0.8077850341796875, "rewards/margins": 4.247349739074707, "rewards/rejected": -3.4395644664764404, "step": 8450 }, { "epoch": 1.87, "learning_rate": 8.550709556982637e-06, "logits/chosen": -1.634626030921936, "logits/rejected": -1.6005442142486572, "logps/chosen": -154.2477569580078, "logps/rejected": -253.80526733398438, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.078948974609375, "rewards/margins": 6.281096458435059, "rewards/rejected": -9.360045433044434, "step": 8451 }, { "epoch": 1.87, "learning_rate": 8.549447424943379e-06, "logits/chosen": -1.3647325038909912, "logits/rejected": -1.433260440826416, "logps/chosen": -202.43048095703125, "logps/rejected": -148.54713439941406, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -2.031564474105835, "rewards/margins": 4.348906517028809, "rewards/rejected": -6.380471229553223, "step": 8452 }, { "epoch": 1.87, "learning_rate": 8.548184836808657e-06, "logits/chosen": -1.5982962846755981, "logits/rejected": -1.8185079097747803, "logps/chosen": -253.02452087402344, "logps/rejected": -136.2439422607422, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.14027558267116547, "rewards/margins": 9.130095481872559, "rewards/rejected": -8.989819526672363, "step": 8453 }, { "epoch": 1.87, "learning_rate": 8.546921792740712e-06, "logits/chosen": -1.2746466398239136, "logits/rejected": -1.1972354650497437, "logps/chosen": -178.16299438476562, "logps/rejected": -327.1319580078125, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -2.9440338611602783, "rewards/margins": 10.488008499145508, "rewards/rejected": -13.432042121887207, "step": 8454 }, { "epoch": 1.87, "learning_rate": 8.545658292901844e-06, "logits/chosen": -1.2307857275009155, "logits/rejected": -1.2967809438705444, "logps/chosen": -205.47195434570312, "logps/rejected": -104.86077117919922, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.5312652587890625, "rewards/margins": 7.130883693695068, "rewards/rejected": -7.662148952484131, "step": 8455 }, { "epoch": 1.87, "learning_rate": 8.544394337454409e-06, "logits/chosen": -1.384352207183838, "logits/rejected": -1.4775151014328003, "logps/chosen": -165.94886779785156, "logps/rejected": -344.25909423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3029372692108154, "rewards/margins": 17.542827606201172, "rewards/rejected": -20.84576416015625, "step": 8456 }, { "epoch": 1.87, "learning_rate": 8.543129926560822e-06, "logits/chosen": -1.5846362113952637, "logits/rejected": -1.6294399499893188, "logps/chosen": -164.21795654296875, "logps/rejected": -157.30059814453125, "loss": 0.1031, "rewards/accuracies": 1.0, "rewards/chosen": -3.4565765857696533, "rewards/margins": 1.543804407119751, "rewards/rejected": -5.000380992889404, "step": 8457 }, { "epoch": 1.87, "learning_rate": 8.541865060383559e-06, "logits/chosen": -1.3649930953979492, "logits/rejected": -1.2746578454971313, "logps/chosen": -85.14177703857422, "logps/rejected": -268.8180847167969, "loss": 0.06, "rewards/accuracies": 1.0, "rewards/chosen": -1.4692344665527344, "rewards/margins": 8.345121383666992, "rewards/rejected": -9.814355850219727, "step": 8458 }, { "epoch": 1.87, "learning_rate": 8.540599739085147e-06, "logits/chosen": -1.6927027702331543, "logits/rejected": -1.7877414226531982, "logps/chosen": -207.89749145507812, "logps/rejected": -174.8028564453125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.7472381591796875, "rewards/margins": 10.279180526733398, "rewards/rejected": -9.531942367553711, "step": 8459 }, { "epoch": 1.87, "learning_rate": 8.539333962828182e-06, "logits/chosen": -1.166434407234192, "logits/rejected": -1.115628957748413, "logps/chosen": -150.3050994873047, "logps/rejected": -267.8428039550781, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -3.9566733837127686, "rewards/margins": 4.050002098083496, "rewards/rejected": -8.006675720214844, "step": 8460 }, { "epoch": 1.87, "learning_rate": 8.53806773177531e-06, "logits/chosen": -1.322163462638855, "logits/rejected": -1.3561800718307495, "logps/chosen": -249.94786071777344, "logps/rejected": -198.4235382080078, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -3.572688341140747, "rewards/margins": 6.011065483093262, "rewards/rejected": -9.58375358581543, "step": 8461 }, { "epoch": 1.87, "learning_rate": 8.53680104608924e-06, "logits/chosen": -1.4483551979064941, "logits/rejected": -1.4879868030548096, "logps/chosen": -89.28974914550781, "logps/rejected": -97.05958557128906, "loss": 0.0702, "rewards/accuracies": 1.0, "rewards/chosen": -0.5130195617675781, "rewards/margins": 1.8937242031097412, "rewards/rejected": -2.4067437648773193, "step": 8462 }, { "epoch": 1.87, "learning_rate": 8.535533905932739e-06, "logits/chosen": -1.524038553237915, "logits/rejected": -1.581945776939392, "logps/chosen": -249.7929229736328, "logps/rejected": -197.48611450195312, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -2.250544786453247, "rewards/margins": 4.624137878417969, "rewards/rejected": -6.874682903289795, "step": 8463 }, { "epoch": 1.87, "learning_rate": 8.534266311468629e-06, "logits/chosen": -1.7680737972259521, "logits/rejected": -1.7680737972259521, "logps/chosen": -266.5516357421875, "logps/rejected": -266.5516357421875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": -12.761946678161621, "rewards/margins": 0.0, "rewards/rejected": -12.761946678161621, "step": 8464 }, { "epoch": 1.87, "learning_rate": 8.532998262859794e-06, "logits/chosen": -1.406083106994629, "logits/rejected": -1.406083106994629, "logps/chosen": -183.72763061523438, "logps/rejected": -183.72763061523438, "loss": 0.3544, "rewards/accuracies": 0.0, "rewards/chosen": -5.428596496582031, "rewards/margins": 0.0, "rewards/rejected": -5.428596496582031, "step": 8465 }, { "epoch": 1.87, "learning_rate": 8.531729760269176e-06, "logits/chosen": -1.9309970140457153, "logits/rejected": -1.7781320810317993, "logps/chosen": -106.67848205566406, "logps/rejected": -243.89248657226562, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.2534210681915283, "rewards/margins": 8.283809661865234, "rewards/rejected": -10.537230491638184, "step": 8466 }, { "epoch": 1.87, "learning_rate": 8.530460803859772e-06, "logits/chosen": -1.190518856048584, "logits/rejected": -1.1154046058654785, "logps/chosen": -157.0509033203125, "logps/rejected": -232.5457763671875, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.10107117146253586, "rewards/margins": 4.076385498046875, "rewards/rejected": -4.177456855773926, "step": 8467 }, { "epoch": 1.87, "learning_rate": 8.529191393794645e-06, "logits/chosen": -1.2412289381027222, "logits/rejected": -1.1992095708847046, "logps/chosen": -208.35073852539062, "logps/rejected": -229.07815551757812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.05039381980896, "rewards/margins": 11.413678169250488, "rewards/rejected": -9.36328411102295, "step": 8468 }, { "epoch": 1.87, "learning_rate": 8.527921530236905e-06, "logits/chosen": -1.1126362085342407, "logits/rejected": -0.7002407312393188, "logps/chosen": -133.22976684570312, "logps/rejected": -790.7973022460938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.0589966773986816, "rewards/margins": 48.63385772705078, "rewards/rejected": -46.574859619140625, "step": 8469 }, { "epoch": 1.87, "learning_rate": 8.52665121334973e-06, "logits/chosen": -1.8888580799102783, "logits/rejected": -1.8396128416061401, "logps/chosen": -128.36358642578125, "logps/rejected": -207.87576293945312, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.91002357006073, "rewards/margins": 6.838383674621582, "rewards/rejected": -8.748407363891602, "step": 8470 }, { "epoch": 1.87, "learning_rate": 8.525380443296353e-06, "logits/chosen": -1.26179039478302, "logits/rejected": -1.151181936264038, "logps/chosen": -130.90647888183594, "logps/rejected": -286.8985595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.354997396469116, "rewards/margins": 10.391618728637695, "rewards/rejected": -7.03662109375, "step": 8471 }, { "epoch": 1.88, "learning_rate": 8.524109220240064e-06, "logits/chosen": -1.5425095558166504, "logits/rejected": -1.5068286657333374, "logps/chosen": -156.17974853515625, "logps/rejected": -174.12957763671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2827743589878082, "rewards/margins": 8.435147285461426, "rewards/rejected": -8.152373313903809, "step": 8472 }, { "epoch": 1.88, "learning_rate": 8.52283754434421e-06, "logits/chosen": -1.5360580682754517, "logits/rejected": -1.5452762842178345, "logps/chosen": -69.11040496826172, "logps/rejected": -120.80027770996094, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.2086762189865112, "rewards/margins": 6.698260307312012, "rewards/rejected": -7.9069366455078125, "step": 8473 }, { "epoch": 1.88, "learning_rate": 8.521565415772201e-06, "logits/chosen": -1.4952081441879272, "logits/rejected": -1.4952081441879272, "logps/chosen": -64.67210388183594, "logps/rejected": -64.67210388183594, "loss": 0.3469, "rewards/accuracies": 0.0, "rewards/chosen": -4.93661642074585, "rewards/margins": 0.0, "rewards/rejected": -4.93661642074585, "step": 8474 }, { "epoch": 1.88, "learning_rate": 8.520292834687503e-06, "logits/chosen": -1.384168267250061, "logits/rejected": -1.4715008735656738, "logps/chosen": -198.27340698242188, "logps/rejected": -230.22064208984375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.2404907941818237, "rewards/margins": 12.289046287536621, "rewards/rejected": -11.048555374145508, "step": 8475 }, { "epoch": 1.88, "learning_rate": 8.519019801253637e-06, "logits/chosen": -1.212665319442749, "logits/rejected": -1.204222321510315, "logps/chosen": -101.812255859375, "logps/rejected": -188.20053100585938, "loss": 0.0291, "rewards/accuracies": 1.0, "rewards/chosen": -3.757497549057007, "rewards/margins": 3.0123846530914307, "rewards/rejected": -6.7698822021484375, "step": 8476 }, { "epoch": 1.88, "learning_rate": 8.517746315634186e-06, "logits/chosen": -1.6423475742340088, "logits/rejected": -1.6423475742340088, "logps/chosen": -181.5159912109375, "logps/rejected": -181.5159912109375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.5386383533477783, "rewards/margins": 0.0, "rewards/rejected": -2.5386383533477783, "step": 8477 }, { "epoch": 1.88, "learning_rate": 8.51647237799279e-06, "logits/chosen": -1.3742281198501587, "logits/rejected": -1.3375661373138428, "logps/chosen": -141.20419311523438, "logps/rejected": -168.15048217773438, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.288507103919983, "rewards/margins": 4.641814708709717, "rewards/rejected": -5.93032169342041, "step": 8478 }, { "epoch": 1.88, "learning_rate": 8.515197988493146e-06, "logits/chosen": -1.3974320888519287, "logits/rejected": -1.3652377128601074, "logps/chosen": -42.11793518066406, "logps/rejected": -86.0543441772461, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -1.6162556409835815, "rewards/margins": 3.3979368209838867, "rewards/rejected": -5.014192581176758, "step": 8479 }, { "epoch": 1.88, "learning_rate": 8.513923147299012e-06, "logits/chosen": -1.5779125690460205, "logits/rejected": -1.5690799951553345, "logps/chosen": -86.0087890625, "logps/rejected": -87.80260467529297, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.8102806210517883, "rewards/margins": 5.36796760559082, "rewards/rejected": -6.178248405456543, "step": 8480 }, { "epoch": 1.88, "learning_rate": 8.512647854574201e-06, "logits/chosen": -1.6515942811965942, "logits/rejected": -1.5206243991851807, "logps/chosen": -159.5423126220703, "logps/rejected": -291.2594909667969, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 3.079942464828491, "rewards/margins": 7.262526512145996, "rewards/rejected": -4.182583808898926, "step": 8481 }, { "epoch": 1.88, "learning_rate": 8.511372110482583e-06, "logits/chosen": -1.6340866088867188, "logits/rejected": -1.588884949684143, "logps/chosen": -69.44615173339844, "logps/rejected": -231.775390625, "loss": 0.0679, "rewards/accuracies": 1.0, "rewards/chosen": -2.9751229286193848, "rewards/margins": 2.838677406311035, "rewards/rejected": -5.81380033493042, "step": 8482 }, { "epoch": 1.88, "learning_rate": 8.510095915188093e-06, "logits/chosen": -1.7325314283370972, "logits/rejected": -1.718216896057129, "logps/chosen": -93.20631408691406, "logps/rejected": -107.70118713378906, "loss": 0.0788, "rewards/accuracies": 1.0, "rewards/chosen": -0.5095176696777344, "rewards/margins": 1.7955818176269531, "rewards/rejected": -2.3050994873046875, "step": 8483 }, { "epoch": 1.88, "learning_rate": 8.508819268854713e-06, "logits/chosen": -1.268215298652649, "logits/rejected": -1.2334535121917725, "logps/chosen": -78.62007904052734, "logps/rejected": -205.83633422851562, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.8324201107025146, "rewards/margins": 7.150991439819336, "rewards/rejected": -9.98341178894043, "step": 8484 }, { "epoch": 1.88, "learning_rate": 8.507542171646493e-06, "logits/chosen": -1.6241806745529175, "logits/rejected": -1.6241806745529175, "logps/chosen": -166.55990600585938, "logps/rejected": -166.55990600585938, "loss": 0.3481, "rewards/accuracies": 0.0, "rewards/chosen": -9.489825248718262, "rewards/margins": 0.0, "rewards/rejected": -9.489825248718262, "step": 8485 }, { "epoch": 1.88, "learning_rate": 8.506264623727536e-06, "logits/chosen": -1.2366321086883545, "logits/rejected": -1.2569162845611572, "logps/chosen": -140.14306640625, "logps/rejected": -245.732177734375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -4.5786895751953125, "rewards/margins": 5.491154670715332, "rewards/rejected": -10.069844245910645, "step": 8486 }, { "epoch": 1.88, "learning_rate": 8.504986625262004e-06, "logits/chosen": -1.2460579872131348, "logits/rejected": -1.35714590549469, "logps/chosen": -235.13514709472656, "logps/rejected": -210.62266540527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.209126353263855, "rewards/margins": 11.633618354797363, "rewards/rejected": -10.424491882324219, "step": 8487 }, { "epoch": 1.88, "learning_rate": 8.503708176414115e-06, "logits/chosen": -1.5149692296981812, "logits/rejected": -1.4593653678894043, "logps/chosen": -79.60490417480469, "logps/rejected": -129.3712615966797, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.3014930784702301, "rewards/margins": 5.172475337982178, "rewards/rejected": -5.473968505859375, "step": 8488 }, { "epoch": 1.88, "learning_rate": 8.50242927734815e-06, "logits/chosen": -1.4153735637664795, "logits/rejected": -1.4153735637664795, "logps/chosen": -168.6890869140625, "logps/rejected": -168.6890869140625, "loss": 0.8198, "rewards/accuracies": 0.0, "rewards/chosen": -9.479715347290039, "rewards/margins": 0.0, "rewards/rejected": -9.479715347290039, "step": 8489 }, { "epoch": 1.88, "learning_rate": 8.501149928228441e-06, "logits/chosen": -1.4326545000076294, "logits/rejected": -1.4380651712417603, "logps/chosen": -114.06458282470703, "logps/rejected": -99.3148193359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9162300229072571, "rewards/margins": 6.364376068115234, "rewards/rejected": -7.280606269836426, "step": 8490 }, { "epoch": 1.88, "learning_rate": 8.499870129219383e-06, "logits/chosen": -1.7412678003311157, "logits/rejected": -1.6476918458938599, "logps/chosen": -173.89210510253906, "logps/rejected": -337.13037109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4468125104904175, "rewards/margins": 17.15287971496582, "rewards/rejected": -18.59969139099121, "step": 8491 }, { "epoch": 1.88, "learning_rate": 8.498589880485428e-06, "logits/chosen": -1.6568800210952759, "logits/rejected": -1.6025010347366333, "logps/chosen": -82.60758972167969, "logps/rejected": -246.3523712158203, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -1.5539512634277344, "rewards/margins": 9.291216850280762, "rewards/rejected": -10.845168113708496, "step": 8492 }, { "epoch": 1.88, "learning_rate": 8.497309182191082e-06, "logits/chosen": -1.2765414714813232, "logits/rejected": -1.2671016454696655, "logps/chosen": -100.3583755493164, "logps/rejected": -127.14398193359375, "loss": 0.0708, "rewards/accuracies": 1.0, "rewards/chosen": -2.4991347789764404, "rewards/margins": 1.8832437992095947, "rewards/rejected": -4.382378578186035, "step": 8493 }, { "epoch": 1.88, "learning_rate": 8.496028034500914e-06, "logits/chosen": -1.334324598312378, "logits/rejected": -1.3060091733932495, "logps/chosen": -187.94285583496094, "logps/rejected": -228.809814453125, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -9.941515922546387, "rewards/margins": 3.5358705520629883, "rewards/rejected": -13.477386474609375, "step": 8494 }, { "epoch": 1.88, "learning_rate": 8.49474643757955e-06, "logits/chosen": -1.3837229013442993, "logits/rejected": -1.2863589525222778, "logps/chosen": -142.77572631835938, "logps/rejected": -254.04420471191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.09506072849035263, "rewards/margins": 14.45207405090332, "rewards/rejected": -14.547134399414062, "step": 8495 }, { "epoch": 1.88, "learning_rate": 8.493464391591665e-06, "logits/chosen": -1.1973812580108643, "logits/rejected": -1.1890486478805542, "logps/chosen": -186.05160522460938, "logps/rejected": -206.0939483642578, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.904943823814392, "rewards/margins": 12.511406898498535, "rewards/rejected": -10.606463432312012, "step": 8496 }, { "epoch": 1.88, "learning_rate": 8.492181896702008e-06, "logits/chosen": -1.3932626247406006, "logits/rejected": -1.3007683753967285, "logps/chosen": -78.18386840820312, "logps/rejected": -254.76271057128906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 1.2998443841934204, "rewards/margins": 7.600895881652832, "rewards/rejected": -6.301051616668701, "step": 8497 }, { "epoch": 1.88, "learning_rate": 8.49089895307537e-06, "logits/chosen": -2.008922815322876, "logits/rejected": -1.7423595190048218, "logps/chosen": -160.38430786132812, "logps/rejected": -331.1583557128906, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -3.2871811389923096, "rewards/margins": 4.644548416137695, "rewards/rejected": -7.931729316711426, "step": 8498 }, { "epoch": 1.88, "learning_rate": 8.48961556087661e-06, "logits/chosen": -2.009960889816284, "logits/rejected": -2.0069568157196045, "logps/chosen": -121.57040405273438, "logps/rejected": -166.70750427246094, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -1.2329635620117188, "rewards/margins": 7.810564994812012, "rewards/rejected": -9.04352855682373, "step": 8499 }, { "epoch": 1.88, "learning_rate": 8.48833172027064e-06, "logits/chosen": -1.2914093732833862, "logits/rejected": -1.3713254928588867, "logps/chosen": -148.59207153320312, "logps/rejected": -179.80606079101562, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3956574201583862, "rewards/margins": 6.49472188949585, "rewards/rejected": -7.890379428863525, "step": 8500 }, { "epoch": 1.88, "learning_rate": 8.487047431422426e-06, "logits/chosen": -1.8136321306228638, "logits/rejected": -1.8591653108596802, "logps/chosen": -199.5252685546875, "logps/rejected": -211.69540405273438, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.870915174484253, "rewards/margins": 7.642643928527832, "rewards/rejected": -10.513559341430664, "step": 8501 }, { "epoch": 1.88, "learning_rate": 8.485762694497001e-06, "logits/chosen": -1.4996531009674072, "logits/rejected": -1.6815223693847656, "logps/chosen": -254.28985595703125, "logps/rejected": -188.3888397216797, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.649584949016571, "rewards/margins": 11.218542098999023, "rewards/rejected": -10.568957328796387, "step": 8502 }, { "epoch": 1.88, "learning_rate": 8.484477509659452e-06, "logits/chosen": -1.994109869003296, "logits/rejected": -1.926021695137024, "logps/chosen": -105.11456298828125, "logps/rejected": -127.79539489746094, "loss": 0.39, "rewards/accuracies": 0.0, "rewards/chosen": -3.394479513168335, "rewards/margins": -0.16682147979736328, "rewards/rejected": -3.2276580333709717, "step": 8503 }, { "epoch": 1.88, "learning_rate": 8.483191877074916e-06, "logits/chosen": -1.5304882526397705, "logits/rejected": -1.5396696329116821, "logps/chosen": -203.18017578125, "logps/rejected": -227.4655303955078, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.3890090882778168, "rewards/margins": 7.902804374694824, "rewards/rejected": -7.513795375823975, "step": 8504 }, { "epoch": 1.88, "learning_rate": 8.4819057969086e-06, "logits/chosen": -1.5555576086044312, "logits/rejected": -1.686829686164856, "logps/chosen": -190.61590576171875, "logps/rejected": -189.55911254882812, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.7337799072265625, "rewards/margins": 5.622740268707275, "rewards/rejected": -4.888960361480713, "step": 8505 }, { "epoch": 1.88, "learning_rate": 8.480619269325759e-06, "logits/chosen": -1.106680989265442, "logits/rejected": -1.1399033069610596, "logps/chosen": -108.64179992675781, "logps/rejected": -100.57890319824219, "loss": 0.065, "rewards/accuracies": 1.0, "rewards/chosen": -4.6721625328063965, "rewards/margins": 1.9748835563659668, "rewards/rejected": -6.647046089172363, "step": 8506 }, { "epoch": 1.88, "learning_rate": 8.479332294491707e-06, "logits/chosen": -1.6388872861862183, "logits/rejected": -1.6939609050750732, "logps/chosen": -219.944091796875, "logps/rejected": -136.33721923828125, "loss": 0.6247, "rewards/accuracies": 0.0, "rewards/chosen": -10.589313507080078, "rewards/margins": -0.9115104675292969, "rewards/rejected": -9.677803039550781, "step": 8507 }, { "epoch": 1.88, "learning_rate": 8.47804487257182e-06, "logits/chosen": -1.7968170642852783, "logits/rejected": -1.8349114656448364, "logps/chosen": -162.53875732421875, "logps/rejected": -175.42147827148438, "loss": 0.1173, "rewards/accuracies": 1.0, "rewards/chosen": -4.683876991271973, "rewards/margins": 3.062030792236328, "rewards/rejected": -7.745907783508301, "step": 8508 }, { "epoch": 1.88, "learning_rate": 8.47675700373153e-06, "logits/chosen": -1.7404872179031372, "logits/rejected": -1.77662193775177, "logps/chosen": -136.2729034423828, "logps/rejected": -199.1058349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9039084911346436, "rewards/margins": 13.631272315979004, "rewards/rejected": -16.535181045532227, "step": 8509 }, { "epoch": 1.88, "learning_rate": 8.475468688136322e-06, "logits/chosen": -1.4099793434143066, "logits/rejected": -1.4967103004455566, "logps/chosen": -132.545654296875, "logps/rejected": -131.83644104003906, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.618828535079956, "rewards/margins": 7.03148078918457, "rewards/rejected": -10.650309562683105, "step": 8510 }, { "epoch": 1.88, "learning_rate": 8.47417992595174e-06, "logits/chosen": -1.4797130823135376, "logits/rejected": -1.454695463180542, "logps/chosen": -82.75384521484375, "logps/rejected": -176.09579467773438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.4825271666049957, "rewards/margins": 8.24959659576416, "rewards/rejected": -7.767069339752197, "step": 8511 }, { "epoch": 1.88, "learning_rate": 8.472890717343391e-06, "logits/chosen": -1.483113169670105, "logits/rejected": -1.0614402294158936, "logps/chosen": -127.86521911621094, "logps/rejected": -632.635009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.287083625793457, "rewards/margins": 49.119773864746094, "rewards/rejected": -53.406856536865234, "step": 8512 }, { "epoch": 1.88, "learning_rate": 8.471601062476933e-06, "logits/chosen": -1.2788867950439453, "logits/rejected": -1.2822760343551636, "logps/chosen": -192.96353149414062, "logps/rejected": -231.06417846679688, "loss": 0.0528, "rewards/accuracies": 1.0, "rewards/chosen": -0.5128890872001648, "rewards/margins": 2.1950364112854004, "rewards/rejected": -2.70792555809021, "step": 8513 }, { "epoch": 1.88, "learning_rate": 8.470310961518085e-06, "logits/chosen": -1.3750346899032593, "logits/rejected": -1.2056492567062378, "logps/chosen": -117.64566040039062, "logps/rejected": -310.967529296875, "loss": 0.0836, "rewards/accuracies": 1.0, "rewards/chosen": -4.163941383361816, "rewards/margins": 11.028250694274902, "rewards/rejected": -15.192192077636719, "step": 8514 }, { "epoch": 1.88, "learning_rate": 8.469020414632619e-06, "logits/chosen": -1.9862209558486938, "logits/rejected": -1.9668365716934204, "logps/chosen": -128.63067626953125, "logps/rejected": -118.00416564941406, "loss": 0.3017, "rewards/accuracies": 1.0, "rewards/chosen": -4.299973964691162, "rewards/margins": 0.18850183486938477, "rewards/rejected": -4.488475799560547, "step": 8515 }, { "epoch": 1.88, "learning_rate": 8.467729421986371e-06, "logits/chosen": -1.6719930171966553, "logits/rejected": -1.682515263557434, "logps/chosen": -91.11848449707031, "logps/rejected": -98.41661071777344, "loss": 0.0676, "rewards/accuracies": 1.0, "rewards/chosen": -1.8848854303359985, "rewards/margins": 1.9505535364151, "rewards/rejected": -3.8354389667510986, "step": 8516 }, { "epoch": 1.89, "learning_rate": 8.466437983745227e-06, "logits/chosen": -2.0221548080444336, "logits/rejected": -2.130507230758667, "logps/chosen": -118.00037384033203, "logps/rejected": -113.92568969726562, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.899285078048706, "rewards/margins": 5.52739143371582, "rewards/rejected": -8.426676750183105, "step": 8517 }, { "epoch": 1.89, "learning_rate": 8.465146100075136e-06, "logits/chosen": -1.6622579097747803, "logits/rejected": -1.6828285455703735, "logps/chosen": -178.9351806640625, "logps/rejected": -179.50112915039062, "loss": 0.0728, "rewards/accuracies": 1.0, "rewards/chosen": -5.731635570526123, "rewards/margins": 1.872415542602539, "rewards/rejected": -7.604051113128662, "step": 8518 }, { "epoch": 1.89, "learning_rate": 8.4638537711421e-06, "logits/chosen": -1.5724918842315674, "logits/rejected": -1.6109342575073242, "logps/chosen": -105.28915405273438, "logps/rejected": -82.60624694824219, "loss": 0.0942, "rewards/accuracies": 1.0, "rewards/chosen": -0.48413392901420593, "rewards/margins": 1.625270128250122, "rewards/rejected": -2.1094040870666504, "step": 8519 }, { "epoch": 1.89, "learning_rate": 8.462560997112184e-06, "logits/chosen": -1.7862383127212524, "logits/rejected": -1.7746881246566772, "logps/chosen": -137.37432861328125, "logps/rejected": -262.2782897949219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0454964637756348, "rewards/margins": 9.663406372070312, "rewards/rejected": -11.708902359008789, "step": 8520 }, { "epoch": 1.89, "learning_rate": 8.4612677781515e-06, "logits/chosen": -1.3029258251190186, "logits/rejected": -1.0495829582214355, "logps/chosen": -230.91458129882812, "logps/rejected": -599.1774291992188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.567574977874756, "rewards/margins": 44.04113006591797, "rewards/rejected": -50.60870361328125, "step": 8521 }, { "epoch": 1.89, "learning_rate": 8.45997411442623e-06, "logits/chosen": -1.3608347177505493, "logits/rejected": -1.2576080560684204, "logps/chosen": -231.3153533935547, "logps/rejected": -291.48895263671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.6608978509902954, "rewards/margins": 8.25494384765625, "rewards/rejected": -6.594046115875244, "step": 8522 }, { "epoch": 1.89, "learning_rate": 8.458680006102602e-06, "logits/chosen": -1.3787000179290771, "logits/rejected": -1.3944358825683594, "logps/chosen": -144.78334045410156, "logps/rejected": -152.66897583007812, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": 0.279837042093277, "rewards/margins": 5.171553134918213, "rewards/rejected": -4.891716003417969, "step": 8523 }, { "epoch": 1.89, "learning_rate": 8.45738545334691e-06, "logits/chosen": -0.9763734936714172, "logits/rejected": -1.0507490634918213, "logps/chosen": -209.22866821289062, "logps/rejected": -278.0439147949219, "loss": 0.2655, "rewards/accuracies": 1.0, "rewards/chosen": -5.545971870422363, "rewards/margins": 0.35570383071899414, "rewards/rejected": -5.901675701141357, "step": 8524 }, { "epoch": 1.89, "learning_rate": 8.456090456325496e-06, "logits/chosen": -1.5748116970062256, "logits/rejected": -1.560826063156128, "logps/chosen": -120.5096435546875, "logps/rejected": -205.2550048828125, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -2.6956894397735596, "rewards/margins": 3.911320447921753, "rewards/rejected": -6.6070098876953125, "step": 8525 }, { "epoch": 1.89, "learning_rate": 8.454795015204767e-06, "logits/chosen": -1.533421516418457, "logits/rejected": -1.5846644639968872, "logps/chosen": -168.3294219970703, "logps/rejected": -164.88638305664062, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": -2.3426742553710938, "rewards/margins": 1.6143264770507812, "rewards/rejected": -3.957000732421875, "step": 8526 }, { "epoch": 1.89, "learning_rate": 8.453499130151183e-06, "logits/chosen": -1.482459545135498, "logits/rejected": -1.4215284585952759, "logps/chosen": -168.23765563964844, "logps/rejected": -325.7608337402344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.6926895380020142, "rewards/margins": 8.853517532348633, "rewards/rejected": -7.16082763671875, "step": 8527 }, { "epoch": 1.89, "learning_rate": 8.452202801331265e-06, "logits/chosen": -1.7267709970474243, "logits/rejected": -1.7447218894958496, "logps/chosen": -114.46739196777344, "logps/rejected": -257.9993591308594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7104179859161377, "rewards/margins": 10.350571632385254, "rewards/rejected": -12.060989379882812, "step": 8528 }, { "epoch": 1.89, "learning_rate": 8.450906028911585e-06, "logits/chosen": -1.2868777513504028, "logits/rejected": -1.2868777513504028, "logps/chosen": -98.94332122802734, "logps/rejected": -98.94332122802734, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.723428249359131, "rewards/margins": 0.0, "rewards/rejected": -6.723428249359131, "step": 8529 }, { "epoch": 1.89, "learning_rate": 8.449608813058776e-06, "logits/chosen": -1.7136681079864502, "logits/rejected": -1.6158918142318726, "logps/chosen": -128.4893035888672, "logps/rejected": -193.0011444091797, "loss": 1.3299, "rewards/accuracies": 0.0, "rewards/chosen": -3.2884888648986816, "rewards/margins": -2.5807526111602783, "rewards/rejected": -0.7077361941337585, "step": 8530 }, { "epoch": 1.89, "learning_rate": 8.448311153939527e-06, "logits/chosen": -1.3209260702133179, "logits/rejected": -1.3528735637664795, "logps/chosen": -201.51406860351562, "logps/rejected": -176.6734161376953, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 1.540368676185608, "rewards/margins": 6.257983207702637, "rewards/rejected": -4.717614650726318, "step": 8531 }, { "epoch": 1.89, "learning_rate": 8.447013051720585e-06, "logits/chosen": -1.2525955438613892, "logits/rejected": -1.500652551651001, "logps/chosen": -304.47808837890625, "logps/rejected": -169.30819702148438, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.912304699420929, "rewards/margins": 5.077877521514893, "rewards/rejected": -5.990182399749756, "step": 8532 }, { "epoch": 1.89, "learning_rate": 8.445714506568751e-06, "logits/chosen": -1.6812000274658203, "logits/rejected": -1.6871757507324219, "logps/chosen": -132.51290893554688, "logps/rejected": -272.07684326171875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -2.7364158630371094, "rewards/margins": 13.475042343139648, "rewards/rejected": -16.211458206176758, "step": 8533 }, { "epoch": 1.89, "learning_rate": 8.444415518650887e-06, "logits/chosen": -1.5970427989959717, "logits/rejected": -1.6412954330444336, "logps/chosen": -86.25198364257812, "logps/rejected": -151.156982421875, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": 0.11105652153491974, "rewards/margins": 10.160191535949707, "rewards/rejected": -10.049135208129883, "step": 8534 }, { "epoch": 1.89, "learning_rate": 8.443116088133908e-06, "logits/chosen": -1.4206920862197876, "logits/rejected": -1.5474151372909546, "logps/chosen": -289.86724853515625, "logps/rejected": -223.59820556640625, "loss": 0.1684, "rewards/accuracies": 1.0, "rewards/chosen": -7.060990810394287, "rewards/margins": 0.9166703224182129, "rewards/rejected": -7.9776611328125, "step": 8535 }, { "epoch": 1.89, "learning_rate": 8.44181621518479e-06, "logits/chosen": -1.816261887550354, "logits/rejected": -1.8565475940704346, "logps/chosen": -122.48412322998047, "logps/rejected": -128.77818298339844, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -2.264633893966675, "rewards/margins": 2.800748586654663, "rewards/rejected": -5.065382480621338, "step": 8536 }, { "epoch": 1.89, "learning_rate": 8.440515899970561e-06, "logits/chosen": -1.6891635656356812, "logits/rejected": -1.6475775241851807, "logps/chosen": -87.52320098876953, "logps/rejected": -179.44154357910156, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -0.4698890745639801, "rewards/margins": 8.311033248901367, "rewards/rejected": -8.780921936035156, "step": 8537 }, { "epoch": 1.89, "learning_rate": 8.43921514265831e-06, "logits/chosen": -1.6015479564666748, "logits/rejected": -1.7840343713760376, "logps/chosen": -179.59329223632812, "logps/rejected": -167.7183837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.8633942008018494, "rewards/margins": 14.576696395874023, "rewards/rejected": -13.713302612304688, "step": 8538 }, { "epoch": 1.89, "learning_rate": 8.437913943415181e-06, "logits/chosen": -1.4050853252410889, "logits/rejected": -1.3758010864257812, "logps/chosen": -210.30789184570312, "logps/rejected": -197.93898010253906, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.8694732785224915, "rewards/margins": 6.4231367111206055, "rewards/rejected": -7.292610168457031, "step": 8539 }, { "epoch": 1.89, "learning_rate": 8.436612302408376e-06, "logits/chosen": -1.7228282690048218, "logits/rejected": -1.7590312957763672, "logps/chosen": -94.78126525878906, "logps/rejected": -89.56678009033203, "loss": 0.1767, "rewards/accuracies": 1.0, "rewards/chosen": -1.8237053155899048, "rewards/margins": 0.858474850654602, "rewards/rejected": -2.682180166244507, "step": 8540 }, { "epoch": 1.89, "learning_rate": 8.43531021980515e-06, "logits/chosen": -1.2220933437347412, "logits/rejected": -1.1458114385604858, "logps/chosen": -156.0020751953125, "logps/rejected": -651.33837890625, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -6.605473518371582, "rewards/margins": 37.92060470581055, "rewards/rejected": -44.52607727050781, "step": 8541 }, { "epoch": 1.89, "learning_rate": 8.434007695772819e-06, "logits/chosen": -1.5210305452346802, "logits/rejected": -1.4748852252960205, "logps/chosen": -100.41123962402344, "logps/rejected": -138.4607391357422, "loss": 0.0351, "rewards/accuracies": 1.0, "rewards/chosen": -2.3819215297698975, "rewards/margins": 2.7819135189056396, "rewards/rejected": -5.163835048675537, "step": 8542 }, { "epoch": 1.89, "learning_rate": 8.432704730478756e-06, "logits/chosen": -1.4864505529403687, "logits/rejected": -0.9142330288887024, "logps/chosen": -249.1493682861328, "logps/rejected": -1131.985107421875, "loss": 0.1071, "rewards/accuracies": 1.0, "rewards/chosen": -1.243865966796875, "rewards/margins": 103.27769470214844, "rewards/rejected": -104.52156066894531, "step": 8543 }, { "epoch": 1.89, "learning_rate": 8.431401324090384e-06, "logits/chosen": -1.2906312942504883, "logits/rejected": -1.2906312942504883, "logps/chosen": -261.3434143066406, "logps/rejected": -261.3434143066406, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -7.827805995941162, "rewards/margins": 0.0, "rewards/rejected": -7.827805995941162, "step": 8544 }, { "epoch": 1.89, "learning_rate": 8.430097476775194e-06, "logits/chosen": -1.4730314016342163, "logits/rejected": -1.3560781478881836, "logps/chosen": -219.13792419433594, "logps/rejected": -341.11016845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.351632684469223, "rewards/margins": 14.483942031860352, "rewards/rejected": -14.132308959960938, "step": 8545 }, { "epoch": 1.89, "learning_rate": 8.428793188700722e-06, "logits/chosen": -1.7125717401504517, "logits/rejected": -1.6329493522644043, "logps/chosen": -79.22386169433594, "logps/rejected": -211.61611938476562, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.6797264218330383, "rewards/margins": 6.839527130126953, "rewards/rejected": -7.519253730773926, "step": 8546 }, { "epoch": 1.89, "learning_rate": 8.427488460034567e-06, "logits/chosen": -1.229423999786377, "logits/rejected": -1.2261637449264526, "logps/chosen": -90.75875854492188, "logps/rejected": -71.24110412597656, "loss": 0.1414, "rewards/accuracies": 1.0, "rewards/chosen": -0.5988494753837585, "rewards/margins": 1.148193359375, "rewards/rejected": -1.7470428943634033, "step": 8547 }, { "epoch": 1.89, "learning_rate": 8.426183290944387e-06, "logits/chosen": -1.448115348815918, "logits/rejected": -1.555401086807251, "logps/chosen": -275.17730712890625, "logps/rejected": -107.19776916503906, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -4.484532356262207, "rewards/margins": 3.4038000106811523, "rewards/rejected": -7.888332366943359, "step": 8548 }, { "epoch": 1.89, "learning_rate": 8.424877681597889e-06, "logits/chosen": -1.5359952449798584, "logits/rejected": -1.0835635662078857, "logps/chosen": -90.09369659423828, "logps/rejected": -837.4848022460938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.212624430656433, "rewards/margins": 57.140159606933594, "rewards/rejected": -58.352783203125, "step": 8549 }, { "epoch": 1.89, "learning_rate": 8.423571632162843e-06, "logits/chosen": -1.8891775608062744, "logits/rejected": -1.8891775608062744, "logps/chosen": -76.21234130859375, "logps/rejected": -76.21234130859375, "loss": 0.4674, "rewards/accuracies": 0.0, "rewards/chosen": -4.47482442855835, "rewards/margins": 0.0, "rewards/rejected": -4.47482442855835, "step": 8550 }, { "epoch": 1.89, "learning_rate": 8.422265142807071e-06, "logits/chosen": -1.5962058305740356, "logits/rejected": -1.5747922658920288, "logps/chosen": -65.30297088623047, "logps/rejected": -39.178287506103516, "loss": 1.6175, "rewards/accuracies": 0.0, "rewards/chosen": -4.794789791107178, "rewards/margins": -3.1947779655456543, "rewards/rejected": -1.6000118255615234, "step": 8551 }, { "epoch": 1.89, "learning_rate": 8.420958213698455e-06, "logits/chosen": -1.9264131784439087, "logits/rejected": -1.9215673208236694, "logps/chosen": -110.79658508300781, "logps/rejected": -117.34901428222656, "loss": 0.1469, "rewards/accuracies": 1.0, "rewards/chosen": -1.8293427228927612, "rewards/margins": 2.2285308837890625, "rewards/rejected": -4.057873725891113, "step": 8552 }, { "epoch": 1.89, "learning_rate": 8.419650845004932e-06, "logits/chosen": -1.2827144861221313, "logits/rejected": -1.24726402759552, "logps/chosen": -150.8877410888672, "logps/rejected": -220.5052032470703, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -2.592095136642456, "rewards/margins": 4.383459091186523, "rewards/rejected": -6.9755539894104, "step": 8553 }, { "epoch": 1.89, "learning_rate": 8.418343036894497e-06, "logits/chosen": -1.4708529710769653, "logits/rejected": -1.4017643928527832, "logps/chosen": -195.0802459716797, "logps/rejected": -276.7544860839844, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -7.6616339683532715, "rewards/margins": 5.618768215179443, "rewards/rejected": -13.280402183532715, "step": 8554 }, { "epoch": 1.89, "learning_rate": 8.4170347895352e-06, "logits/chosen": -1.1477586030960083, "logits/rejected": -1.181352138519287, "logps/chosen": -108.87230682373047, "logps/rejected": -131.8367156982422, "loss": 0.078, "rewards/accuracies": 1.0, "rewards/chosen": -2.4913642406463623, "rewards/margins": 1.7795603275299072, "rewards/rejected": -4.2709245681762695, "step": 8555 }, { "epoch": 1.89, "learning_rate": 8.415726103095146e-06, "logits/chosen": -1.5383296012878418, "logits/rejected": -1.5383296012878418, "logps/chosen": -72.41606903076172, "logps/rejected": -72.41606903076172, "loss": 0.349, "rewards/accuracies": 0.0, "rewards/chosen": -3.0733916759490967, "rewards/margins": 0.0, "rewards/rejected": -3.0733916759490967, "step": 8556 }, { "epoch": 1.89, "learning_rate": 8.414416977742498e-06, "logits/chosen": -1.3530460596084595, "logits/rejected": -1.3532830476760864, "logps/chosen": -137.40234375, "logps/rejected": -79.42988586425781, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -2.496403455734253, "rewards/margins": 3.6415011882781982, "rewards/rejected": -6.137904644012451, "step": 8557 }, { "epoch": 1.89, "learning_rate": 8.413107413645477e-06, "logits/chosen": -1.5112262964248657, "logits/rejected": -1.5019241571426392, "logps/chosen": -88.92393493652344, "logps/rejected": -142.56370544433594, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 0.05552368238568306, "rewards/margins": 6.023004531860352, "rewards/rejected": -5.967480659484863, "step": 8558 }, { "epoch": 1.89, "learning_rate": 8.411797410972358e-06, "logits/chosen": -1.5518643856048584, "logits/rejected": -1.5518643856048584, "logps/chosen": -147.32418823242188, "logps/rejected": -147.32418823242188, "loss": 0.4389, "rewards/accuracies": 0.0, "rewards/chosen": -11.787543296813965, "rewards/margins": 0.0, "rewards/rejected": -11.787543296813965, "step": 8559 }, { "epoch": 1.89, "learning_rate": 8.410486969891475e-06, "logits/chosen": -0.9607855081558228, "logits/rejected": -0.8902722597122192, "logps/chosen": -126.0896987915039, "logps/rejected": -197.4976806640625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.046645402908325, "rewards/margins": 5.9207763671875, "rewards/rejected": -8.967421531677246, "step": 8560 }, { "epoch": 1.89, "learning_rate": 8.409176090571214e-06, "logits/chosen": -1.8274831771850586, "logits/rejected": -1.7797064781188965, "logps/chosen": -69.09317779541016, "logps/rejected": -204.6046142578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5305664539337158, "rewards/margins": 8.999588012695312, "rewards/rejected": -10.53015422821045, "step": 8561 }, { "epoch": 1.9, "learning_rate": 8.40786477318002e-06, "logits/chosen": -1.365644097328186, "logits/rejected": -1.4057589769363403, "logps/chosen": -225.1865997314453, "logps/rejected": -296.80126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.6886825561523438, "rewards/margins": 16.333980560302734, "rewards/rejected": -14.645297050476074, "step": 8562 }, { "epoch": 1.9, "learning_rate": 8.406553017886397e-06, "logits/chosen": -1.2723491191864014, "logits/rejected": -1.4793479442596436, "logps/chosen": -231.27017211914062, "logps/rejected": -184.98297119140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.578057885169983, "rewards/margins": 8.805724143981934, "rewards/rejected": -7.227665901184082, "step": 8563 }, { "epoch": 1.9, "learning_rate": 8.405240824858898e-06, "logits/chosen": -1.3483461141586304, "logits/rejected": -1.3483461141586304, "logps/chosen": -126.23599243164062, "logps/rejected": -126.23599243164062, "loss": 0.4901, "rewards/accuracies": 0.0, "rewards/chosen": -5.438880920410156, "rewards/margins": 0.0, "rewards/rejected": -5.438880920410156, "step": 8564 }, { "epoch": 1.9, "learning_rate": 8.40392819426614e-06, "logits/chosen": -1.0520832538604736, "logits/rejected": -1.089918851852417, "logps/chosen": -238.74676513671875, "logps/rejected": -292.39874267578125, "loss": 0.078, "rewards/accuracies": 1.0, "rewards/chosen": -4.922415256500244, "rewards/margins": 3.62636137008667, "rewards/rejected": -8.548776626586914, "step": 8565 }, { "epoch": 1.9, "learning_rate": 8.402615126276792e-06, "logits/chosen": -1.5215198993682861, "logits/rejected": -1.5158898830413818, "logps/chosen": -182.95358276367188, "logps/rejected": -249.34495544433594, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.753399610519409, "rewards/margins": 5.1665544509887695, "rewards/rejected": -7.9199538230896, "step": 8566 }, { "epoch": 1.9, "learning_rate": 8.40130162105958e-06, "logits/chosen": -1.2863199710845947, "logits/rejected": -2.362398624420166, "logps/chosen": -97.7216796875, "logps/rejected": -677.0313720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7380782961845398, "rewards/margins": 63.41901779174805, "rewards/rejected": -64.15709686279297, "step": 8567 }, { "epoch": 1.9, "learning_rate": 8.399987678783285e-06, "logits/chosen": -1.5191099643707275, "logits/rejected": -1.4421777725219727, "logps/chosen": -153.91558837890625, "logps/rejected": -234.8684539794922, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": 0.3748520016670227, "rewards/margins": 3.3800048828125, "rewards/rejected": -3.005152940750122, "step": 8568 }, { "epoch": 1.9, "learning_rate": 8.398673299616747e-06, "logits/chosen": -1.371978759765625, "logits/rejected": -1.53657865524292, "logps/chosen": -216.6046600341797, "logps/rejected": -201.21107482910156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.011891174130141735, "rewards/margins": 10.849492073059082, "rewards/rejected": -10.837600708007812, "step": 8569 }, { "epoch": 1.9, "learning_rate": 8.397358483728861e-06, "logits/chosen": -1.0897988080978394, "logits/rejected": -1.0904425382614136, "logps/chosen": -199.36770629882812, "logps/rejected": -161.62957763671875, "loss": 2.2499, "rewards/accuracies": 0.0, "rewards/chosen": -6.365126132965088, "rewards/margins": -4.488600254058838, "rewards/rejected": -1.87652587890625, "step": 8570 }, { "epoch": 1.9, "learning_rate": 8.396043231288577e-06, "logits/chosen": -1.6776236295700073, "logits/rejected": -1.7646385431289673, "logps/chosen": -246.9754638671875, "logps/rejected": -294.4229736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2683609127998352, "rewards/margins": 11.225193977355957, "rewards/rejected": -11.493555068969727, "step": 8571 }, { "epoch": 1.9, "learning_rate": 8.3947275424649e-06, "logits/chosen": -1.2102168798446655, "logits/rejected": -1.2288557291030884, "logps/chosen": -138.84573364257812, "logps/rejected": -145.0692138671875, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -3.5712227821350098, "rewards/margins": 3.3300933837890625, "rewards/rejected": -6.901316165924072, "step": 8572 }, { "epoch": 1.9, "learning_rate": 8.393411417426895e-06, "logits/chosen": -1.3682941198349, "logits/rejected": -1.3912749290466309, "logps/chosen": -89.76148223876953, "logps/rejected": -95.93663024902344, "loss": 0.3992, "rewards/accuracies": 1.0, "rewards/chosen": -1.245826005935669, "rewards/margins": 2.19830322265625, "rewards/rejected": -3.444129228591919, "step": 8573 }, { "epoch": 1.9, "learning_rate": 8.392094856343682e-06, "logits/chosen": -1.3308147192001343, "logits/rejected": -1.1431865692138672, "logps/chosen": -178.91983032226562, "logps/rejected": -322.7630615234375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.520941138267517, "rewards/margins": 8.902215003967285, "rewards/rejected": -10.423155784606934, "step": 8574 }, { "epoch": 1.9, "learning_rate": 8.390777859384434e-06, "logits/chosen": -1.3907626867294312, "logits/rejected": -1.4611947536468506, "logps/chosen": -175.109375, "logps/rejected": -174.77674865722656, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -4.557682991027832, "rewards/margins": 3.406078815460205, "rewards/rejected": -7.963761806488037, "step": 8575 }, { "epoch": 1.9, "learning_rate": 8.38946042671838e-06, "logits/chosen": -1.1886093616485596, "logits/rejected": -1.1774476766586304, "logps/chosen": -279.896240234375, "logps/rejected": -267.1771545410156, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -0.32512208819389343, "rewards/margins": 4.281976699829102, "rewards/rejected": -4.607098579406738, "step": 8576 }, { "epoch": 1.9, "learning_rate": 8.388142558514811e-06, "logits/chosen": -1.7057783603668213, "logits/rejected": -1.6828901767730713, "logps/chosen": -151.5051727294922, "logps/rejected": -236.0997314453125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -6.1951494216918945, "rewards/margins": 5.0081071853637695, "rewards/rejected": -11.203256607055664, "step": 8577 }, { "epoch": 1.9, "learning_rate": 8.38682425494307e-06, "logits/chosen": -1.6365238428115845, "logits/rejected": -1.6996099948883057, "logps/chosen": -166.28134155273438, "logps/rejected": -152.16876220703125, "loss": 0.1169, "rewards/accuracies": 1.0, "rewards/chosen": -1.2453796863555908, "rewards/margins": 1.334204912185669, "rewards/rejected": -2.5795845985412598, "step": 8578 }, { "epoch": 1.9, "learning_rate": 8.38550551617255e-06, "logits/chosen": -1.477258563041687, "logits/rejected": -1.5037024021148682, "logps/chosen": -126.6278305053711, "logps/rejected": -126.55461120605469, "loss": 0.1042, "rewards/accuracies": 1.0, "rewards/chosen": -2.351102590560913, "rewards/margins": 1.4854743480682373, "rewards/rejected": -3.8365769386291504, "step": 8579 }, { "epoch": 1.9, "learning_rate": 8.384186342372711e-06, "logits/chosen": -1.497612476348877, "logits/rejected": -1.5381088256835938, "logps/chosen": -168.4134979248047, "logps/rejected": -180.69088745117188, "loss": 0.0816, "rewards/accuracies": 1.0, "rewards/chosen": 0.6799865961074829, "rewards/margins": 1.7306580543518066, "rewards/rejected": -1.0506714582443237, "step": 8580 }, { "epoch": 1.9, "learning_rate": 8.382866733713064e-06, "logits/chosen": -2.088411808013916, "logits/rejected": -2.114089250564575, "logps/chosen": -139.32960510253906, "logps/rejected": -161.0478515625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.8064415454864502, "rewards/margins": 4.627483367919922, "rewards/rejected": -6.433925151824951, "step": 8581 }, { "epoch": 1.9, "learning_rate": 8.381546690363174e-06, "logits/chosen": -1.3915716409683228, "logits/rejected": -1.4338879585266113, "logps/chosen": -123.80061340332031, "logps/rejected": -110.46293640136719, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/chosen": -2.589839220046997, "rewards/margins": 2.4320852756500244, "rewards/rejected": -5.0219244956970215, "step": 8582 }, { "epoch": 1.9, "learning_rate": 8.380226212492661e-06, "logits/chosen": -1.3637763261795044, "logits/rejected": -1.385568380355835, "logps/chosen": -107.96392822265625, "logps/rejected": -129.37350463867188, "loss": 0.0493, "rewards/accuracies": 1.0, "rewards/chosen": -2.822528123855591, "rewards/margins": 2.2686073780059814, "rewards/rejected": -5.091135501861572, "step": 8583 }, { "epoch": 1.9, "learning_rate": 8.378905300271207e-06, "logits/chosen": -1.1157861948013306, "logits/rejected": -1.1157861948013306, "logps/chosen": -152.46713256835938, "logps/rejected": -152.46713256835938, "loss": 0.3499, "rewards/accuracies": 0.0, "rewards/chosen": -7.6493120193481445, "rewards/margins": 0.0, "rewards/rejected": -7.6493120193481445, "step": 8584 }, { "epoch": 1.9, "learning_rate": 8.377583953868545e-06, "logits/chosen": -1.5807214975357056, "logits/rejected": -1.4782284498214722, "logps/chosen": -192.95822143554688, "logps/rejected": -280.33184814453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.278454780578613, "rewards/margins": 6.378578186035156, "rewards/rejected": -10.65703296661377, "step": 8585 }, { "epoch": 1.9, "learning_rate": 8.376262173454464e-06, "logits/chosen": -1.059816837310791, "logits/rejected": -1.238515853881836, "logps/chosen": -280.82000732421875, "logps/rejected": -214.0565643310547, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.5617188215255737, "rewards/margins": 13.59444522857666, "rewards/rejected": -12.032726287841797, "step": 8586 }, { "epoch": 1.9, "learning_rate": 8.374939959198809e-06, "logits/chosen": -1.7144577503204346, "logits/rejected": -1.68449068069458, "logps/chosen": -92.35074615478516, "logps/rejected": -202.042236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3747856616973877, "rewards/margins": 9.737645149230957, "rewards/rejected": -11.112430572509766, "step": 8587 }, { "epoch": 1.9, "learning_rate": 8.373617311271483e-06, "logits/chosen": -1.7289410829544067, "logits/rejected": -1.757939338684082, "logps/chosen": -104.79399108886719, "logps/rejected": -106.31729125976562, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -2.4569594860076904, "rewards/margins": 4.004673957824707, "rewards/rejected": -6.461633205413818, "step": 8588 }, { "epoch": 1.9, "learning_rate": 8.372294229842442e-06, "logits/chosen": -1.5232579708099365, "logits/rejected": -1.5252729654312134, "logps/chosen": -108.43376159667969, "logps/rejected": -113.37176513671875, "loss": 0.8709, "rewards/accuracies": 0.0, "rewards/chosen": -1.9141441583633423, "rewards/margins": -1.5491135120391846, "rewards/rejected": -0.3650306761264801, "step": 8589 }, { "epoch": 1.9, "learning_rate": 8.3709707150817e-06, "logits/chosen": -1.5184684991836548, "logits/rejected": -0.7655907869338989, "logps/chosen": -156.1484375, "logps/rejected": -1088.92626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.41038209199905396, "rewards/margins": 101.6733169555664, "rewards/rejected": -101.26293182373047, "step": 8590 }, { "epoch": 1.9, "learning_rate": 8.369646767159325e-06, "logits/chosen": -1.2048999071121216, "logits/rejected": -1.1451327800750732, "logps/chosen": -95.61241149902344, "logps/rejected": -197.85604858398438, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.8639892935752869, "rewards/margins": 5.217169284820557, "rewards/rejected": -6.081158638000488, "step": 8591 }, { "epoch": 1.9, "learning_rate": 8.36832238624544e-06, "logits/chosen": -1.4071776866912842, "logits/rejected": -1.3509849309921265, "logps/chosen": -194.44378662109375, "logps/rejected": -207.449951171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.3130264282226562, "rewards/margins": 8.384803771972656, "rewards/rejected": -9.697830200195312, "step": 8592 }, { "epoch": 1.9, "learning_rate": 8.366997572510228e-06, "logits/chosen": -1.5366464853286743, "logits/rejected": -1.4441158771514893, "logps/chosen": -144.8500518798828, "logps/rejected": -245.24403381347656, "loss": 0.039, "rewards/accuracies": 1.0, "rewards/chosen": -2.3136138916015625, "rewards/margins": 2.516781806945801, "rewards/rejected": -4.830395698547363, "step": 8593 }, { "epoch": 1.9, "learning_rate": 8.365672326123918e-06, "logits/chosen": -1.6282687187194824, "logits/rejected": -1.6282687187194824, "logps/chosen": -216.93637084960938, "logps/rejected": -216.93637084960938, "loss": 0.3521, "rewards/accuracies": 0.0, "rewards/chosen": -4.520941257476807, "rewards/margins": 0.0, "rewards/rejected": -4.520941257476807, "step": 8594 }, { "epoch": 1.9, "learning_rate": 8.364346647256808e-06, "logits/chosen": -1.5599753856658936, "logits/rejected": -1.6448308229446411, "logps/chosen": -162.97918701171875, "logps/rejected": -158.33522033691406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.3335342407226562, "rewards/margins": 8.17733383178711, "rewards/rejected": -6.843799591064453, "step": 8595 }, { "epoch": 1.9, "learning_rate": 8.36302053607924e-06, "logits/chosen": -1.1586666107177734, "logits/rejected": -1.194700002670288, "logps/chosen": -229.0954132080078, "logps/rejected": -311.07061767578125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.5744064450263977, "rewards/margins": 8.993349075317383, "rewards/rejected": -9.567755699157715, "step": 8596 }, { "epoch": 1.9, "learning_rate": 8.361693992761617e-06, "logits/chosen": -1.342984676361084, "logits/rejected": -1.2846564054489136, "logps/chosen": -148.5418701171875, "logps/rejected": -330.82012939453125, "loss": 0.1236, "rewards/accuracies": 1.0, "rewards/chosen": -5.8238677978515625, "rewards/margins": 1.2714295387268066, "rewards/rejected": -7.095297336578369, "step": 8597 }, { "epoch": 1.9, "learning_rate": 8.360367017474398e-06, "logits/chosen": -1.254307746887207, "logits/rejected": -1.254307746887207, "logps/chosen": -174.7617950439453, "logps/rejected": -174.7617950439453, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -6.6484375, "rewards/margins": 0.0, "rewards/rejected": -6.6484375, "step": 8598 }, { "epoch": 1.9, "learning_rate": 8.359039610388096e-06, "logits/chosen": -1.290476679801941, "logits/rejected": -1.218742847442627, "logps/chosen": -158.61260986328125, "logps/rejected": -255.54286193847656, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.231304883956909, "rewards/margins": 6.88825798034668, "rewards/rejected": -9.119563102722168, "step": 8599 }, { "epoch": 1.9, "learning_rate": 8.357711771673278e-06, "logits/chosen": -1.1816434860229492, "logits/rejected": -1.1939767599105835, "logps/chosen": -65.47290802001953, "logps/rejected": -118.79310607910156, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -3.2741668224334717, "rewards/margins": 3.948951005935669, "rewards/rejected": -7.223117828369141, "step": 8600 }, { "epoch": 1.9, "learning_rate": 8.35638350150057e-06, "logits/chosen": -1.6118351221084595, "logits/rejected": -1.4377543926239014, "logps/chosen": -106.59014892578125, "logps/rejected": -250.81886291503906, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.06065833568573, "rewards/margins": 5.882967948913574, "rewards/rejected": -6.943626403808594, "step": 8601 }, { "epoch": 1.9, "learning_rate": 8.35505480004065e-06, "logits/chosen": -1.3911538124084473, "logits/rejected": -1.3911538124084473, "logps/chosen": -95.3498306274414, "logps/rejected": -95.3498306274414, "loss": 0.3469, "rewards/accuracies": 0.0, "rewards/chosen": -5.443660259246826, "rewards/margins": 0.0, "rewards/rejected": -5.443660259246826, "step": 8602 }, { "epoch": 1.9, "learning_rate": 8.353725667464254e-06, "logits/chosen": -1.2648676633834839, "logits/rejected": -1.2648676633834839, "logps/chosen": -279.0527648925781, "logps/rejected": -279.0527648925781, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -9.818676948547363, "rewards/margins": 0.0, "rewards/rejected": -9.818676948547363, "step": 8603 }, { "epoch": 1.9, "learning_rate": 8.352396103942171e-06, "logits/chosen": -1.2496055364608765, "logits/rejected": -1.2496055364608765, "logps/chosen": -91.74922943115234, "logps/rejected": -91.74922943115234, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.3730950355529785, "rewards/margins": 0.0, "rewards/rejected": -2.3730950355529785, "step": 8604 }, { "epoch": 1.9, "learning_rate": 8.351066109645248e-06, "logits/chosen": -1.7671141624450684, "logits/rejected": -1.8260656595230103, "logps/chosen": -82.89281463623047, "logps/rejected": -77.5482177734375, "loss": 0.06, "rewards/accuracies": 1.0, "rewards/chosen": -0.6336967349052429, "rewards/margins": 2.0602798461914062, "rewards/rejected": -2.693976640701294, "step": 8605 }, { "epoch": 1.9, "learning_rate": 8.349735684744385e-06, "logits/chosen": -1.689326524734497, "logits/rejected": -1.8904935121536255, "logps/chosen": -209.8599853515625, "logps/rejected": -168.46759033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.30853578448295593, "rewards/margins": 11.250632286071777, "rewards/rejected": -10.942096710205078, "step": 8606 }, { "epoch": 1.91, "learning_rate": 8.34840482941054e-06, "logits/chosen": -1.211399793624878, "logits/rejected": -1.1646850109100342, "logps/chosen": -185.343994140625, "logps/rejected": -262.9276123046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.9797027707099915, "rewards/margins": 7.3494062423706055, "rewards/rejected": -8.329109191894531, "step": 8607 }, { "epoch": 1.91, "learning_rate": 8.347073543814723e-06, "logits/chosen": -1.1040332317352295, "logits/rejected": -0.9727314114570618, "logps/chosen": -189.2345733642578, "logps/rejected": -370.49566650390625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.7220596671104431, "rewards/margins": 16.333620071411133, "rewards/rejected": -15.611559867858887, "step": 8608 }, { "epoch": 1.91, "learning_rate": 8.345741828128003e-06, "logits/chosen": -1.6687062978744507, "logits/rejected": -1.85637629032135, "logps/chosen": -221.99205017089844, "logps/rejected": -165.24632263183594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.3095474243164062, "rewards/margins": 7.375920295715332, "rewards/rejected": -8.685467720031738, "step": 8609 }, { "epoch": 1.91, "learning_rate": 8.344409682521499e-06, "logits/chosen": -1.3404511213302612, "logits/rejected": -1.2987960577011108, "logps/chosen": -155.873779296875, "logps/rejected": -242.25015258789062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.5028198957443237, "rewards/margins": 11.191608428955078, "rewards/rejected": -9.688788414001465, "step": 8610 }, { "epoch": 1.91, "learning_rate": 8.343077107166394e-06, "logits/chosen": -1.139844298362732, "logits/rejected": -1.111649751663208, "logps/chosen": -137.51443481445312, "logps/rejected": -189.73704528808594, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.084935188293457, "rewards/margins": 6.764202117919922, "rewards/rejected": -10.849137306213379, "step": 8611 }, { "epoch": 1.91, "learning_rate": 8.341744102233916e-06, "logits/chosen": -1.5805892944335938, "logits/rejected": -1.6758273839950562, "logps/chosen": -182.338134765625, "logps/rejected": -190.219482421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.3671112060546875, "rewards/margins": 8.716171264648438, "rewards/rejected": -9.083282470703125, "step": 8612 }, { "epoch": 1.91, "learning_rate": 8.340410667895352e-06, "logits/chosen": -1.59171462059021, "logits/rejected": -1.7461069822311401, "logps/chosen": -230.84303283691406, "logps/rejected": -234.91497802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.11135559529066086, "rewards/margins": 11.164402961730957, "rewards/rejected": -11.275758743286133, "step": 8613 }, { "epoch": 1.91, "learning_rate": 8.339076804322048e-06, "logits/chosen": -1.1505653858184814, "logits/rejected": -1.0154821872711182, "logps/chosen": -227.2394561767578, "logps/rejected": -341.09490966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8033050298690796, "rewards/margins": 10.380609512329102, "rewards/rejected": -12.183914184570312, "step": 8614 }, { "epoch": 1.91, "learning_rate": 8.337742511685403e-06, "logits/chosen": -1.3331502676010132, "logits/rejected": -1.3345874547958374, "logps/chosen": -150.96018981933594, "logps/rejected": -299.39361572265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.6403915286064148, "rewards/margins": 12.211420059204102, "rewards/rejected": -11.571028709411621, "step": 8615 }, { "epoch": 1.91, "learning_rate": 8.336407790156868e-06, "logits/chosen": -1.2335178852081299, "logits/rejected": -1.253280758857727, "logps/chosen": -184.58030700683594, "logps/rejected": -194.34027099609375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.6946334838867188, "rewards/margins": 6.856919288635254, "rewards/rejected": -9.551552772521973, "step": 8616 }, { "epoch": 1.91, "learning_rate": 8.335072639907953e-06, "logits/chosen": -1.118788719177246, "logits/rejected": -1.2370742559432983, "logps/chosen": -214.79547119140625, "logps/rejected": -171.189208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.301034539937973, "rewards/margins": 11.149721145629883, "rewards/rejected": -10.848686218261719, "step": 8617 }, { "epoch": 1.91, "learning_rate": 8.33373706111022e-06, "logits/chosen": -1.4415850639343262, "logits/rejected": -1.4220987558364868, "logps/chosen": -105.36610412597656, "logps/rejected": -175.90550231933594, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.9670761823654175, "rewards/margins": 5.3866987228393555, "rewards/rejected": -7.3537750244140625, "step": 8618 }, { "epoch": 1.91, "learning_rate": 8.332401053935288e-06, "logits/chosen": -1.4380834102630615, "logits/rejected": -1.4380834102630615, "logps/chosen": -308.26324462890625, "logps/rejected": -308.26324462890625, "loss": 0.4302, "rewards/accuracies": 0.0, "rewards/chosen": -12.934473991394043, "rewards/margins": 0.0, "rewards/rejected": -12.934473991394043, "step": 8619 }, { "epoch": 1.91, "learning_rate": 8.331064618554834e-06, "logits/chosen": -1.5474202632904053, "logits/rejected": -1.5718281269073486, "logps/chosen": -78.26044464111328, "logps/rejected": -111.43553161621094, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -0.9087821841239929, "rewards/margins": 3.13832688331604, "rewards/rejected": -4.047109127044678, "step": 8620 }, { "epoch": 1.91, "learning_rate": 8.329727755140584e-06, "logits/chosen": -1.488882303237915, "logits/rejected": -1.2789965867996216, "logps/chosen": -104.88766479492188, "logps/rejected": -1211.496826171875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.3140594959259033, "rewards/margins": 104.76905059814453, "rewards/rejected": -106.0831069946289, "step": 8621 }, { "epoch": 1.91, "learning_rate": 8.32839046386432e-06, "logits/chosen": -1.0815815925598145, "logits/rejected": -1.2091327905654907, "logps/chosen": -271.3804931640625, "logps/rejected": -159.5023193359375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.953460693359375, "rewards/margins": 6.186145305633545, "rewards/rejected": -5.23268461227417, "step": 8622 }, { "epoch": 1.91, "learning_rate": 8.327052744897883e-06, "logits/chosen": -1.3457385301589966, "logits/rejected": -1.3168693780899048, "logps/chosen": -102.44886779785156, "logps/rejected": -177.1943817138672, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.8247871398925781, "rewards/margins": 6.044739723205566, "rewards/rejected": -6.8695268630981445, "step": 8623 }, { "epoch": 1.91, "learning_rate": 8.325714598413169e-06, "logits/chosen": -1.3240236043930054, "logits/rejected": -1.3283066749572754, "logps/chosen": -113.47811889648438, "logps/rejected": -95.77433013916016, "loss": 0.0925, "rewards/accuracies": 1.0, "rewards/chosen": -2.2786712646484375, "rewards/margins": 2.0409293174743652, "rewards/rejected": -4.319600582122803, "step": 8624 }, { "epoch": 1.91, "learning_rate": 8.32437602458212e-06, "logits/chosen": -1.3618314266204834, "logits/rejected": -1.3495900630950928, "logps/chosen": -107.5743179321289, "logps/rejected": -260.2955627441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6388275623321533, "rewards/margins": 13.00393295288086, "rewards/rejected": -14.642760276794434, "step": 8625 }, { "epoch": 1.91, "learning_rate": 8.323037023576745e-06, "logits/chosen": -1.325348138809204, "logits/rejected": -1.4455009698867798, "logps/chosen": -222.91004943847656, "logps/rejected": -192.85501098632812, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.1790390014648438, "rewards/margins": 6.5304155349731445, "rewards/rejected": -9.709454536437988, "step": 8626 }, { "epoch": 1.91, "learning_rate": 8.3216975955691e-06, "logits/chosen": -1.3083014488220215, "logits/rejected": -1.343623161315918, "logps/chosen": -147.19869995117188, "logps/rejected": -229.0402374267578, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -0.23241882026195526, "rewards/margins": 8.415061950683594, "rewards/rejected": -8.647480964660645, "step": 8627 }, { "epoch": 1.91, "learning_rate": 8.320357740731302e-06, "logits/chosen": -1.6166527271270752, "logits/rejected": -1.1679409742355347, "logps/chosen": -188.3558349609375, "logps/rejected": -517.4077758789062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7558013796806335, "rewards/margins": 33.56081008911133, "rewards/rejected": -32.80500793457031, "step": 8628 }, { "epoch": 1.91, "learning_rate": 8.319017459235515e-06, "logits/chosen": -1.334415316581726, "logits/rejected": -1.349199891090393, "logps/chosen": -241.34588623046875, "logps/rejected": -220.2766876220703, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.3972320556640625, "rewards/margins": 8.251940727233887, "rewards/rejected": -10.64917278289795, "step": 8629 }, { "epoch": 1.91, "learning_rate": 8.317676751253961e-06, "logits/chosen": -1.4849778413772583, "logits/rejected": -1.6106090545654297, "logps/chosen": -187.51492309570312, "logps/rejected": -203.07394409179688, "loss": 0.0444, "rewards/accuracies": 1.0, "rewards/chosen": 0.8851852416992188, "rewards/margins": 11.759119987487793, "rewards/rejected": -10.873934745788574, "step": 8630 }, { "epoch": 1.91, "learning_rate": 8.316335616958922e-06, "logits/chosen": -1.6346838474273682, "logits/rejected": -1.6060162782669067, "logps/chosen": -105.11090850830078, "logps/rejected": -188.9707794189453, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.3413307666778564, "rewards/margins": 5.180635452270508, "rewards/rejected": -6.521965980529785, "step": 8631 }, { "epoch": 1.91, "learning_rate": 8.314994056522727e-06, "logits/chosen": -1.4789823293685913, "logits/rejected": -1.4285995960235596, "logps/chosen": -53.44517517089844, "logps/rejected": -163.83343505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1121768951416016, "rewards/margins": 10.231045722961426, "rewards/rejected": -11.343222618103027, "step": 8632 }, { "epoch": 1.91, "learning_rate": 8.313652070117765e-06, "logits/chosen": -1.782476782798767, "logits/rejected": -1.746992826461792, "logps/chosen": -67.71635437011719, "logps/rejected": -174.44442749023438, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.7175148129463196, "rewards/margins": 6.916069984436035, "rewards/rejected": -6.198554992675781, "step": 8633 }, { "epoch": 1.91, "learning_rate": 8.31230965791648e-06, "logits/chosen": -1.4051135778427124, "logits/rejected": -1.3555858135223389, "logps/chosen": -148.2174072265625, "logps/rejected": -254.1951446533203, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.9136078357696533, "rewards/margins": 9.699275970458984, "rewards/rejected": -12.612883567810059, "step": 8634 }, { "epoch": 1.91, "learning_rate": 8.310966820091364e-06, "logits/chosen": -1.6806871891021729, "logits/rejected": -1.6806871891021729, "logps/chosen": -112.74333953857422, "logps/rejected": -112.74333953857422, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.307990312576294, "rewards/margins": 0.0, "rewards/rejected": -3.307990312576294, "step": 8635 }, { "epoch": 1.91, "learning_rate": 8.309623556814972e-06, "logits/chosen": -1.2965342998504639, "logits/rejected": -1.2067301273345947, "logps/chosen": -112.60749816894531, "logps/rejected": -300.8511047363281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.3864952027797699, "rewards/margins": 8.952683448791504, "rewards/rejected": -8.566187858581543, "step": 8636 }, { "epoch": 1.91, "learning_rate": 8.30827986825991e-06, "logits/chosen": -1.5195974111557007, "logits/rejected": -1.4537400007247925, "logps/chosen": -211.37930297851562, "logps/rejected": -356.1918029785156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 3.396380662918091, "rewards/margins": 8.539761543273926, "rewards/rejected": -5.143380641937256, "step": 8637 }, { "epoch": 1.91, "learning_rate": 8.306935754598838e-06, "logits/chosen": -1.1905333995819092, "logits/rejected": -1.1858282089233398, "logps/chosen": -98.55320739746094, "logps/rejected": -117.22428894042969, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/chosen": -1.6729096174240112, "rewards/margins": 2.0896787643432617, "rewards/rejected": -3.7625885009765625, "step": 8638 }, { "epoch": 1.91, "learning_rate": 8.305591216004468e-06, "logits/chosen": -1.5457793474197388, "logits/rejected": -1.5975068807601929, "logps/chosen": -59.032073974609375, "logps/rejected": -116.96975708007812, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.4668899476528168, "rewards/margins": 8.333501815795898, "rewards/rejected": -8.800392150878906, "step": 8639 }, { "epoch": 1.91, "learning_rate": 8.304246252649574e-06, "logits/chosen": -1.5329269170761108, "logits/rejected": -1.6239488124847412, "logps/chosen": -194.22576904296875, "logps/rejected": -95.99209594726562, "loss": 0.8902, "rewards/accuracies": 0.0, "rewards/chosen": -8.380477905273438, "rewards/margins": -1.5650749206542969, "rewards/rejected": -6.815402984619141, "step": 8640 }, { "epoch": 1.91, "learning_rate": 8.302900864706982e-06, "logits/chosen": -1.6401958465576172, "logits/rejected": -1.6842480897903442, "logps/chosen": -143.01748657226562, "logps/rejected": -145.08157348632812, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -6.159231662750244, "rewards/margins": 4.086854457855225, "rewards/rejected": -10.246086120605469, "step": 8641 }, { "epoch": 1.91, "learning_rate": 8.301555052349567e-06, "logits/chosen": -1.8497806787490845, "logits/rejected": -1.1525375843048096, "logps/chosen": -115.83226013183594, "logps/rejected": -1059.739990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.12763671576976776, "rewards/margins": 94.88902282714844, "rewards/rejected": -94.76138305664062, "step": 8642 }, { "epoch": 1.91, "learning_rate": 8.300208815750266e-06, "logits/chosen": -1.4116671085357666, "logits/rejected": -1.3932031393051147, "logps/chosen": -248.33248901367188, "logps/rejected": -370.2206726074219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.479290723800659, "rewards/margins": 19.847095489501953, "rewards/rejected": -16.36780548095703, "step": 8643 }, { "epoch": 1.91, "learning_rate": 8.298862155082065e-06, "logits/chosen": -1.6013129949569702, "logits/rejected": -1.588734745979309, "logps/chosen": -118.04463195800781, "logps/rejected": -146.4820098876953, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.6130226254463196, "rewards/margins": 4.222621917724609, "rewards/rejected": -4.835644721984863, "step": 8644 }, { "epoch": 1.91, "learning_rate": 8.297515070518008e-06, "logits/chosen": -1.3904354572296143, "logits/rejected": -1.543342113494873, "logps/chosen": -250.50039672851562, "logps/rejected": -86.66917419433594, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.1422271728515625, "rewards/margins": 5.698241710662842, "rewards/rejected": -5.840468883514404, "step": 8645 }, { "epoch": 1.91, "learning_rate": 8.296167562231192e-06, "logits/chosen": -1.144405484199524, "logits/rejected": -1.1680222749710083, "logps/chosen": -188.38916015625, "logps/rejected": -100.28208923339844, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.20269775390625, "rewards/margins": 7.947681427001953, "rewards/rejected": -6.744983673095703, "step": 8646 }, { "epoch": 1.91, "learning_rate": 8.294819630394767e-06, "logits/chosen": -1.4776233434677124, "logits/rejected": -1.4649789333343506, "logps/chosen": -146.99069213867188, "logps/rejected": -109.38815307617188, "loss": 0.3648, "rewards/accuracies": 0.0, "rewards/chosen": -2.9235153198242188, "rewards/margins": -0.067840576171875, "rewards/rejected": -2.8556747436523438, "step": 8647 }, { "epoch": 1.91, "learning_rate": 8.293471275181938e-06, "logits/chosen": -1.542650818824768, "logits/rejected": -1.6642206907272339, "logps/chosen": -161.99191284179688, "logps/rejected": -200.48683166503906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.324284553527832, "rewards/margins": 10.546968460083008, "rewards/rejected": -15.87125301361084, "step": 8648 }, { "epoch": 1.91, "learning_rate": 8.292122496765969e-06, "logits/chosen": -1.3305354118347168, "logits/rejected": -1.3305354118347168, "logps/chosen": -300.5103454589844, "logps/rejected": -300.5103454589844, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -2.564587354660034, "rewards/margins": 0.0, "rewards/rejected": -2.564587354660034, "step": 8649 }, { "epoch": 1.91, "learning_rate": 8.290773295320173e-06, "logits/chosen": -1.4099090099334717, "logits/rejected": -1.3060988187789917, "logps/chosen": -129.7122802734375, "logps/rejected": -220.85372924804688, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -1.1435012817382812, "rewards/margins": 3.4454150199890137, "rewards/rejected": -4.588916301727295, "step": 8650 }, { "epoch": 1.91, "learning_rate": 8.28942367101792e-06, "logits/chosen": -1.5320104360580444, "logits/rejected": -1.647916316986084, "logps/chosen": -234.15408325195312, "logps/rejected": -175.39939880371094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.2759995460510254, "rewards/margins": 8.902725219726562, "rewards/rejected": -11.178725242614746, "step": 8651 }, { "epoch": 1.92, "learning_rate": 8.288073624032634e-06, "logits/chosen": -1.6379156112670898, "logits/rejected": -1.5993494987487793, "logps/chosen": -144.88864135742188, "logps/rejected": -228.4082794189453, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7039238214492798, "rewards/margins": 7.11912202835083, "rewards/rejected": -8.82304573059082, "step": 8652 }, { "epoch": 1.92, "learning_rate": 8.28672315453779e-06, "logits/chosen": -1.5351336002349854, "logits/rejected": -1.7789000272750854, "logps/chosen": -267.1964111328125, "logps/rejected": -154.333251953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.673748791217804, "rewards/margins": 9.816911697387695, "rewards/rejected": -10.490660667419434, "step": 8653 }, { "epoch": 1.92, "learning_rate": 8.285372262706922e-06, "logits/chosen": -1.56858491897583, "logits/rejected": -1.5690423250198364, "logps/chosen": -88.83213806152344, "logps/rejected": -129.44598388671875, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": 0.9400039911270142, "rewards/margins": 2.872708320617676, "rewards/rejected": -1.932704210281372, "step": 8654 }, { "epoch": 1.92, "learning_rate": 8.284020948713615e-06, "logits/chosen": -1.6112910509109497, "logits/rejected": -1.5949561595916748, "logps/chosen": -160.48287963867188, "logps/rejected": -254.8309326171875, "loss": 0.1263, "rewards/accuracies": 1.0, "rewards/chosen": -2.5284347534179688, "rewards/margins": 7.613456726074219, "rewards/rejected": -10.141891479492188, "step": 8655 }, { "epoch": 1.92, "learning_rate": 8.282669212731511e-06, "logits/chosen": -1.3241146802902222, "logits/rejected": -1.3209213018417358, "logps/chosen": -191.07765197753906, "logps/rejected": -162.37234497070312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 3.956097364425659, "rewards/margins": 7.86614990234375, "rewards/rejected": -3.910052537918091, "step": 8656 }, { "epoch": 1.92, "learning_rate": 8.281317054934306e-06, "logits/chosen": -1.350777506828308, "logits/rejected": -1.350777506828308, "logps/chosen": -125.81037902832031, "logps/rejected": -125.81037902832031, "loss": 0.3468, "rewards/accuracies": 0.0, "rewards/chosen": -0.5458831787109375, "rewards/margins": 0.0, "rewards/rejected": -0.5458831787109375, "step": 8657 }, { "epoch": 1.92, "learning_rate": 8.279964475495745e-06, "logits/chosen": -1.3751599788665771, "logits/rejected": -1.360229253768921, "logps/chosen": -179.493408203125, "logps/rejected": -200.018798828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.110137939453125, "rewards/margins": 6.921034336090088, "rewards/rejected": -7.031172275543213, "step": 8658 }, { "epoch": 1.92, "learning_rate": 8.278611474589635e-06, "logits/chosen": -1.4120129346847534, "logits/rejected": -1.3625338077545166, "logps/chosen": -129.4720458984375, "logps/rejected": -359.63702392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 5.484147548675537, "rewards/margins": 14.081560134887695, "rewards/rejected": -8.597412109375, "step": 8659 }, { "epoch": 1.92, "learning_rate": 8.277258052389834e-06, "logits/chosen": -2.1186130046844482, "logits/rejected": -2.1194870471954346, "logps/chosen": -99.36447143554688, "logps/rejected": -94.99755859375, "loss": 0.6221, "rewards/accuracies": 0.0, "rewards/chosen": -2.034994602203369, "rewards/margins": -0.9037185907363892, "rewards/rejected": -1.13127601146698, "step": 8660 }, { "epoch": 1.92, "learning_rate": 8.27590420907025e-06, "logits/chosen": -1.3027886152267456, "logits/rejected": -1.3542425632476807, "logps/chosen": -224.8135528564453, "logps/rejected": -150.73199462890625, "loss": 0.0914, "rewards/accuracies": 1.0, "rewards/chosen": -1.898768663406372, "rewards/margins": 1.6066880226135254, "rewards/rejected": -3.5054566860198975, "step": 8661 }, { "epoch": 1.92, "learning_rate": 8.27454994480485e-06, "logits/chosen": -1.5797401666641235, "logits/rejected": -1.4913220405578613, "logps/chosen": -97.39356994628906, "logps/rejected": -186.666259765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.3421226441860199, "rewards/margins": 7.348609924316406, "rewards/rejected": -7.0064873695373535, "step": 8662 }, { "epoch": 1.92, "learning_rate": 8.273195259767653e-06, "logits/chosen": -1.1938717365264893, "logits/rejected": -1.124686598777771, "logps/chosen": -192.29307556152344, "logps/rejected": -293.5496520996094, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -2.5746231079101562, "rewards/margins": 4.411705017089844, "rewards/rejected": -6.986328125, "step": 8663 }, { "epoch": 1.92, "learning_rate": 8.271840154132736e-06, "logits/chosen": -1.3164963722229004, "logits/rejected": -1.3916518688201904, "logps/chosen": -302.7412109375, "logps/rejected": -211.0598602294922, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.380462646484375, "rewards/margins": 8.033729553222656, "rewards/rejected": -9.414192199707031, "step": 8664 }, { "epoch": 1.92, "learning_rate": 8.270484628074222e-06, "logits/chosen": -1.768509864807129, "logits/rejected": -1.8272112607955933, "logps/chosen": -200.70687866210938, "logps/rejected": -208.2032470703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7892608642578125, "rewards/margins": 6.435357570648193, "rewards/rejected": -7.224618434906006, "step": 8665 }, { "epoch": 1.92, "learning_rate": 8.269128681766296e-06, "logits/chosen": -1.6861494779586792, "logits/rejected": -1.556854009628296, "logps/chosen": -107.76720428466797, "logps/rejected": -176.48446655273438, "loss": 0.6893, "rewards/accuracies": 0.0, "rewards/chosen": -2.1718711853027344, "rewards/margins": -1.0883156061172485, "rewards/rejected": -1.0835555791854858, "step": 8666 }, { "epoch": 1.92, "learning_rate": 8.267772315383195e-06, "logits/chosen": -1.6142022609710693, "logits/rejected": -1.751616358757019, "logps/chosen": -195.0509490966797, "logps/rejected": -143.49929809570312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 3.4696061611175537, "rewards/margins": 7.880228042602539, "rewards/rejected": -4.410621643066406, "step": 8667 }, { "epoch": 1.92, "learning_rate": 8.266415529099205e-06, "logits/chosen": -1.2983818054199219, "logits/rejected": -1.381332516670227, "logps/chosen": -184.2228240966797, "logps/rejected": -199.60902404785156, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.311187744140625, "rewards/margins": 5.2938995361328125, "rewards/rejected": -4.9827117919921875, "step": 8668 }, { "epoch": 1.92, "learning_rate": 8.265058323088673e-06, "logits/chosen": -1.3949357271194458, "logits/rejected": -1.3796827793121338, "logps/chosen": -157.92527770996094, "logps/rejected": -267.46539306640625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 0.8648635745048523, "rewards/margins": 9.436698913574219, "rewards/rejected": -8.5718355178833, "step": 8669 }, { "epoch": 1.92, "learning_rate": 8.263700697525994e-06, "logits/chosen": -1.7978551387786865, "logits/rejected": -1.7633883953094482, "logps/chosen": -92.1681137084961, "logps/rejected": -176.7391815185547, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5509628653526306, "rewards/margins": 11.553601264953613, "rewards/rejected": -12.10456371307373, "step": 8670 }, { "epoch": 1.92, "learning_rate": 8.262342652585621e-06, "logits/chosen": -1.4224191904067993, "logits/rejected": -1.636525273323059, "logps/chosen": -223.99847412109375, "logps/rejected": -115.14671325683594, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 1.790124535560608, "rewards/margins": 6.016899108886719, "rewards/rejected": -4.2267746925354, "step": 8671 }, { "epoch": 1.92, "learning_rate": 8.260984188442063e-06, "logits/chosen": -1.508955717086792, "logits/rejected": -1.5067859888076782, "logps/chosen": -113.98318481445312, "logps/rejected": -144.40789794921875, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.8309006094932556, "rewards/margins": 3.9320807456970215, "rewards/rejected": -4.762981414794922, "step": 8672 }, { "epoch": 1.92, "learning_rate": 8.259625305269873e-06, "logits/chosen": -1.3944169282913208, "logits/rejected": -1.3598130941390991, "logps/chosen": -193.5084228515625, "logps/rejected": -242.05712890625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 8.053878784179688, "rewards/margins": 12.018707275390625, "rewards/rejected": -3.9648284912109375, "step": 8673 }, { "epoch": 1.92, "learning_rate": 8.258266003243667e-06, "logits/chosen": -1.4306490421295166, "logits/rejected": -1.335699439048767, "logps/chosen": -84.71504211425781, "logps/rejected": -215.47280883789062, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -2.181180715560913, "rewards/margins": 3.7750542163848877, "rewards/rejected": -5.956234931945801, "step": 8674 }, { "epoch": 1.92, "learning_rate": 8.256906282538113e-06, "logits/chosen": -1.6779437065124512, "logits/rejected": -1.7729038000106812, "logps/chosen": -133.6956024169922, "logps/rejected": -97.03093719482422, "loss": 0.0611, "rewards/accuracies": 1.0, "rewards/chosen": -1.6066970825195312, "rewards/margins": 2.0403218269348145, "rewards/rejected": -3.6470189094543457, "step": 8675 }, { "epoch": 1.92, "learning_rate": 8.25554614332793e-06, "logits/chosen": -1.4352552890777588, "logits/rejected": -1.4215428829193115, "logps/chosen": -85.0356216430664, "logps/rejected": -168.61651611328125, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -1.5407981872558594, "rewards/margins": 5.087507724761963, "rewards/rejected": -6.628305912017822, "step": 8676 }, { "epoch": 1.92, "learning_rate": 8.254185585787895e-06, "logits/chosen": -0.9221383333206177, "logits/rejected": -0.9128898978233337, "logps/chosen": -186.83010864257812, "logps/rejected": -197.35931396484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.86203932762146, "rewards/margins": 11.913082122802734, "rewards/rejected": -9.051042556762695, "step": 8677 }, { "epoch": 1.92, "learning_rate": 8.252824610092835e-06, "logits/chosen": -1.1962159872055054, "logits/rejected": -1.1519052982330322, "logps/chosen": -264.6017150878906, "logps/rejected": -217.7079620361328, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": -0.17373351752758026, "rewards/margins": 2.7767839431762695, "rewards/rejected": -2.950517416000366, "step": 8678 }, { "epoch": 1.92, "learning_rate": 8.251463216417632e-06, "logits/chosen": -1.5439403057098389, "logits/rejected": -1.5895636081695557, "logps/chosen": -115.4390640258789, "logps/rejected": -127.18568420410156, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.0876481533050537, "rewards/margins": 6.95760440826416, "rewards/rejected": -10.045252799987793, "step": 8679 }, { "epoch": 1.92, "learning_rate": 8.250101404937223e-06, "logits/chosen": -1.432640790939331, "logits/rejected": -1.7830796241760254, "logps/chosen": -227.39797973632812, "logps/rejected": -103.54611206054688, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 3.85813307762146, "rewards/margins": 7.558507919311523, "rewards/rejected": -3.7003746032714844, "step": 8680 }, { "epoch": 1.92, "learning_rate": 8.248739175826594e-06, "logits/chosen": -1.1875708103179932, "logits/rejected": -1.2567757368087769, "logps/chosen": -88.55371856689453, "logps/rejected": -66.37034606933594, "loss": 0.5199, "rewards/accuracies": 0.0, "rewards/chosen": -2.2181618213653564, "rewards/margins": -0.6019684076309204, "rewards/rejected": -1.616193413734436, "step": 8681 }, { "epoch": 1.92, "learning_rate": 8.247376529260793e-06, "logits/chosen": -1.6302735805511475, "logits/rejected": -1.4870716333389282, "logps/chosen": -88.37608337402344, "logps/rejected": -202.53704833984375, "loss": 0.2547, "rewards/accuracies": 1.0, "rewards/chosen": -1.857160210609436, "rewards/margins": 0.4088524580001831, "rewards/rejected": -2.266012668609619, "step": 8682 }, { "epoch": 1.92, "learning_rate": 8.246013465414914e-06, "logits/chosen": -1.6012723445892334, "logits/rejected": -1.5713609457015991, "logps/chosen": -122.39596557617188, "logps/rejected": -117.4190673828125, "loss": 0.0799, "rewards/accuracies": 1.0, "rewards/chosen": -0.07122192531824112, "rewards/margins": 1.8896621465682983, "rewards/rejected": -1.9608840942382812, "step": 8683 }, { "epoch": 1.92, "learning_rate": 8.244649984464109e-06, "logits/chosen": -1.2618461847305298, "logits/rejected": -1.3315753936767578, "logps/chosen": -225.840576171875, "logps/rejected": -136.15335083007812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.3141540586948395, "rewards/margins": 6.265583038330078, "rewards/rejected": -6.579737186431885, "step": 8684 }, { "epoch": 1.92, "learning_rate": 8.243286086583577e-06, "logits/chosen": -1.3348689079284668, "logits/rejected": -1.265212059020996, "logps/chosen": -121.59817504882812, "logps/rejected": -231.13616943359375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.7707245349884033, "rewards/margins": 6.848029136657715, "rewards/rejected": -5.077304363250732, "step": 8685 }, { "epoch": 1.92, "learning_rate": 8.241921771948583e-06, "logits/chosen": -1.5812106132507324, "logits/rejected": -1.5812106132507324, "logps/chosen": -217.42926025390625, "logps/rejected": -217.42926025390625, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.095126628875732, "rewards/margins": 0.0, "rewards/rejected": -6.095126628875732, "step": 8686 }, { "epoch": 1.92, "learning_rate": 8.240557040734434e-06, "logits/chosen": -1.5152747631072998, "logits/rejected": -1.6704782247543335, "logps/chosen": -147.14089965820312, "logps/rejected": -86.84718322753906, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": 3.94547438621521, "rewards/margins": 5.648622989654541, "rewards/rejected": -1.7031487226486206, "step": 8687 }, { "epoch": 1.92, "learning_rate": 8.239191893116494e-06, "logits/chosen": -1.6459343433380127, "logits/rejected": -1.6143349409103394, "logps/chosen": -101.67332458496094, "logps/rejected": -181.15943908691406, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": -4.084640026092529, "rewards/margins": 3.133167266845703, "rewards/rejected": -7.217807292938232, "step": 8688 }, { "epoch": 1.92, "learning_rate": 8.237826329270183e-06, "logits/chosen": -1.6413261890411377, "logits/rejected": -1.7041406631469727, "logps/chosen": -127.19029235839844, "logps/rejected": -105.98939514160156, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.1521623134613037, "rewards/margins": 5.560616493225098, "rewards/rejected": -8.71277904510498, "step": 8689 }, { "epoch": 1.92, "learning_rate": 8.236460349370972e-06, "logits/chosen": -1.3030197620391846, "logits/rejected": -1.4113825559616089, "logps/chosen": -207.282470703125, "logps/rejected": -130.1083526611328, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 3.9021027088165283, "rewards/margins": 8.354166030883789, "rewards/rejected": -4.452063083648682, "step": 8690 }, { "epoch": 1.92, "learning_rate": 8.235093953594387e-06, "logits/chosen": -1.8272002935409546, "logits/rejected": -1.7957043647766113, "logps/chosen": -178.48997497558594, "logps/rejected": -255.9292755126953, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.17390775680542, "rewards/margins": 9.82821273803711, "rewards/rejected": -15.002120018005371, "step": 8691 }, { "epoch": 1.92, "learning_rate": 8.233727142116007e-06, "logits/chosen": -1.2257057428359985, "logits/rejected": -1.0473438501358032, "logps/chosen": -103.37245178222656, "logps/rejected": -310.568115234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6154342889785767, "rewards/margins": 8.475956916809082, "rewards/rejected": -9.091391563415527, "step": 8692 }, { "epoch": 1.92, "learning_rate": 8.232359915111462e-06, "logits/chosen": -2.3165740966796875, "logits/rejected": -2.475210666656494, "logps/chosen": -106.93767547607422, "logps/rejected": -120.59439849853516, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.190861463546753, "rewards/margins": 6.000653266906738, "rewards/rejected": -9.19151496887207, "step": 8693 }, { "epoch": 1.92, "learning_rate": 8.230992272756438e-06, "logits/chosen": -1.0478622913360596, "logits/rejected": -1.0771684646606445, "logps/chosen": -170.27578735351562, "logps/rejected": -141.1755828857422, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.817883312702179, "rewards/margins": 6.576111793518066, "rewards/rejected": -5.758228302001953, "step": 8694 }, { "epoch": 1.92, "learning_rate": 8.229624215226675e-06, "logits/chosen": -1.1230040788650513, "logits/rejected": -1.0227077007293701, "logps/chosen": -195.78350830078125, "logps/rejected": -311.793212890625, "loss": 0.9734, "rewards/accuracies": 0.0, "rewards/chosen": -6.049292087554932, "rewards/margins": -1.7901520729064941, "rewards/rejected": -4.2591400146484375, "step": 8695 }, { "epoch": 1.92, "learning_rate": 8.228255742697962e-06, "logits/chosen": -0.8814297914505005, "logits/rejected": -1.0411497354507446, "logps/chosen": -125.20562744140625, "logps/rejected": -244.26934814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.06644134968519211, "rewards/margins": 12.800670623779297, "rewards/rejected": -12.73422908782959, "step": 8696 }, { "epoch": 1.92, "learning_rate": 8.226886855346148e-06, "logits/chosen": -1.243237018585205, "logits/rejected": -1.2480658292770386, "logps/chosen": -217.44952392578125, "logps/rejected": -296.6555480957031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.413726806640625, "rewards/margins": 10.50468921661377, "rewards/rejected": -10.918416023254395, "step": 8697 }, { "epoch": 1.93, "learning_rate": 8.225517553347132e-06, "logits/chosen": -1.2434245347976685, "logits/rejected": -1.3091756105422974, "logps/chosen": -108.46675109863281, "logps/rejected": -166.91392517089844, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -1.0791656970977783, "rewards/margins": 4.007890701293945, "rewards/rejected": -5.0870561599731445, "step": 8698 }, { "epoch": 1.93, "learning_rate": 8.224147836876861e-06, "logits/chosen": -1.740782380104065, "logits/rejected": -1.2930858135223389, "logps/chosen": -189.30154418945312, "logps/rejected": -812.6073608398438, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.6952911615371704, "rewards/margins": 71.92971801757812, "rewards/rejected": -71.23442840576172, "step": 8699 }, { "epoch": 1.93, "learning_rate": 8.222777706111345e-06, "logits/chosen": -1.6305080652236938, "logits/rejected": -1.743272066116333, "logps/chosen": -198.38357543945312, "logps/rejected": -232.17416381835938, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 0.4965561032295227, "rewards/margins": 8.994521141052246, "rewards/rejected": -8.497964859008789, "step": 8700 }, { "epoch": 1.93, "learning_rate": 8.221407161226641e-06, "logits/chosen": -1.397883653640747, "logits/rejected": -1.338789463043213, "logps/chosen": -116.89018249511719, "logps/rejected": -161.5854949951172, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.850018322467804, "rewards/margins": 5.001106262207031, "rewards/rejected": -5.8511247634887695, "step": 8701 }, { "epoch": 1.93, "learning_rate": 8.220036202398861e-06, "logits/chosen": -1.2229807376861572, "logits/rejected": -1.2275755405426025, "logps/chosen": -126.31563568115234, "logps/rejected": -223.2995147705078, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.861452579498291, "rewards/margins": 6.911362171173096, "rewards/rejected": -11.772814750671387, "step": 8702 }, { "epoch": 1.93, "learning_rate": 8.21866482980417e-06, "logits/chosen": -1.6621652841567993, "logits/rejected": -1.573952555656433, "logps/chosen": -111.97201538085938, "logps/rejected": -141.9783172607422, "loss": 0.3056, "rewards/accuracies": 1.0, "rewards/chosen": -2.516688585281372, "rewards/margins": 0.17512965202331543, "rewards/rejected": -2.6918182373046875, "step": 8703 }, { "epoch": 1.93, "learning_rate": 8.217293043618786e-06, "logits/chosen": -1.6227991580963135, "logits/rejected": -1.7318916320800781, "logps/chosen": -191.70242309570312, "logps/rejected": -114.03779602050781, "loss": 0.0292, "rewards/accuracies": 1.0, "rewards/chosen": -0.3548950254917145, "rewards/margins": 2.850795030593872, "rewards/rejected": -3.2056901454925537, "step": 8704 }, { "epoch": 1.93, "learning_rate": 8.21592084401898e-06, "logits/chosen": -1.7754453420639038, "logits/rejected": -1.8645594120025635, "logps/chosen": -134.90277099609375, "logps/rejected": -81.00065612792969, "loss": 0.1096, "rewards/accuracies": 1.0, "rewards/chosen": -3.7154595851898193, "rewards/margins": 1.4777705669403076, "rewards/rejected": -5.193230152130127, "step": 8705 }, { "epoch": 1.93, "learning_rate": 8.214548231181077e-06, "logits/chosen": -1.49287748336792, "logits/rejected": -1.5751042366027832, "logps/chosen": -141.53945922851562, "logps/rejected": -128.37200927734375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.2954148054122925, "rewards/margins": 5.629443168640137, "rewards/rejected": -6.924858093261719, "step": 8706 }, { "epoch": 1.93, "learning_rate": 8.213175205281451e-06, "logits/chosen": -1.6625118255615234, "logits/rejected": -1.768176794052124, "logps/chosen": -247.74935913085938, "logps/rejected": -161.0931396484375, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": 0.4034484922885895, "rewards/margins": 8.648152351379395, "rewards/rejected": -8.244704246520996, "step": 8707 }, { "epoch": 1.93, "learning_rate": 8.211801766496537e-06, "logits/chosen": -1.3124977350234985, "logits/rejected": -1.4469927549362183, "logps/chosen": -262.27032470703125, "logps/rejected": -156.05538940429688, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.2634521424770355, "rewards/margins": 7.947115421295166, "rewards/rejected": -8.210567474365234, "step": 8708 }, { "epoch": 1.93, "learning_rate": 8.210427915002819e-06, "logits/chosen": -1.5908617973327637, "logits/rejected": -1.5746124982833862, "logps/chosen": -101.42161560058594, "logps/rejected": -156.21514892578125, "loss": 0.3306, "rewards/accuracies": 1.0, "rewards/chosen": -3.432560682296753, "rewards/margins": 0.07394719123840332, "rewards/rejected": -3.5065078735351562, "step": 8709 }, { "epoch": 1.93, "learning_rate": 8.20905365097683e-06, "logits/chosen": -1.5926977396011353, "logits/rejected": -1.588979721069336, "logps/chosen": -113.4885025024414, "logps/rejected": -141.33285522460938, "loss": 0.1331, "rewards/accuracies": 1.0, "rewards/chosen": 0.12219315022230148, "rewards/margins": 1.5173537731170654, "rewards/rejected": -1.3951606750488281, "step": 8710 }, { "epoch": 1.93, "learning_rate": 8.20767897459516e-06, "logits/chosen": -0.9992766976356506, "logits/rejected": -1.049742341041565, "logps/chosen": -155.67910766601562, "logps/rejected": -147.94190979003906, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 2.8128952980041504, "rewards/margins": 8.524938583374023, "rewards/rejected": -5.712043762207031, "step": 8711 }, { "epoch": 1.93, "learning_rate": 8.206303886034455e-06, "logits/chosen": -1.5062378644943237, "logits/rejected": -1.5508410930633545, "logps/chosen": -235.53953552246094, "logps/rejected": -206.72964477539062, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.7402511835098267, "rewards/margins": 4.423982620239258, "rewards/rejected": -6.164233684539795, "step": 8712 }, { "epoch": 1.93, "learning_rate": 8.204928385471406e-06, "logits/chosen": -1.7163296937942505, "logits/rejected": -1.6308624744415283, "logps/chosen": -181.28575134277344, "logps/rejected": -221.08763122558594, "loss": 1.9148, "rewards/accuracies": 0.0, "rewards/chosen": -7.271904945373535, "rewards/margins": -3.8073785305023193, "rewards/rejected": -3.464526414871216, "step": 8713 }, { "epoch": 1.93, "learning_rate": 8.203552473082766e-06, "logits/chosen": -1.3064868450164795, "logits/rejected": -1.336722731590271, "logps/chosen": -118.33860778808594, "logps/rejected": -98.4527587890625, "loss": 0.321, "rewards/accuracies": 1.0, "rewards/chosen": -1.1524765491485596, "rewards/margins": 0.10525739192962646, "rewards/rejected": -1.257733941078186, "step": 8714 }, { "epoch": 1.93, "learning_rate": 8.202176149045334e-06, "logits/chosen": -1.448350191116333, "logits/rejected": -1.5395325422286987, "logps/chosen": -191.116455078125, "logps/rejected": -295.2200927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.844635009765625, "rewards/margins": 10.329363822937012, "rewards/rejected": -11.173998832702637, "step": 8715 }, { "epoch": 1.93, "learning_rate": 8.200799413535962e-06, "logits/chosen": -1.1747384071350098, "logits/rejected": -1.1986773014068604, "logps/chosen": -193.19586181640625, "logps/rejected": -166.63409423828125, "loss": 0.2625, "rewards/accuracies": 1.0, "rewards/chosen": -6.188244819641113, "rewards/margins": 0.3713231086730957, "rewards/rejected": -6.559567928314209, "step": 8716 }, { "epoch": 1.93, "learning_rate": 8.199422266731563e-06, "logits/chosen": -1.410577416419983, "logits/rejected": -1.2532511949539185, "logps/chosen": -162.58836364746094, "logps/rejected": -359.43560791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.12978363037109375, "rewards/margins": 12.733613967895508, "rewards/rejected": -12.863397598266602, "step": 8717 }, { "epoch": 1.93, "learning_rate": 8.198044708809094e-06, "logits/chosen": -1.221006155014038, "logits/rejected": -0.9799211621284485, "logps/chosen": -109.54705810546875, "logps/rejected": -889.2435913085938, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": -4.515395641326904, "rewards/margins": 69.10814666748047, "rewards/rejected": -73.62354278564453, "step": 8718 }, { "epoch": 1.93, "learning_rate": 8.196666739945566e-06, "logits/chosen": -1.8107924461364746, "logits/rejected": -1.8547554016113281, "logps/chosen": -177.17544555664062, "logps/rejected": -211.141357421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.152937412261963, "rewards/margins": 7.824708461761475, "rewards/rejected": -11.977645874023438, "step": 8719 }, { "epoch": 1.93, "learning_rate": 8.195288360318048e-06, "logits/chosen": -1.6075693368911743, "logits/rejected": -1.7194868326187134, "logps/chosen": -225.3826446533203, "logps/rejected": -121.75349426269531, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -4.500152587890625, "rewards/margins": 4.157994270324707, "rewards/rejected": -8.658146858215332, "step": 8720 }, { "epoch": 1.93, "learning_rate": 8.193909570103656e-06, "logits/chosen": -1.4914166927337646, "logits/rejected": -1.4360637664794922, "logps/chosen": -101.9310302734375, "logps/rejected": -171.85043334960938, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": 1.8623230457305908, "rewards/margins": 3.4241104125976562, "rewards/rejected": -1.561787486076355, "step": 8721 }, { "epoch": 1.93, "learning_rate": 8.192530369479562e-06, "logits/chosen": -1.9475724697113037, "logits/rejected": -1.6201245784759521, "logps/chosen": -63.73583221435547, "logps/rejected": -341.27850341796875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.3509620726108551, "rewards/margins": 6.757155418395996, "rewards/rejected": -7.108117580413818, "step": 8722 }, { "epoch": 1.93, "learning_rate": 8.191150758622991e-06, "logits/chosen": -1.33967924118042, "logits/rejected": -1.3512099981307983, "logps/chosen": -113.21151733398438, "logps/rejected": -99.07674407958984, "loss": 0.1378, "rewards/accuracies": 1.0, "rewards/chosen": -1.0560554265975952, "rewards/margins": 1.14818274974823, "rewards/rejected": -2.204238176345825, "step": 8723 }, { "epoch": 1.93, "learning_rate": 8.189770737711218e-06, "logits/chosen": -1.300231695175171, "logits/rejected": -1.2086575031280518, "logps/chosen": -152.59075927734375, "logps/rejected": -218.7193603515625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.7966598868370056, "rewards/margins": 6.23881721496582, "rewards/rejected": -5.44215726852417, "step": 8724 }, { "epoch": 1.93, "learning_rate": 8.188390306921574e-06, "logits/chosen": -1.3007113933563232, "logits/rejected": -1.2280924320220947, "logps/chosen": -80.04728698730469, "logps/rejected": -144.32916259765625, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -1.7976264953613281, "rewards/margins": 4.384053707122803, "rewards/rejected": -6.181680202484131, "step": 8725 }, { "epoch": 1.93, "learning_rate": 8.18700946643144e-06, "logits/chosen": -1.4053844213485718, "logits/rejected": -1.224013090133667, "logps/chosen": -219.04345703125, "logps/rejected": -360.7006530761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.6031677722930908, "rewards/margins": 11.401827812194824, "rewards/rejected": -9.798660278320312, "step": 8726 }, { "epoch": 1.93, "learning_rate": 8.18562821641825e-06, "logits/chosen": -1.1887744665145874, "logits/rejected": -1.2174196243286133, "logps/chosen": -167.32818603515625, "logps/rejected": -256.1037292480469, "loss": 0.3469, "rewards/accuracies": 1.0, "rewards/chosen": 0.23939362168312073, "rewards/margins": 7.400077819824219, "rewards/rejected": -7.160684108734131, "step": 8727 }, { "epoch": 1.93, "learning_rate": 8.184246557059493e-06, "logits/chosen": -1.704469084739685, "logits/rejected": -1.1492836475372314, "logps/chosen": -136.1984405517578, "logps/rejected": -1015.4185791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9794952869415283, "rewards/margins": 92.50015258789062, "rewards/rejected": -95.47964477539062, "step": 8728 }, { "epoch": 1.93, "learning_rate": 8.182864488532707e-06, "logits/chosen": -1.5525381565093994, "logits/rejected": -1.5525381565093994, "logps/chosen": -120.96034240722656, "logps/rejected": -120.96034240722656, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.095151424407959, "rewards/margins": 0.0, "rewards/rejected": -4.095151424407959, "step": 8729 }, { "epoch": 1.93, "learning_rate": 8.181482011015488e-06, "logits/chosen": -1.4539015293121338, "logits/rejected": -1.3345365524291992, "logps/chosen": -105.74224853515625, "logps/rejected": -189.57945251464844, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -4.449873447418213, "rewards/margins": 4.629574298858643, "rewards/rejected": -9.079447746276855, "step": 8730 }, { "epoch": 1.93, "learning_rate": 8.180099124685476e-06, "logits/chosen": -1.6963385343551636, "logits/rejected": -1.8220341205596924, "logps/chosen": -233.96986389160156, "logps/rejected": -162.6754150390625, "loss": 0.5224, "rewards/accuracies": 0.0, "rewards/chosen": -9.085780143737793, "rewards/margins": -0.6114053726196289, "rewards/rejected": -8.474374771118164, "step": 8731 }, { "epoch": 1.93, "learning_rate": 8.178715829720374e-06, "logits/chosen": -1.5276974439620972, "logits/rejected": -1.598564624786377, "logps/chosen": -119.54440307617188, "logps/rejected": -127.88127136230469, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.4399384260177612, "rewards/margins": 4.063642978668213, "rewards/rejected": -5.503581523895264, "step": 8732 }, { "epoch": 1.93, "learning_rate": 8.177332126297928e-06, "logits/chosen": -1.332725167274475, "logits/rejected": -1.342675805091858, "logps/chosen": -186.11627197265625, "logps/rejected": -208.87176513671875, "loss": 0.0484, "rewards/accuracies": 1.0, "rewards/chosen": -8.065598487854004, "rewards/margins": 2.308065414428711, "rewards/rejected": -10.373663902282715, "step": 8733 }, { "epoch": 1.93, "learning_rate": 8.175948014595942e-06, "logits/chosen": -1.3523045778274536, "logits/rejected": -1.186556100845337, "logps/chosen": -165.42352294921875, "logps/rejected": -249.0010223388672, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 1.5333404541015625, "rewards/margins": 8.771060943603516, "rewards/rejected": -7.237720012664795, "step": 8734 }, { "epoch": 1.93, "learning_rate": 8.17456349479227e-06, "logits/chosen": -1.469036340713501, "logits/rejected": -1.4478963613510132, "logps/chosen": -73.01631164550781, "logps/rejected": -101.74075317382812, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -0.1486976593732834, "rewards/margins": 3.1342926025390625, "rewards/rejected": -3.2829902172088623, "step": 8735 }, { "epoch": 1.93, "learning_rate": 8.17317856706482e-06, "logits/chosen": -1.2339545488357544, "logits/rejected": -1.338850975036621, "logps/chosen": -212.43968200683594, "logps/rejected": -219.21910095214844, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.3859451413154602, "rewards/margins": 6.111627578735352, "rewards/rejected": -5.725682258605957, "step": 8736 }, { "epoch": 1.93, "learning_rate": 8.171793231591553e-06, "logits/chosen": -1.7037183046340942, "logits/rejected": -1.6291457414627075, "logps/chosen": -153.81216430664062, "logps/rejected": -223.9747314453125, "loss": 0.0484, "rewards/accuracies": 1.0, "rewards/chosen": -2.9140686988830566, "rewards/margins": 2.391763210296631, "rewards/rejected": -5.3058319091796875, "step": 8737 }, { "epoch": 1.93, "learning_rate": 8.170407488550482e-06, "logits/chosen": -1.3720961809158325, "logits/rejected": -1.314683437347412, "logps/chosen": -88.81863403320312, "logps/rejected": -156.08053588867188, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -2.581810474395752, "rewards/margins": 3.536332130432129, "rewards/rejected": -6.118142604827881, "step": 8738 }, { "epoch": 1.93, "learning_rate": 8.169021338119669e-06, "logits/chosen": -1.4776647090911865, "logits/rejected": -1.4366943836212158, "logps/chosen": -75.26756286621094, "logps/rejected": -115.60310363769531, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 0.5925583243370056, "rewards/margins": 4.601202011108398, "rewards/rejected": -4.008643627166748, "step": 8739 }, { "epoch": 1.93, "learning_rate": 8.167634780477231e-06, "logits/chosen": -1.3527220487594604, "logits/rejected": -1.241276502609253, "logps/chosen": -86.04837036132812, "logps/rejected": -251.080810546875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.0523452758789062, "rewards/margins": 3.7848010063171387, "rewards/rejected": -4.837146282196045, "step": 8740 }, { "epoch": 1.93, "learning_rate": 8.16624781580134e-06, "logits/chosen": -1.3584883213043213, "logits/rejected": -1.4341180324554443, "logps/chosen": -233.47451782226562, "logps/rejected": -174.04531860351562, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.6186355352401733, "rewards/margins": 3.680893898010254, "rewards/rejected": -5.299529552459717, "step": 8741 }, { "epoch": 1.93, "learning_rate": 8.164860444270217e-06, "logits/chosen": -1.4403599500656128, "logits/rejected": -1.4122411012649536, "logps/chosen": -169.4794921875, "logps/rejected": -146.5162353515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 1.0322052240371704, "rewards/margins": 6.260389804840088, "rewards/rejected": -5.228184700012207, "step": 8742 }, { "epoch": 1.94, "learning_rate": 8.163472666062133e-06, "logits/chosen": -1.500875473022461, "logits/rejected": -1.4470454454421997, "logps/chosen": -142.97674560546875, "logps/rejected": -222.14273071289062, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.6369614601135254, "rewards/margins": 5.937756061553955, "rewards/rejected": -8.57471752166748, "step": 8743 }, { "epoch": 1.94, "learning_rate": 8.162084481355418e-06, "logits/chosen": -1.5527631044387817, "logits/rejected": -0.9109702110290527, "logps/chosen": -106.54795837402344, "logps/rejected": -775.6425170898438, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.214308261871338, "rewards/margins": 65.09168243408203, "rewards/rejected": -67.30599212646484, "step": 8744 }, { "epoch": 1.94, "learning_rate": 8.160695890328448e-06, "logits/chosen": -1.8399455547332764, "logits/rejected": -1.919417142868042, "logps/chosen": -132.1724853515625, "logps/rejected": -196.09072875976562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7781753540039062, "rewards/margins": 13.003621101379395, "rewards/rejected": -13.7817964553833, "step": 8745 }, { "epoch": 1.94, "learning_rate": 8.159306893159652e-06, "logits/chosen": -1.5522552728652954, "logits/rejected": -1.5653904676437378, "logps/chosen": -108.93551635742188, "logps/rejected": -107.43732452392578, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.1794418096542358, "rewards/margins": 5.7107977867126465, "rewards/rejected": -6.890239715576172, "step": 8746 }, { "epoch": 1.94, "learning_rate": 8.157917490027518e-06, "logits/chosen": -1.529554843902588, "logits/rejected": -1.541741132736206, "logps/chosen": -199.9407958984375, "logps/rejected": -351.15673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.1703979969024658, "rewards/margins": 13.961077690124512, "rewards/rejected": -12.790679931640625, "step": 8747 }, { "epoch": 1.94, "learning_rate": 8.156527681110576e-06, "logits/chosen": -1.1679317951202393, "logits/rejected": -1.263887882232666, "logps/chosen": -179.44667053222656, "logps/rejected": -142.0069580078125, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -6.2216997146606445, "rewards/margins": 2.8888072967529297, "rewards/rejected": -9.110507011413574, "step": 8748 }, { "epoch": 1.94, "learning_rate": 8.155137466587415e-06, "logits/chosen": -1.7175320386886597, "logits/rejected": -1.7793116569519043, "logps/chosen": -126.55486297607422, "logps/rejected": -213.10687255859375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -2.8602454662323, "rewards/margins": 7.75798225402832, "rewards/rejected": -10.6182279586792, "step": 8749 }, { "epoch": 1.94, "learning_rate": 8.153746846636675e-06, "logits/chosen": -1.7929718494415283, "logits/rejected": -1.7173395156860352, "logps/chosen": -238.3095703125, "logps/rejected": -250.29603576660156, "loss": 0.4816, "rewards/accuracies": 0.0, "rewards/chosen": 0.36723023653030396, "rewards/margins": -0.4733932614326477, "rewards/rejected": 0.8406234979629517, "step": 8750 }, { "epoch": 1.94, "learning_rate": 8.152355821437048e-06, "logits/chosen": -1.2855111360549927, "logits/rejected": -1.2671908140182495, "logps/chosen": -129.08224487304688, "logps/rejected": -203.90982055664062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.02857360802590847, "rewards/margins": 10.496830940246582, "rewards/rejected": -10.468256950378418, "step": 8751 }, { "epoch": 1.94, "learning_rate": 8.150964391167273e-06, "logits/chosen": -1.4319225549697876, "logits/rejected": -1.412705659866333, "logps/chosen": -125.07894897460938, "logps/rejected": -213.677490234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8742828369140625, "rewards/margins": 6.03157377243042, "rewards/rejected": -6.905856609344482, "step": 8752 }, { "epoch": 1.94, "learning_rate": 8.149572556006151e-06, "logits/chosen": -1.3539804220199585, "logits/rejected": -1.4067039489746094, "logps/chosen": -131.79286193847656, "logps/rejected": -87.12288665771484, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": 1.3830032348632812, "rewards/margins": 2.5180916786193848, "rewards/rejected": -1.135088324546814, "step": 8753 }, { "epoch": 1.94, "learning_rate": 8.148180316132526e-06, "logits/chosen": -1.1793270111083984, "logits/rejected": -1.2144137620925903, "logps/chosen": -159.36297607421875, "logps/rejected": -100.46184539794922, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": 0.181060791015625, "rewards/margins": 2.969931125640869, "rewards/rejected": -2.788870334625244, "step": 8754 }, { "epoch": 1.94, "learning_rate": 8.146787671725299e-06, "logits/chosen": -1.3323286771774292, "logits/rejected": -1.4573391675949097, "logps/chosen": -98.33006286621094, "logps/rejected": -78.84342193603516, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": -1.7055107355117798, "rewards/margins": 2.9507651329040527, "rewards/rejected": -4.656275749206543, "step": 8755 }, { "epoch": 1.94, "learning_rate": 8.14539462296342e-06, "logits/chosen": -1.5978585481643677, "logits/rejected": -1.5308897495269775, "logps/chosen": -127.13307189941406, "logps/rejected": -258.12530517578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.1564620733261108, "rewards/margins": 8.092134475708008, "rewards/rejected": -9.24859619140625, "step": 8756 }, { "epoch": 1.94, "learning_rate": 8.144001170025894e-06, "logits/chosen": -1.4648323059082031, "logits/rejected": -1.425430417060852, "logps/chosen": -180.28529357910156, "logps/rejected": -292.739990234375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.6254287958145142, "rewards/margins": 6.855708122253418, "rewards/rejected": -5.230279445648193, "step": 8757 }, { "epoch": 1.94, "learning_rate": 8.142607313091775e-06, "logits/chosen": -1.1993035078048706, "logits/rejected": -1.0920617580413818, "logps/chosen": -212.97152709960938, "logps/rejected": -330.38848876953125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": 0.19769592583179474, "rewards/margins": 11.50593090057373, "rewards/rejected": -11.308235168457031, "step": 8758 }, { "epoch": 1.94, "learning_rate": 8.141213052340171e-06, "logits/chosen": -1.6088182926177979, "logits/rejected": -1.7054812908172607, "logps/chosen": -207.91444396972656, "logps/rejected": -158.1751251220703, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 1.120845079421997, "rewards/margins": 5.908114433288574, "rewards/rejected": -4.787269115447998, "step": 8759 }, { "epoch": 1.94, "learning_rate": 8.13981838795024e-06, "logits/chosen": -1.5640782117843628, "logits/rejected": -1.695780873298645, "logps/chosen": -155.29592895507812, "logps/rejected": -117.49188995361328, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 0.5921295285224915, "rewards/margins": 3.9373085498809814, "rewards/rejected": -3.3451790809631348, "step": 8760 }, { "epoch": 1.94, "learning_rate": 8.138423320101196e-06, "logits/chosen": -1.242899775505066, "logits/rejected": -1.200434923171997, "logps/chosen": -246.78733825683594, "logps/rejected": -341.94464111328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 1.282801866531372, "rewards/margins": 8.381712913513184, "rewards/rejected": -7.098910808563232, "step": 8761 }, { "epoch": 1.94, "learning_rate": 8.1370278489723e-06, "logits/chosen": -1.4915813207626343, "logits/rejected": -1.5351723432540894, "logps/chosen": -160.32211303710938, "logps/rejected": -187.54385375976562, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.357879638671875, "rewards/margins": 6.547459602355957, "rewards/rejected": -8.905339241027832, "step": 8762 }, { "epoch": 1.94, "learning_rate": 8.135631974742863e-06, "logits/chosen": -1.3657232522964478, "logits/rejected": -1.3283623456954956, "logps/chosen": -159.82382202148438, "logps/rejected": -186.92935180664062, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": -1.8203033208847046, "rewards/margins": 2.99212646484375, "rewards/rejected": -4.812429904937744, "step": 8763 }, { "epoch": 1.94, "learning_rate": 8.13423569759226e-06, "logits/chosen": -1.3794194459915161, "logits/rejected": -1.4197050333023071, "logps/chosen": -212.96697998046875, "logps/rejected": -145.27102661132812, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": 0.47151491045951843, "rewards/margins": 3.4571290016174316, "rewards/rejected": -2.985614061355591, "step": 8764 }, { "epoch": 1.94, "learning_rate": 8.132839017699901e-06, "logits/chosen": -1.6217557191848755, "logits/rejected": -1.6272132396697998, "logps/chosen": -108.08259582519531, "logps/rejected": -106.45396423339844, "loss": 0.3757, "rewards/accuracies": 0.0, "rewards/chosen": -2.411228895187378, "rewards/margins": -0.11324524879455566, "rewards/rejected": -2.2979836463928223, "step": 8765 }, { "epoch": 1.94, "learning_rate": 8.131441935245261e-06, "logits/chosen": -1.699521780014038, "logits/rejected": -1.7082220315933228, "logps/chosen": -157.3058624267578, "logps/rejected": -202.18264770507812, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -3.0979325771331787, "rewards/margins": 3.598828077316284, "rewards/rejected": -6.696760654449463, "step": 8766 }, { "epoch": 1.94, "learning_rate": 8.13004445040786e-06, "logits/chosen": -1.4035444259643555, "logits/rejected": -1.3256843090057373, "logps/chosen": -83.85810089111328, "logps/rejected": -240.31524658203125, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": -2.1236016750335693, "rewards/margins": 7.637330055236816, "rewards/rejected": -9.760931968688965, "step": 8767 }, { "epoch": 1.94, "learning_rate": 8.128646563367271e-06, "logits/chosen": -1.394383430480957, "logits/rejected": -1.27180814743042, "logps/chosen": -123.466796875, "logps/rejected": -411.1559143066406, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": 1.211567759513855, "rewards/margins": 14.34361457824707, "rewards/rejected": -13.132046699523926, "step": 8768 }, { "epoch": 1.94, "learning_rate": 8.12724827430312e-06, "logits/chosen": -1.8972691297531128, "logits/rejected": -1.9201866388320923, "logps/chosen": -160.15985107421875, "logps/rejected": -99.64051818847656, "loss": 0.1799, "rewards/accuracies": 1.0, "rewards/chosen": -0.39570313692092896, "rewards/margins": 0.8369888663291931, "rewards/rejected": -1.232692003250122, "step": 8769 }, { "epoch": 1.94, "learning_rate": 8.125849583395083e-06, "logits/chosen": -1.6443736553192139, "logits/rejected": -1.6092370748519897, "logps/chosen": -130.8160400390625, "logps/rejected": -144.1604766845703, "loss": 0.0772, "rewards/accuracies": 1.0, "rewards/chosen": -1.8456131219863892, "rewards/margins": 1.7977455854415894, "rewards/rejected": -3.6433587074279785, "step": 8770 }, { "epoch": 1.94, "learning_rate": 8.124450490822889e-06, "logits/chosen": -1.8868650197982788, "logits/rejected": -1.9827824831008911, "logps/chosen": -235.5651397705078, "logps/rejected": -248.23660278320312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.152598574757576, "rewards/margins": 9.59557819366455, "rewards/rejected": -9.748176574707031, "step": 8771 }, { "epoch": 1.94, "learning_rate": 8.123050996766317e-06, "logits/chosen": -1.5275119543075562, "logits/rejected": -1.5930134057998657, "logps/chosen": -207.39202880859375, "logps/rejected": -201.06655883789062, "loss": 0.08, "rewards/accuracies": 1.0, "rewards/chosen": -2.273275852203369, "rewards/margins": 1.7618255615234375, "rewards/rejected": -4.035101413726807, "step": 8772 }, { "epoch": 1.94, "learning_rate": 8.121651101405202e-06, "logits/chosen": -1.1038575172424316, "logits/rejected": -1.1757559776306152, "logps/chosen": -193.47381591796875, "logps/rejected": -121.10041809082031, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -2.126141309738159, "rewards/margins": 3.1286890506744385, "rewards/rejected": -5.254830360412598, "step": 8773 }, { "epoch": 1.94, "learning_rate": 8.120250804919424e-06, "logits/chosen": -1.4908543825149536, "logits/rejected": -1.4664418697357178, "logps/chosen": -99.1822509765625, "logps/rejected": -122.78900146484375, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -2.027919054031372, "rewards/margins": 3.803724527359009, "rewards/rejected": -5.831643581390381, "step": 8774 }, { "epoch": 1.94, "learning_rate": 8.118850107488916e-06, "logits/chosen": -1.3232539892196655, "logits/rejected": -1.413098931312561, "logps/chosen": -238.44239807128906, "logps/rejected": -205.61532592773438, "loss": 0.2141, "rewards/accuracies": 1.0, "rewards/chosen": -3.7871978282928467, "rewards/margins": 2.627002000808716, "rewards/rejected": -6.4141998291015625, "step": 8775 }, { "epoch": 1.94, "learning_rate": 8.117449009293668e-06, "logits/chosen": -1.4160116910934448, "logits/rejected": -1.4160116910934448, "logps/chosen": -285.53851318359375, "logps/rejected": -285.53851318359375, "loss": 0.349, "rewards/accuracies": 0.0, "rewards/chosen": -3.8316543102264404, "rewards/margins": 0.0, "rewards/rejected": -3.8316543102264404, "step": 8776 }, { "epoch": 1.94, "learning_rate": 8.116047510513718e-06, "logits/chosen": -1.8515688180923462, "logits/rejected": -1.7090157270431519, "logps/chosen": -137.24191284179688, "logps/rejected": -278.55218505859375, "loss": 0.083, "rewards/accuracies": 1.0, "rewards/chosen": -2.4499313831329346, "rewards/margins": 1.7118303775787354, "rewards/rejected": -4.16176176071167, "step": 8777 }, { "epoch": 1.94, "learning_rate": 8.114645611329152e-06, "logits/chosen": -1.2619614601135254, "logits/rejected": -0.9326339960098267, "logps/chosen": -183.88436889648438, "logps/rejected": -646.9755859375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.914965808391571, "rewards/margins": 29.905460357666016, "rewards/rejected": -28.990493774414062, "step": 8778 }, { "epoch": 1.94, "learning_rate": 8.113243311920113e-06, "logits/chosen": -1.4699130058288574, "logits/rejected": -1.3155887126922607, "logps/chosen": -85.08106994628906, "logps/rejected": -194.5633544921875, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -2.3825714588165283, "rewards/margins": 7.812420845031738, "rewards/rejected": -10.194992065429688, "step": 8779 }, { "epoch": 1.94, "learning_rate": 8.111840612466792e-06, "logits/chosen": -1.754425048828125, "logits/rejected": -1.705852746963501, "logps/chosen": -85.75544738769531, "logps/rejected": -182.13235473632812, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.9982933402061462, "rewards/margins": 6.969918727874756, "rewards/rejected": -7.968212127685547, "step": 8780 }, { "epoch": 1.94, "learning_rate": 8.110437513149433e-06, "logits/chosen": -1.5236155986785889, "logits/rejected": -1.0304503440856934, "logps/chosen": -215.15322875976562, "logps/rejected": -725.7265625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": 1.5749984979629517, "rewards/margins": 60.50297546386719, "rewards/rejected": -58.927978515625, "step": 8781 }, { "epoch": 1.94, "learning_rate": 8.109034014148331e-06, "logits/chosen": -1.6013638973236084, "logits/rejected": -1.6510224342346191, "logps/chosen": -205.349365234375, "logps/rejected": -221.9364013671875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 3.1999053955078125, "rewards/margins": 10.48071002960205, "rewards/rejected": -7.280804634094238, "step": 8782 }, { "epoch": 1.94, "learning_rate": 8.107630115643832e-06, "logits/chosen": -1.6168893575668335, "logits/rejected": -1.514660358428955, "logps/chosen": -87.39726257324219, "logps/rejected": -186.60418701171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7609764337539673, "rewards/margins": 8.121352195739746, "rewards/rejected": -8.882328987121582, "step": 8783 }, { "epoch": 1.94, "learning_rate": 8.106225817816333e-06, "logits/chosen": -1.7170026302337646, "logits/rejected": -1.7210590839385986, "logps/chosen": -112.09375, "logps/rejected": -178.34210205078125, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 0.07523498684167862, "rewards/margins": 5.950048923492432, "rewards/rejected": -5.874814033508301, "step": 8784 }, { "epoch": 1.94, "learning_rate": 8.104821120846287e-06, "logits/chosen": -1.2340375185012817, "logits/rejected": -1.4085263013839722, "logps/chosen": -222.37924194335938, "logps/rejected": -170.06936645507812, "loss": 0.8552, "rewards/accuracies": 1.0, "rewards/chosen": -1.1273956298828125, "rewards/margins": 4.797069072723389, "rewards/rejected": -5.924464702606201, "step": 8785 }, { "epoch": 1.94, "learning_rate": 8.103416024914186e-06, "logits/chosen": -1.3692667484283447, "logits/rejected": -1.3463256359100342, "logps/chosen": -159.4404296875, "logps/rejected": -165.4503173828125, "loss": 0.1449, "rewards/accuracies": 1.0, "rewards/chosen": -2.1659576892852783, "rewards/margins": 1.09014892578125, "rewards/rejected": -3.2561066150665283, "step": 8786 }, { "epoch": 1.94, "learning_rate": 8.102010530200589e-06, "logits/chosen": -1.6763131618499756, "logits/rejected": -1.7321110963821411, "logps/chosen": -117.22200012207031, "logps/rejected": -112.87016296386719, "loss": 0.2495, "rewards/accuracies": 1.0, "rewards/chosen": -2.524357557296753, "rewards/margins": 0.7079315185546875, "rewards/rejected": -3.2322890758514404, "step": 8787 }, { "epoch": 1.95, "learning_rate": 8.100604636886095e-06, "logits/chosen": -1.861794352531433, "logits/rejected": -1.8300772905349731, "logps/chosen": -92.54704284667969, "logps/rejected": -223.30764770507812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.30950164794921875, "rewards/margins": 9.389810562133789, "rewards/rejected": -9.699312210083008, "step": 8788 }, { "epoch": 1.95, "learning_rate": 8.09919834515136e-06, "logits/chosen": -1.6751899719238281, "logits/rejected": -1.5429123640060425, "logps/chosen": -143.98660278320312, "logps/rejected": -366.11700439453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.084920644760132, "rewards/margins": 9.603592872619629, "rewards/rejected": -11.68851375579834, "step": 8789 }, { "epoch": 1.95, "learning_rate": 8.097791655177085e-06, "logits/chosen": -1.8995096683502197, "logits/rejected": -1.9030309915542603, "logps/chosen": -131.47647094726562, "logps/rejected": -127.369140625, "loss": 0.6467, "rewards/accuracies": 0.0, "rewards/chosen": -4.657563209533691, "rewards/margins": -0.9727678298950195, "rewards/rejected": -3.684795379638672, "step": 8790 }, { "epoch": 1.95, "learning_rate": 8.096384567144033e-06, "logits/chosen": -1.6559349298477173, "logits/rejected": -1.6551636457443237, "logps/chosen": -123.66131591796875, "logps/rejected": -244.759033203125, "loss": 0.3317, "rewards/accuracies": 1.0, "rewards/chosen": -0.9314674735069275, "rewards/margins": 13.816558837890625, "rewards/rejected": -14.748025894165039, "step": 8791 }, { "epoch": 1.95, "learning_rate": 8.094977081233006e-06, "logits/chosen": -1.5250210762023926, "logits/rejected": -1.4551126956939697, "logps/chosen": -71.1037826538086, "logps/rejected": -247.8592529296875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 0.48095399141311646, "rewards/margins": 9.943931579589844, "rewards/rejected": -9.462977409362793, "step": 8792 }, { "epoch": 1.95, "learning_rate": 8.093569197624864e-06, "logits/chosen": -1.572265386581421, "logits/rejected": -1.584579348564148, "logps/chosen": -125.99610900878906, "logps/rejected": -116.90486145019531, "loss": 0.3516, "rewards/accuracies": 0.0, "rewards/chosen": -2.1957809925079346, "rewards/margins": -0.01996922492980957, "rewards/rejected": -2.175811767578125, "step": 8793 }, { "epoch": 1.95, "learning_rate": 8.092160916500515e-06, "logits/chosen": -1.7888113260269165, "logits/rejected": -1.7842429876327515, "logps/chosen": -132.7945556640625, "logps/rejected": -164.5763397216797, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.1776679754257202, "rewards/margins": 7.362193584442139, "rewards/rejected": -8.539861679077148, "step": 8794 }, { "epoch": 1.95, "learning_rate": 8.090752238040925e-06, "logits/chosen": -1.9502136707305908, "logits/rejected": -1.9041926860809326, "logps/chosen": -119.77442169189453, "logps/rejected": -330.408935546875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.1892753690481186, "rewards/margins": 13.896439552307129, "rewards/rejected": -14.085715293884277, "step": 8795 }, { "epoch": 1.95, "learning_rate": 8.0893431624271e-06, "logits/chosen": -1.5848909616470337, "logits/rejected": -1.6069062948226929, "logps/chosen": -84.55352020263672, "logps/rejected": -95.8402099609375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.022365570068359375, "rewards/margins": 4.919247627258301, "rewards/rejected": -4.94161319732666, "step": 8796 }, { "epoch": 1.95, "learning_rate": 8.087933689840107e-06, "logits/chosen": -1.4554808139801025, "logits/rejected": -1.5085862874984741, "logps/chosen": -184.4187469482422, "logps/rejected": -146.01571655273438, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5363494753837585, "rewards/margins": 5.939010143280029, "rewards/rejected": -6.4753594398498535, "step": 8797 }, { "epoch": 1.95, "learning_rate": 8.086523820461057e-06, "logits/chosen": -1.5704519748687744, "logits/rejected": -1.5486140251159668, "logps/chosen": -93.95005798339844, "logps/rejected": -150.29222106933594, "loss": 0.0985, "rewards/accuracies": 1.0, "rewards/chosen": -5.859029293060303, "rewards/margins": 1.5282020568847656, "rewards/rejected": -7.387231349945068, "step": 8798 }, { "epoch": 1.95, "learning_rate": 8.085113554471115e-06, "logits/chosen": -1.6370635032653809, "logits/rejected": -1.6159918308258057, "logps/chosen": -142.74652099609375, "logps/rejected": -208.3220977783203, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.166249081492424, "rewards/margins": 11.022692680358887, "rewards/rejected": -10.856443405151367, "step": 8799 }, { "epoch": 1.95, "learning_rate": 8.083702892051499e-06, "logits/chosen": -1.5306241512298584, "logits/rejected": -1.5083192586898804, "logps/chosen": -81.9592056274414, "logps/rejected": -127.46375274658203, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -1.6987007856369019, "rewards/margins": 3.1763601303100586, "rewards/rejected": -4.87506103515625, "step": 8800 }, { "epoch": 1.95, "learning_rate": 8.082291833383475e-06, "logits/chosen": -1.5033619403839111, "logits/rejected": -1.530874490737915, "logps/chosen": -168.73886108398438, "logps/rejected": -153.60696411132812, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -2.62400221824646, "rewards/margins": 3.5383107662200928, "rewards/rejected": -6.162312984466553, "step": 8801 }, { "epoch": 1.95, "learning_rate": 8.080880378648359e-06, "logits/chosen": -1.4947727918624878, "logits/rejected": -1.4706975221633911, "logps/chosen": -121.90013122558594, "logps/rejected": -123.04640197753906, "loss": 0.2599, "rewards/accuracies": 1.0, "rewards/chosen": -2.6996216773986816, "rewards/margins": 0.4068145751953125, "rewards/rejected": -3.106436252593994, "step": 8802 }, { "epoch": 1.95, "learning_rate": 8.079468528027519e-06, "logits/chosen": -1.3507194519042969, "logits/rejected": -1.3613903522491455, "logps/chosen": -123.60041809082031, "logps/rejected": -183.7245330810547, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.4035019874572754, "rewards/margins": 7.993811130523682, "rewards/rejected": -5.590309143066406, "step": 8803 }, { "epoch": 1.95, "learning_rate": 8.078056281702378e-06, "logits/chosen": -1.6226897239685059, "logits/rejected": -1.526564598083496, "logps/chosen": -198.20240783691406, "logps/rejected": -259.02252197265625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 1.4722031354904175, "rewards/margins": 4.410182476043701, "rewards/rejected": -2.937979221343994, "step": 8804 }, { "epoch": 1.95, "learning_rate": 8.076643639854405e-06, "logits/chosen": -1.5999399423599243, "logits/rejected": -1.5599011182785034, "logps/chosen": -128.77261352539062, "logps/rejected": -217.87548828125, "loss": 0.7211, "rewards/accuracies": 0.0, "rewards/chosen": -0.36131593585014343, "rewards/margins": -1.1715179681777954, "rewards/rejected": 0.8102020621299744, "step": 8805 }, { "epoch": 1.95, "learning_rate": 8.075230602665118e-06, "logits/chosen": -1.2531180381774902, "logits/rejected": -1.261035680770874, "logps/chosen": -249.74880981445312, "logps/rejected": -233.6572723388672, "loss": 0.4115, "rewards/accuracies": 0.0, "rewards/chosen": -10.003539085388184, "rewards/margins": -0.1745758056640625, "rewards/rejected": -9.828963279724121, "step": 8806 }, { "epoch": 1.95, "learning_rate": 8.073817170316093e-06, "logits/chosen": -1.173337697982788, "logits/rejected": -1.167785406112671, "logps/chosen": -64.94329833984375, "logps/rejected": -203.72921752929688, "loss": 0.0988, "rewards/accuracies": 1.0, "rewards/chosen": -0.2839248776435852, "rewards/margins": 7.767313003540039, "rewards/rejected": -8.051238059997559, "step": 8807 }, { "epoch": 1.95, "learning_rate": 8.07240334298895e-06, "logits/chosen": -1.2877837419509888, "logits/rejected": -1.2877837419509888, "logps/chosen": -96.53765106201172, "logps/rejected": -96.53765106201172, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -2.7622430324554443, "rewards/margins": 0.0, "rewards/rejected": -2.7622430324554443, "step": 8808 }, { "epoch": 1.95, "learning_rate": 8.070989120865362e-06, "logits/chosen": -1.216263771057129, "logits/rejected": -1.2062411308288574, "logps/chosen": -196.24954223632812, "logps/rejected": -196.4004669189453, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -5.519670009613037, "rewards/margins": 3.872352123260498, "rewards/rejected": -9.392022132873535, "step": 8809 }, { "epoch": 1.95, "learning_rate": 8.069574504127058e-06, "logits/chosen": -1.4040406942367554, "logits/rejected": -1.3440560102462769, "logps/chosen": -92.12718200683594, "logps/rejected": -137.324462890625, "loss": 0.2396, "rewards/accuracies": 1.0, "rewards/chosen": -0.2806945741176605, "rewards/margins": 5.295276165008545, "rewards/rejected": -5.575970649719238, "step": 8810 }, { "epoch": 1.95, "learning_rate": 8.068159492955806e-06, "logits/chosen": -1.5225332975387573, "logits/rejected": -1.6417322158813477, "logps/chosen": -189.32281494140625, "logps/rejected": -142.72988891601562, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.20521239936351776, "rewards/margins": 6.207622051239014, "rewards/rejected": -6.0024094581604, "step": 8811 }, { "epoch": 1.95, "learning_rate": 8.066744087533436e-06, "logits/chosen": -1.7918236255645752, "logits/rejected": -1.7248671054840088, "logps/chosen": -94.66795349121094, "logps/rejected": -235.89404296875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.60040283203125, "rewards/margins": 6.5852861404418945, "rewards/rejected": -8.185688972473145, "step": 8812 }, { "epoch": 1.95, "learning_rate": 8.065328288041823e-06, "logits/chosen": -1.289625883102417, "logits/rejected": -1.3319315910339355, "logps/chosen": -248.34628295898438, "logps/rejected": -228.92608642578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 3.9876129627227783, "rewards/margins": 9.014424324035645, "rewards/rejected": -5.026811122894287, "step": 8813 }, { "epoch": 1.95, "learning_rate": 8.063912094662893e-06, "logits/chosen": -1.563183307647705, "logits/rejected": -1.3513076305389404, "logps/chosen": -232.57833862304688, "logps/rejected": -1183.647705078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.09446869045495987, "rewards/margins": 107.678466796875, "rewards/rejected": -107.58399963378906, "step": 8814 }, { "epoch": 1.95, "learning_rate": 8.062495507578628e-06, "logits/chosen": -1.5490050315856934, "logits/rejected": -1.453576683998108, "logps/chosen": -98.96826171875, "logps/rejected": -290.187255859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6769325137138367, "rewards/margins": 15.916489601135254, "rewards/rejected": -16.593421936035156, "step": 8815 }, { "epoch": 1.95, "learning_rate": 8.061078526971048e-06, "logits/chosen": -1.4279996156692505, "logits/rejected": -1.4632493257522583, "logps/chosen": -185.47998046875, "logps/rejected": -124.04899597167969, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013641357654705644, "rewards/margins": 4.405585289001465, "rewards/rejected": -4.406949520111084, "step": 8816 }, { "epoch": 1.95, "learning_rate": 8.059661153022236e-06, "logits/chosen": -1.5037837028503418, "logits/rejected": -1.516810655593872, "logps/chosen": -206.6112518310547, "logps/rejected": -146.2864990234375, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -0.3218338191509247, "rewards/margins": 5.422525882720947, "rewards/rejected": -5.744359493255615, "step": 8817 }, { "epoch": 1.95, "learning_rate": 8.058243385914324e-06, "logits/chosen": -1.835186243057251, "logits/rejected": -1.9553855657577515, "logps/chosen": -255.50042724609375, "logps/rejected": -246.927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.9632935523986816, "rewards/margins": 12.22758674621582, "rewards/rejected": -9.26429271697998, "step": 8818 }, { "epoch": 1.95, "learning_rate": 8.056825225829486e-06, "logits/chosen": -1.719071388244629, "logits/rejected": -1.666918158531189, "logps/chosen": -128.56021118164062, "logps/rejected": -198.51712036132812, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.1820952892303467, "rewards/margins": 7.307795524597168, "rewards/rejected": -9.489891052246094, "step": 8819 }, { "epoch": 1.95, "learning_rate": 8.055406672949957e-06, "logits/chosen": -1.5990607738494873, "logits/rejected": -1.5614171028137207, "logps/chosen": -107.42768859863281, "logps/rejected": -156.44131469726562, "loss": 0.0315, "rewards/accuracies": 1.0, "rewards/chosen": -0.2760482728481293, "rewards/margins": 2.7457468509674072, "rewards/rejected": -3.0217950344085693, "step": 8820 }, { "epoch": 1.95, "learning_rate": 8.053987727458013e-06, "logits/chosen": -2.1791462898254395, "logits/rejected": -1.8586347103118896, "logps/chosen": -94.12861633300781, "logps/rejected": -487.73583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1632423400878906, "rewards/margins": 17.13552474975586, "rewards/rejected": -18.29876708984375, "step": 8821 }, { "epoch": 1.95, "learning_rate": 8.05256838953599e-06, "logits/chosen": -1.8950449228286743, "logits/rejected": -1.7023651599884033, "logps/chosen": -95.39454650878906, "logps/rejected": -398.62115478515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.868417501449585, "rewards/margins": 10.891940116882324, "rewards/rejected": -13.760357856750488, "step": 8822 }, { "epoch": 1.95, "learning_rate": 8.051148659366265e-06, "logits/chosen": -1.4318873882293701, "logits/rejected": -1.5199795961380005, "logps/chosen": -147.6095733642578, "logps/rejected": -143.15386962890625, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": 2.7821426391601562, "rewards/margins": 4.129395961761475, "rewards/rejected": -1.347253441810608, "step": 8823 }, { "epoch": 1.95, "learning_rate": 8.049728537131275e-06, "logits/chosen": -1.2631930112838745, "logits/rejected": -1.2588074207305908, "logps/chosen": -31.09337043762207, "logps/rejected": -88.22988891601562, "loss": 0.3708, "rewards/accuracies": 1.0, "rewards/chosen": -1.5226742029190063, "rewards/margins": 3.0022525787353516, "rewards/rejected": -4.524926662445068, "step": 8824 }, { "epoch": 1.95, "learning_rate": 8.048308023013498e-06, "logits/chosen": -1.6341874599456787, "logits/rejected": -1.252333402633667, "logps/chosen": -115.25482940673828, "logps/rejected": -746.9901123046875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.6227119565010071, "rewards/margins": 65.81157684326172, "rewards/rejected": -66.43428802490234, "step": 8825 }, { "epoch": 1.95, "learning_rate": 8.046887117195467e-06, "logits/chosen": -2.004776954650879, "logits/rejected": -1.990465521812439, "logps/chosen": -71.96444702148438, "logps/rejected": -93.31389617919922, "loss": 1.4287, "rewards/accuracies": 0.0, "rewards/chosen": -5.384346008300781, "rewards/margins": -2.79701828956604, "rewards/rejected": -2.587327718734741, "step": 8826 }, { "epoch": 1.95, "learning_rate": 8.045465819859766e-06, "logits/chosen": -1.4890904426574707, "logits/rejected": -1.489992380142212, "logps/chosen": -94.52110290527344, "logps/rejected": -147.79632568359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.8931634426116943, "rewards/margins": 8.08327579498291, "rewards/rejected": -10.976439476013184, "step": 8827 }, { "epoch": 1.95, "learning_rate": 8.044044131189029e-06, "logits/chosen": -1.256548523902893, "logits/rejected": -1.256548523902893, "logps/chosen": -168.55560302734375, "logps/rejected": -168.55560302734375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -12.65848445892334, "rewards/margins": 0.0, "rewards/rejected": -12.65848445892334, "step": 8828 }, { "epoch": 1.95, "learning_rate": 8.042622051365938e-06, "logits/chosen": -1.6086241006851196, "logits/rejected": -1.6451362371444702, "logps/chosen": -166.27452087402344, "logps/rejected": -222.44114685058594, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": 0.2438400238752365, "rewards/margins": 10.925020217895508, "rewards/rejected": -10.681180000305176, "step": 8829 }, { "epoch": 1.95, "learning_rate": 8.041199580573229e-06, "logits/chosen": -1.4024561643600464, "logits/rejected": -1.3428423404693604, "logps/chosen": -224.92227172851562, "logps/rejected": -250.56112670898438, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.5754852294921875, "rewards/margins": 5.705867290496826, "rewards/rejected": -5.130382061004639, "step": 8830 }, { "epoch": 1.95, "learning_rate": 8.039776718993683e-06, "logits/chosen": -1.7469122409820557, "logits/rejected": -1.7440283298492432, "logps/chosen": -137.7874755859375, "logps/rejected": -174.16224670410156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.2071640491485596, "rewards/margins": 7.8388519287109375, "rewards/rejected": -9.046015739440918, "step": 8831 }, { "epoch": 1.95, "learning_rate": 8.038353466810137e-06, "logits/chosen": -1.3451485633850098, "logits/rejected": -1.2756506204605103, "logps/chosen": -78.70353698730469, "logps/rejected": -205.71627807617188, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -3.5940823554992676, "rewards/margins": 6.014204502105713, "rewards/rejected": -9.60828685760498, "step": 8832 }, { "epoch": 1.96, "learning_rate": 8.036929824205476e-06, "logits/chosen": -1.6381951570510864, "logits/rejected": -1.7401255369186401, "logps/chosen": -173.27867126464844, "logps/rejected": -171.57662963867188, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.8619552850723267, "rewards/margins": 8.345412254333496, "rewards/rejected": -9.207367897033691, "step": 8833 }, { "epoch": 1.96, "learning_rate": 8.03550579136263e-06, "logits/chosen": -1.3538645505905151, "logits/rejected": -1.3444663286209106, "logps/chosen": -169.08668518066406, "logps/rejected": -155.79168701171875, "loss": 0.4577, "rewards/accuracies": 0.0, "rewards/chosen": -5.520062446594238, "rewards/margins": -0.40386152267456055, "rewards/rejected": -5.116200923919678, "step": 8834 }, { "epoch": 1.96, "learning_rate": 8.03408136846459e-06, "logits/chosen": -1.4034714698791504, "logits/rejected": -1.4034714698791504, "logps/chosen": -165.0609130859375, "logps/rejected": -165.0609130859375, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -3.8690247535705566, "rewards/margins": 0.0, "rewards/rejected": -3.8690247535705566, "step": 8835 }, { "epoch": 1.96, "learning_rate": 8.032656555694388e-06, "logits/chosen": -1.7384694814682007, "logits/rejected": -1.801556944847107, "logps/chosen": -205.58905029296875, "logps/rejected": -219.46722412109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.5408722162246704, "rewards/margins": 9.246413230895996, "rewards/rejected": -8.705540657043457, "step": 8836 }, { "epoch": 1.96, "learning_rate": 8.031231353235104e-06, "logits/chosen": -1.422391414642334, "logits/rejected": -1.2941811084747314, "logps/chosen": -137.84268188476562, "logps/rejected": -181.70713806152344, "loss": 0.1177, "rewards/accuracies": 1.0, "rewards/chosen": -2.4688918590545654, "rewards/margins": 1.3276262283325195, "rewards/rejected": -3.796518087387085, "step": 8837 }, { "epoch": 1.96, "learning_rate": 8.029805761269881e-06, "logits/chosen": -1.5435534715652466, "logits/rejected": -1.5335631370544434, "logps/chosen": -49.207191467285156, "logps/rejected": -128.63201904296875, "loss": 0.1369, "rewards/accuracies": 1.0, "rewards/chosen": -1.8841747045516968, "rewards/margins": 1.1557148694992065, "rewards/rejected": -3.0398895740509033, "step": 8838 }, { "epoch": 1.96, "learning_rate": 8.028379779981902e-06, "logits/chosen": -1.6514918804168701, "logits/rejected": -1.5970491170883179, "logps/chosen": -157.00732421875, "logps/rejected": -241.99807739257812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.078650951385498, "rewards/margins": 6.0925984382629395, "rewards/rejected": -12.171249389648438, "step": 8839 }, { "epoch": 1.96, "learning_rate": 8.026953409554402e-06, "logits/chosen": -1.197884440422058, "logits/rejected": -1.1390374898910522, "logps/chosen": -81.69023132324219, "logps/rejected": -160.70156860351562, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.094293236732483, "rewards/margins": 5.32362699508667, "rewards/rejected": -6.417920112609863, "step": 8840 }, { "epoch": 1.96, "learning_rate": 8.025526650170665e-06, "logits/chosen": -1.751143455505371, "logits/rejected": -1.7064199447631836, "logps/chosen": -104.54124450683594, "logps/rejected": -184.66116333007812, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -2.6968209743499756, "rewards/margins": 4.323818206787109, "rewards/rejected": -7.020638942718506, "step": 8841 }, { "epoch": 1.96, "learning_rate": 8.024099502014024e-06, "logits/chosen": -1.3440258502960205, "logits/rejected": -1.262776494026184, "logps/chosen": -136.86520385742188, "logps/rejected": -203.6090087890625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 3.726126194000244, "rewards/margins": 6.750909805297852, "rewards/rejected": -3.0247833728790283, "step": 8842 }, { "epoch": 1.96, "learning_rate": 8.02267196526787e-06, "logits/chosen": -1.549617052078247, "logits/rejected": -1.5030310153961182, "logps/chosen": -89.45476531982422, "logps/rejected": -246.4451904296875, "loss": 0.6484, "rewards/accuracies": 1.0, "rewards/chosen": -0.9782455563545227, "rewards/margins": 8.157539367675781, "rewards/rejected": -9.135785102844238, "step": 8843 }, { "epoch": 1.96, "learning_rate": 8.021244040115634e-06, "logits/chosen": -1.5987375974655151, "logits/rejected": -1.5888854265213013, "logps/chosen": -110.05384826660156, "logps/rejected": -139.70669555664062, "loss": 0.505, "rewards/accuracies": 0.0, "rewards/chosen": -1.4086074829101562, "rewards/margins": -0.5570006966590881, "rewards/rejected": -0.8516067862510681, "step": 8844 }, { "epoch": 1.96, "learning_rate": 8.019815726740801e-06, "logits/chosen": -1.5743281841278076, "logits/rejected": -1.6280744075775146, "logps/chosen": -121.40097045898438, "logps/rejected": -141.32904052734375, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": 1.0061310529708862, "rewards/margins": 6.3029465675354, "rewards/rejected": -5.296815395355225, "step": 8845 }, { "epoch": 1.96, "learning_rate": 8.018387025326906e-06, "logits/chosen": -1.1763790845870972, "logits/rejected": -1.1682544946670532, "logps/chosen": -296.11627197265625, "logps/rejected": -348.1358947753906, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.019352912902832, "rewards/margins": 5.747840881347656, "rewards/rejected": -10.767193794250488, "step": 8846 }, { "epoch": 1.96, "learning_rate": 8.016957936057535e-06, "logits/chosen": -1.8077692985534668, "logits/rejected": -1.8848140239715576, "logps/chosen": -218.9666290283203, "logps/rejected": -153.73033142089844, "loss": 0.1113, "rewards/accuracies": 1.0, "rewards/chosen": -8.16960620880127, "rewards/margins": 1.3910799026489258, "rewards/rejected": -9.560686111450195, "step": 8847 }, { "epoch": 1.96, "learning_rate": 8.015528459116321e-06, "logits/chosen": -1.4199424982070923, "logits/rejected": -1.5732272863388062, "logps/chosen": -138.106201171875, "logps/rejected": -172.76466369628906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.748028516769409, "rewards/margins": 9.541927337646484, "rewards/rejected": -12.289956092834473, "step": 8848 }, { "epoch": 1.96, "learning_rate": 8.014098594686951e-06, "logits/chosen": -1.7515085935592651, "logits/rejected": -1.4369059801101685, "logps/chosen": -98.90794372558594, "logps/rejected": -275.9703369140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7204055786132812, "rewards/margins": 7.764281272888184, "rewards/rejected": -8.484686851501465, "step": 8849 }, { "epoch": 1.96, "learning_rate": 8.012668342953155e-06, "logits/chosen": -1.1531689167022705, "logits/rejected": -1.1573556661605835, "logps/chosen": -78.3427505493164, "logps/rejected": -133.55950927734375, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": -1.2014755010604858, "rewards/margins": 2.3884873390197754, "rewards/rejected": -3.5899627208709717, "step": 8850 }, { "epoch": 1.96, "learning_rate": 8.011237704098721e-06, "logits/chosen": -1.5647062063217163, "logits/rejected": -1.2120991945266724, "logps/chosen": -93.49759674072266, "logps/rejected": -876.1959838867188, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -3.3299400806427, "rewards/margins": 76.87899780273438, "rewards/rejected": -80.20893859863281, "step": 8851 }, { "epoch": 1.96, "learning_rate": 8.00980667830748e-06, "logits/chosen": -1.5395228862762451, "logits/rejected": -1.6678555011749268, "logps/chosen": -219.34439086914062, "logps/rejected": -193.40933227539062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 4.017663478851318, "rewards/margins": 10.665916442871094, "rewards/rejected": -6.648252964019775, "step": 8852 }, { "epoch": 1.96, "learning_rate": 8.008375265763317e-06, "logits/chosen": -1.424514889717102, "logits/rejected": -1.44208824634552, "logps/chosen": -220.30160522460938, "logps/rejected": -238.9988250732422, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.348924398422241, "rewards/margins": 15.708741188049316, "rewards/rejected": -12.359816551208496, "step": 8853 }, { "epoch": 1.96, "learning_rate": 8.006943466650163e-06, "logits/chosen": -1.9601987600326538, "logits/rejected": -1.97035551071167, "logps/chosen": -126.38810729980469, "logps/rejected": -182.58766174316406, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -2.320269823074341, "rewards/margins": 3.723247766494751, "rewards/rejected": -6.043517589569092, "step": 8854 }, { "epoch": 1.96, "learning_rate": 8.005511281152004e-06, "logits/chosen": -1.522506594657898, "logits/rejected": -1.4617341756820679, "logps/chosen": -152.9068603515625, "logps/rejected": -203.85324096679688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.32685548067092896, "rewards/margins": 10.62344741821289, "rewards/rejected": -10.950303077697754, "step": 8855 }, { "epoch": 1.96, "learning_rate": 8.004078709452869e-06, "logits/chosen": -1.3366386890411377, "logits/rejected": -1.2574725151062012, "logps/chosen": -163.51510620117188, "logps/rejected": -367.6485595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4695678651332855, "rewards/margins": 12.680377006530762, "rewards/rejected": -13.149945259094238, "step": 8856 }, { "epoch": 1.96, "learning_rate": 8.002645751736841e-06, "logits/chosen": -1.2384848594665527, "logits/rejected": -0.5992823839187622, "logps/chosen": -198.52346801757812, "logps/rejected": -1024.931396484375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.4405456483364105, "rewards/margins": 83.02547454833984, "rewards/rejected": -83.46601867675781, "step": 8857 }, { "epoch": 1.96, "learning_rate": 8.001212408188052e-06, "logits/chosen": -1.4102113246917725, "logits/rejected": -1.5177329778671265, "logps/chosen": -111.60535430908203, "logps/rejected": -129.72911071777344, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.397650957107544, "rewards/margins": 4.980386734008789, "rewards/rejected": -6.378037452697754, "step": 8858 }, { "epoch": 1.96, "learning_rate": 7.999778678990685e-06, "logits/chosen": -1.7771313190460205, "logits/rejected": -1.7219178676605225, "logps/chosen": -76.34965515136719, "logps/rejected": -185.37384033203125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.8172378540039062, "rewards/margins": 5.021632671356201, "rewards/rejected": -5.838870525360107, "step": 8859 }, { "epoch": 1.96, "learning_rate": 7.998344564328967e-06, "logits/chosen": -1.2411909103393555, "logits/rejected": -1.4313280582427979, "logps/chosen": -160.65321350097656, "logps/rejected": -173.07894897460938, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": 0.9374847412109375, "rewards/margins": 9.928306579589844, "rewards/rejected": -8.990821838378906, "step": 8860 }, { "epoch": 1.96, "learning_rate": 7.996910064387181e-06, "logits/chosen": -1.2547122240066528, "logits/rejected": -1.2439950704574585, "logps/chosen": -223.68231201171875, "logps/rejected": -260.594482421875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.960522472858429, "rewards/margins": 5.144094944000244, "rewards/rejected": -6.104617595672607, "step": 8861 }, { "epoch": 1.96, "learning_rate": 7.995475179349657e-06, "logits/chosen": -1.3167953491210938, "logits/rejected": -1.3544514179229736, "logps/chosen": -204.9632110595703, "logps/rejected": -123.45454406738281, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.511627197265625, "rewards/margins": 6.897636413574219, "rewards/rejected": -9.409263610839844, "step": 8862 }, { "epoch": 1.96, "learning_rate": 7.994039909400773e-06, "logits/chosen": -1.6763173341751099, "logits/rejected": -1.5754085779190063, "logps/chosen": -94.79243469238281, "logps/rejected": -204.7291717529297, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": 0.45589980483055115, "rewards/margins": 3.198618173599243, "rewards/rejected": -2.742718458175659, "step": 8863 }, { "epoch": 1.96, "learning_rate": 7.992604254724957e-06, "logits/chosen": -1.4809352159500122, "logits/rejected": -1.483822226524353, "logps/chosen": -109.7617416381836, "logps/rejected": -138.08193969726562, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": -1.6901298761367798, "rewards/margins": 2.9795985221862793, "rewards/rejected": -4.6697282791137695, "step": 8864 }, { "epoch": 1.96, "learning_rate": 7.991168215506688e-06, "logits/chosen": -1.8287838697433472, "logits/rejected": -1.818030595779419, "logps/chosen": -86.6765365600586, "logps/rejected": -115.99136352539062, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": 0.28486862778663635, "rewards/margins": 3.500319004058838, "rewards/rejected": -3.2154502868652344, "step": 8865 }, { "epoch": 1.96, "learning_rate": 7.989731791930497e-06, "logits/chosen": -1.347358226776123, "logits/rejected": -1.3067418336868286, "logps/chosen": -194.415771484375, "logps/rejected": -379.14349365234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.2066972255706787, "rewards/margins": 11.376952171325684, "rewards/rejected": -9.170254707336426, "step": 8866 }, { "epoch": 1.96, "learning_rate": 7.988294984180956e-06, "logits/chosen": -1.6290652751922607, "logits/rejected": -1.759878396987915, "logps/chosen": -187.560546875, "logps/rejected": -192.12091064453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.4269074201583862, "rewards/margins": 6.700291633605957, "rewards/rejected": -8.127199172973633, "step": 8867 }, { "epoch": 1.96, "learning_rate": 7.986857792442692e-06, "logits/chosen": -1.2613065242767334, "logits/rejected": -1.2721819877624512, "logps/chosen": -199.13375854492188, "logps/rejected": -280.7142639160156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 3.532365560531616, "rewards/margins": 13.511354446411133, "rewards/rejected": -9.978988647460938, "step": 8868 }, { "epoch": 1.96, "learning_rate": 7.985420216900384e-06, "logits/chosen": -1.3219187259674072, "logits/rejected": -1.3219187259674072, "logps/chosen": -165.83465576171875, "logps/rejected": -165.83465576171875, "loss": 0.347, "rewards/accuracies": 0.0, "rewards/chosen": -9.3203763961792, "rewards/margins": 0.0, "rewards/rejected": -9.3203763961792, "step": 8869 }, { "epoch": 1.96, "learning_rate": 7.983982257738752e-06, "logits/chosen": -1.8274720907211304, "logits/rejected": -1.9543211460113525, "logps/chosen": -200.35397338867188, "logps/rejected": -161.86904907226562, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.7539612054824829, "rewards/margins": 11.01828670501709, "rewards/rejected": -10.264325141906738, "step": 8870 }, { "epoch": 1.96, "learning_rate": 7.982543915142575e-06, "logits/chosen": -1.4815242290496826, "logits/rejected": -1.4815242290496826, "logps/chosen": -185.70245361328125, "logps/rejected": -185.70245361328125, "loss": 0.3476, "rewards/accuracies": 0.0, "rewards/chosen": -1.874267578125, "rewards/margins": 0.0, "rewards/rejected": -1.874267578125, "step": 8871 }, { "epoch": 1.96, "learning_rate": 7.981105189296676e-06, "logits/chosen": -1.1387747526168823, "logits/rejected": -1.227941870689392, "logps/chosen": -223.41806030273438, "logps/rejected": -173.41766357421875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.09435577690601349, "rewards/margins": 5.7755022048950195, "rewards/rejected": -5.8698577880859375, "step": 8872 }, { "epoch": 1.96, "learning_rate": 7.979666080385923e-06, "logits/chosen": -1.4091806411743164, "logits/rejected": -1.352241039276123, "logps/chosen": -153.38824462890625, "logps/rejected": -233.54127502441406, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.6426849365234375, "rewards/margins": 7.735455513000488, "rewards/rejected": -11.378140449523926, "step": 8873 }, { "epoch": 1.96, "learning_rate": 7.978226588595245e-06, "logits/chosen": -1.288804531097412, "logits/rejected": -0.8251681327819824, "logps/chosen": -115.65348815917969, "logps/rejected": -616.4400634765625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.9726197719573975, "rewards/margins": 30.47251319885254, "rewards/rejected": -33.445133209228516, "step": 8874 }, { "epoch": 1.96, "learning_rate": 7.976786714109608e-06, "logits/chosen": -1.271353006362915, "logits/rejected": -1.2624688148498535, "logps/chosen": -176.73556518554688, "logps/rejected": -227.75064086914062, "loss": 0.0673, "rewards/accuracies": 1.0, "rewards/chosen": 0.2165069580078125, "rewards/margins": 2.69696044921875, "rewards/rejected": -2.4804534912109375, "step": 8875 }, { "epoch": 1.96, "learning_rate": 7.975346457114034e-06, "logits/chosen": -1.646229863166809, "logits/rejected": -1.640897274017334, "logps/chosen": -77.25088500976562, "logps/rejected": -155.90353393554688, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.0664138793945312, "rewards/margins": 3.8125529289245605, "rewards/rejected": -4.878966808319092, "step": 8876 }, { "epoch": 1.96, "learning_rate": 7.973905817793594e-06, "logits/chosen": -1.5229536294937134, "logits/rejected": -1.6254111528396606, "logps/chosen": -270.0608215332031, "logps/rejected": -224.97012329101562, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 2.3399598598480225, "rewards/margins": 13.201515197753906, "rewards/rejected": -10.861555099487305, "step": 8877 }, { "epoch": 1.97, "learning_rate": 7.972464796333408e-06, "logits/chosen": -1.5558247566223145, "logits/rejected": -1.0357965230941772, "logps/chosen": -104.0785140991211, "logps/rejected": -1044.004150390625, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.29927369952201843, "rewards/margins": 92.74093627929688, "rewards/rejected": -93.04020690917969, "step": 8878 }, { "epoch": 1.97, "learning_rate": 7.971023392918637e-06, "logits/chosen": -1.7778944969177246, "logits/rejected": -1.547676920890808, "logps/chosen": -90.17668914794922, "logps/rejected": -338.44390869140625, "loss": 0.1013, "rewards/accuracies": 1.0, "rewards/chosen": 0.7580230832099915, "rewards/margins": 14.490249633789062, "rewards/rejected": -13.732226371765137, "step": 8879 }, { "epoch": 1.97, "learning_rate": 7.969581607734504e-06, "logits/chosen": -1.4827769994735718, "logits/rejected": -1.5069843530654907, "logps/chosen": -168.59963989257812, "logps/rejected": -325.26007080078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.6748108267784119, "rewards/margins": 13.905257225036621, "rewards/rejected": -14.58006763458252, "step": 8880 }, { "epoch": 1.97, "learning_rate": 7.968139440966271e-06, "logits/chosen": -1.1233866214752197, "logits/rejected": -1.11040198802948, "logps/chosen": -135.5655975341797, "logps/rejected": -139.71200561523438, "loss": 0.0767, "rewards/accuracies": 1.0, "rewards/chosen": -1.122822642326355, "rewards/margins": 1.7979401350021362, "rewards/rejected": -2.920762777328491, "step": 8881 }, { "epoch": 1.97, "learning_rate": 7.966696892799257e-06, "logits/chosen": -1.5031909942626953, "logits/rejected": -1.478263020515442, "logps/chosen": -178.91140747070312, "logps/rejected": -153.350341796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.776110827922821, "rewards/margins": 6.8637566566467285, "rewards/rejected": -7.639867305755615, "step": 8882 }, { "epoch": 1.97, "learning_rate": 7.965253963418825e-06, "logits/chosen": -1.612841010093689, "logits/rejected": -1.5758341550827026, "logps/chosen": -101.84518432617188, "logps/rejected": -139.02354431152344, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.6214210987091064, "rewards/margins": 3.948352098464966, "rewards/rejected": -5.569773197174072, "step": 8883 }, { "epoch": 1.97, "learning_rate": 7.963810653010385e-06, "logits/chosen": -1.5626227855682373, "logits/rejected": -1.499872088432312, "logps/chosen": -184.83094787597656, "logps/rejected": -320.6262512207031, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 1.3126541376113892, "rewards/margins": 9.981706619262695, "rewards/rejected": -8.669052124023438, "step": 8884 }, { "epoch": 1.97, "learning_rate": 7.962366961759402e-06, "logits/chosen": -1.3894400596618652, "logits/rejected": -1.3192931413650513, "logps/chosen": -113.74653625488281, "logps/rejected": -235.3793182373047, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.5604446530342102, "rewards/margins": 7.130723476409912, "rewards/rejected": -7.691168308258057, "step": 8885 }, { "epoch": 1.97, "learning_rate": 7.960922889851386e-06, "logits/chosen": -1.3523296117782593, "logits/rejected": -1.3523296117782593, "logps/chosen": -147.45681762695312, "logps/rejected": -147.45681762695312, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.719648838043213, "rewards/margins": 0.0, "rewards/rejected": -4.719648838043213, "step": 8886 }, { "epoch": 1.97, "learning_rate": 7.959478437471894e-06, "logits/chosen": -1.361265778541565, "logits/rejected": -1.361265778541565, "logps/chosen": -214.713623046875, "logps/rejected": -214.713623046875, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -6.169101238250732, "rewards/margins": 0.0, "rewards/rejected": -6.169101238250732, "step": 8887 }, { "epoch": 1.97, "learning_rate": 7.95803360480654e-06, "logits/chosen": -1.2088558673858643, "logits/rejected": -1.1470731496810913, "logps/chosen": -119.9423828125, "logps/rejected": -233.32928466796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1706466674804688, "rewards/margins": 10.492891311645508, "rewards/rejected": -11.663537979125977, "step": 8888 }, { "epoch": 1.97, "learning_rate": 7.956588392040978e-06, "logits/chosen": -1.6183264255523682, "logits/rejected": -1.6521830558776855, "logps/chosen": -191.20135498046875, "logps/rejected": -191.94235229492188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3336593806743622, "rewards/margins": 7.388860702514648, "rewards/rejected": -7.722519874572754, "step": 8889 }, { "epoch": 1.97, "learning_rate": 7.955142799360914e-06, "logits/chosen": -1.5536367893218994, "logits/rejected": -1.7089602947235107, "logps/chosen": -160.907470703125, "logps/rejected": -109.76272583007812, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.2276703119277954, "rewards/margins": 3.8445510864257812, "rewards/rejected": -5.072221279144287, "step": 8890 }, { "epoch": 1.97, "learning_rate": 7.953696826952106e-06, "logits/chosen": -1.4290939569473267, "logits/rejected": -1.3551504611968994, "logps/chosen": -204.65382385253906, "logps/rejected": -207.24087524414062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.8952468633651733, "rewards/margins": 8.383076667785645, "rewards/rejected": -6.487829685211182, "step": 8891 }, { "epoch": 1.97, "learning_rate": 7.952250475000354e-06, "logits/chosen": -1.181577444076538, "logits/rejected": -1.143648624420166, "logps/chosen": -115.40252685546875, "logps/rejected": -234.92044067382812, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -4.7592010498046875, "rewards/margins": 5.444258689880371, "rewards/rejected": -10.203459739685059, "step": 8892 }, { "epoch": 1.97, "learning_rate": 7.950803743691516e-06, "logits/chosen": -1.3932862281799316, "logits/rejected": -1.4515399932861328, "logps/chosen": -211.13229370117188, "logps/rejected": -231.76712036132812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 2.3356339931488037, "rewards/margins": 12.563872337341309, "rewards/rejected": -10.228238105773926, "step": 8893 }, { "epoch": 1.97, "learning_rate": 7.949356633211487e-06, "logits/chosen": -1.5065945386886597, "logits/rejected": -1.513588547706604, "logps/chosen": -149.81980895996094, "logps/rejected": -183.23361206054688, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -3.479174852371216, "rewards/margins": 3.5054285526275635, "rewards/rejected": -6.984603404998779, "step": 8894 }, { "epoch": 1.97, "learning_rate": 7.947909143746221e-06, "logits/chosen": -1.3284786939620972, "logits/rejected": -1.2841583490371704, "logps/chosen": -155.3303985595703, "logps/rejected": -174.5448760986328, "loss": 0.455, "rewards/accuracies": 0.0, "rewards/chosen": -0.5666702389717102, "rewards/margins": -0.3929199278354645, "rewards/rejected": -0.17375031113624573, "step": 8895 }, { "epoch": 1.97, "learning_rate": 7.946461275481719e-06, "logits/chosen": -1.8052165508270264, "logits/rejected": -1.8545634746551514, "logps/chosen": -107.46732330322266, "logps/rejected": -123.3707504272461, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -3.971926212310791, "rewards/margins": 5.707825183868408, "rewards/rejected": -9.6797513961792, "step": 8896 }, { "epoch": 1.97, "learning_rate": 7.945013028604026e-06, "logits/chosen": -1.622092843055725, "logits/rejected": -1.6391438245773315, "logps/chosen": -150.59750366210938, "logps/rejected": -168.3546600341797, "loss": 0.1278, "rewards/accuracies": 1.0, "rewards/chosen": -1.559625267982483, "rewards/margins": 1.3501511812210083, "rewards/rejected": -2.909776449203491, "step": 8897 }, { "epoch": 1.97, "learning_rate": 7.943564403299238e-06, "logits/chosen": -1.2203859090805054, "logits/rejected": -1.1111631393432617, "logps/chosen": -188.0682830810547, "logps/rejected": -248.53506469726562, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 3.7143876552581787, "rewards/margins": 7.499049663543701, "rewards/rejected": -3.7846620082855225, "step": 8898 }, { "epoch": 1.97, "learning_rate": 7.9421153997535e-06, "logits/chosen": -1.7137088775634766, "logits/rejected": -1.7233933210372925, "logps/chosen": -139.5953369140625, "logps/rejected": -256.93377685546875, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -0.9332794547080994, "rewards/margins": 10.838032722473145, "rewards/rejected": -11.77131175994873, "step": 8899 }, { "epoch": 1.97, "learning_rate": 7.940666018153004e-06, "logits/chosen": -1.3426673412322998, "logits/rejected": -1.2783877849578857, "logps/chosen": -163.63204956054688, "logps/rejected": -255.45440673828125, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": -3.9389634132385254, "rewards/margins": 2.387773036956787, "rewards/rejected": -6.3267364501953125, "step": 8900 }, { "epoch": 1.97, "learning_rate": 7.939216258683997e-06, "logits/chosen": -1.58705735206604, "logits/rejected": -1.4791011810302734, "logps/chosen": -129.95480346679688, "logps/rejected": -265.116943359375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 2.771475315093994, "rewards/margins": 6.564627170562744, "rewards/rejected": -3.79315185546875, "step": 8901 }, { "epoch": 1.97, "learning_rate": 7.937766121532766e-06, "logits/chosen": -1.2120503187179565, "logits/rejected": -1.2128832340240479, "logps/chosen": -125.4126205444336, "logps/rejected": -126.90496826171875, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -4.373340606689453, "rewards/margins": 5.1461181640625, "rewards/rejected": -9.519458770751953, "step": 8902 }, { "epoch": 1.97, "learning_rate": 7.936315606885649e-06, "logits/chosen": -1.5891304016113281, "logits/rejected": -1.6631666421890259, "logps/chosen": -120.5794448852539, "logps/rejected": -221.85113525390625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -3.345566511154175, "rewards/margins": 11.46181583404541, "rewards/rejected": -14.807382583618164, "step": 8903 }, { "epoch": 1.97, "learning_rate": 7.934864714929036e-06, "logits/chosen": -1.3867852687835693, "logits/rejected": -1.3976117372512817, "logps/chosen": -92.52328491210938, "logps/rejected": -142.90206909179688, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": -1.8039543628692627, "rewards/margins": 2.889768362045288, "rewards/rejected": -4.693722724914551, "step": 8904 }, { "epoch": 1.97, "learning_rate": 7.933413445849361e-06, "logits/chosen": -1.4071190357208252, "logits/rejected": -1.4071190357208252, "logps/chosen": -192.16839599609375, "logps/rejected": -192.16839599609375, "loss": 0.3467, "rewards/accuracies": 0.0, "rewards/chosen": -4.323642253875732, "rewards/margins": 0.0, "rewards/rejected": -4.323642253875732, "step": 8905 }, { "epoch": 1.97, "learning_rate": 7.931961799833112e-06, "logits/chosen": -1.8241559267044067, "logits/rejected": -1.7915693521499634, "logps/chosen": -178.103271484375, "logps/rejected": -194.22042846679688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.6909912824630737, "rewards/margins": 12.194853782653809, "rewards/rejected": -10.503862380981445, "step": 8906 }, { "epoch": 1.97, "learning_rate": 7.930509777066819e-06, "logits/chosen": -1.411257028579712, "logits/rejected": -1.4476464986801147, "logps/chosen": -196.6943359375, "logps/rejected": -162.6101531982422, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 2.6692445278167725, "rewards/margins": 9.069204330444336, "rewards/rejected": -6.399959564208984, "step": 8907 }, { "epoch": 1.97, "learning_rate": 7.929057377737064e-06, "logits/chosen": -1.4074480533599854, "logits/rejected": -1.289539098739624, "logps/chosen": -98.43756103515625, "logps/rejected": -318.50506591796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.1660453826189041, "rewards/margins": 7.861557960510254, "rewards/rejected": -8.027603149414062, "step": 8908 }, { "epoch": 1.97, "learning_rate": 7.92760460203048e-06, "logits/chosen": -1.61032235622406, "logits/rejected": -1.0123521089553833, "logps/chosen": -140.88916015625, "logps/rejected": -584.4895629882812, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.869589328765869, "rewards/margins": 43.31124496459961, "rewards/rejected": -50.18083572387695, "step": 8909 }, { "epoch": 1.97, "learning_rate": 7.926151450133738e-06, "logits/chosen": -1.6522634029388428, "logits/rejected": -1.7321951389312744, "logps/chosen": -125.66368103027344, "logps/rejected": -59.52871322631836, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -0.7018417716026306, "rewards/margins": 3.4680609703063965, "rewards/rejected": -4.169902801513672, "step": 8910 }, { "epoch": 1.97, "learning_rate": 7.924697922233571e-06, "logits/chosen": -1.3005058765411377, "logits/rejected": -1.0693247318267822, "logps/chosen": -157.439453125, "logps/rejected": -461.1978759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.10400390625, "rewards/margins": 15.534671783447266, "rewards/rejected": -16.638675689697266, "step": 8911 }, { "epoch": 1.97, "learning_rate": 7.923244018516751e-06, "logits/chosen": -1.5548638105392456, "logits/rejected": -1.5882477760314941, "logps/chosen": -103.5308837890625, "logps/rejected": -223.49807739257812, "loss": 0.3466, "rewards/accuracies": 1.0, "rewards/chosen": 0.770855724811554, "rewards/margins": 18.814289093017578, "rewards/rejected": -18.043434143066406, "step": 8912 }, { "epoch": 1.97, "learning_rate": 7.921789739170102e-06, "logits/chosen": -1.377839207649231, "logits/rejected": -1.4044532775878906, "logps/chosen": -95.61530303955078, "logps/rejected": -84.7611312866211, "loss": 0.0633, "rewards/accuracies": 1.0, "rewards/chosen": -2.0715744495391846, "rewards/margins": 2.127298593521118, "rewards/rejected": -4.198873043060303, "step": 8913 }, { "epoch": 1.97, "learning_rate": 7.920335084380497e-06, "logits/chosen": -1.5247611999511719, "logits/rejected": -1.5271191596984863, "logps/chosen": -145.776123046875, "logps/rejected": -186.88540649414062, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -2.2585861682891846, "rewards/margins": 3.9610917568206787, "rewards/rejected": -6.219677925109863, "step": 8914 }, { "epoch": 1.97, "learning_rate": 7.918880054334853e-06, "logits/chosen": -1.2633076906204224, "logits/rejected": -1.2633076906204224, "logps/chosen": -165.59881591796875, "logps/rejected": -165.59881591796875, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -5.047220706939697, "rewards/margins": 0.0, "rewards/rejected": -5.047220706939697, "step": 8915 }, { "epoch": 1.97, "learning_rate": 7.91742464922014e-06, "logits/chosen": -1.6455258131027222, "logits/rejected": -1.7909718751907349, "logps/chosen": -142.06106567382812, "logps/rejected": -151.26051330566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5390480756759644, "rewards/margins": 9.752017974853516, "rewards/rejected": -11.29106616973877, "step": 8916 }, { "epoch": 1.97, "learning_rate": 7.915968869223372e-06, "logits/chosen": -1.6161338090896606, "logits/rejected": -1.6857730150222778, "logps/chosen": -157.45030212402344, "logps/rejected": -187.9204559326172, "loss": 0.0639, "rewards/accuracies": 1.0, "rewards/chosen": -3.6344521045684814, "rewards/margins": 2.7133796215057373, "rewards/rejected": -6.347831726074219, "step": 8917 }, { "epoch": 1.97, "learning_rate": 7.914512714531612e-06, "logits/chosen": -1.3491301536560059, "logits/rejected": -1.342759132385254, "logps/chosen": -127.70709228515625, "logps/rejected": -250.45697021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.028514385223389, "rewards/margins": 14.657581329345703, "rewards/rejected": -18.68609619140625, "step": 8918 }, { "epoch": 1.97, "learning_rate": 7.913056185331978e-06, "logits/chosen": -1.901482105255127, "logits/rejected": -1.778650164604187, "logps/chosen": -132.53952026367188, "logps/rejected": -223.459228515625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 0.7521392703056335, "rewards/margins": 4.787853717803955, "rewards/rejected": -4.035714626312256, "step": 8919 }, { "epoch": 1.97, "learning_rate": 7.911599281811624e-06, "logits/chosen": -1.7902263402938843, "logits/rejected": -1.7746305465698242, "logps/chosen": -101.7913818359375, "logps/rejected": -191.00550842285156, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.2519317865371704, "rewards/margins": 6.820263385772705, "rewards/rejected": -8.072195053100586, "step": 8920 }, { "epoch": 1.97, "learning_rate": 7.910142004157762e-06, "logits/chosen": -1.211195945739746, "logits/rejected": -1.229405164718628, "logps/chosen": -101.13235473632812, "logps/rejected": -162.7811737060547, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": -3.677058458328247, "rewards/margins": 4.278436660766602, "rewards/rejected": -7.9554948806762695, "step": 8921 }, { "epoch": 1.97, "learning_rate": 7.90868435255765e-06, "logits/chosen": -1.5557100772857666, "logits/rejected": -1.9888896942138672, "logps/chosen": -326.8049011230469, "logps/rejected": -176.82244873046875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.512594699859619, "rewards/margins": 9.648508071899414, "rewards/rejected": -12.161102294921875, "step": 8922 }, { "epoch": 1.97, "learning_rate": 7.90722632719859e-06, "logits/chosen": -1.4703730344772339, "logits/rejected": -1.3068174123764038, "logps/chosen": -111.97035217285156, "logps/rejected": -269.33062744140625, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -2.2907166481018066, "rewards/margins": 4.722738742828369, "rewards/rejected": -7.013455390930176, "step": 8923 }, { "epoch": 1.98, "learning_rate": 7.905767928267936e-06, "logits/chosen": -1.6159919500350952, "logits/rejected": -1.6558994054794312, "logps/chosen": -111.013427734375, "logps/rejected": -169.18780517578125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.6637009382247925, "rewards/margins": 5.188040256500244, "rewards/rejected": -6.851741313934326, "step": 8924 }, { "epoch": 1.98, "learning_rate": 7.904309155953087e-06, "logits/chosen": -1.5254974365234375, "logits/rejected": -1.5254974365234375, "logps/chosen": -119.97512817382812, "logps/rejected": -119.97512817382812, "loss": 0.3479, "rewards/accuracies": 0.0, "rewards/chosen": -7.155827522277832, "rewards/margins": 0.0, "rewards/rejected": -7.155827522277832, "step": 8925 }, { "epoch": 1.98, "learning_rate": 7.902850010441494e-06, "logits/chosen": -1.7238255739212036, "logits/rejected": -1.7042025327682495, "logps/chosen": -152.56613159179688, "logps/rejected": -168.16293334960938, "loss": 0.0665, "rewards/accuracies": 1.0, "rewards/chosen": -5.418605804443359, "rewards/margins": 3.524703025817871, "rewards/rejected": -8.94330883026123, "step": 8926 }, { "epoch": 1.98, "learning_rate": 7.901390491920655e-06, "logits/chosen": -1.520782470703125, "logits/rejected": -1.4740740060806274, "logps/chosen": -117.12265014648438, "logps/rejected": -164.1129150390625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 0.41173097491264343, "rewards/margins": 4.64005184173584, "rewards/rejected": -4.228321075439453, "step": 8927 }, { "epoch": 1.98, "learning_rate": 7.899930600578112e-06, "logits/chosen": -1.5331149101257324, "logits/rejected": -1.4596872329711914, "logps/chosen": -181.84295654296875, "logps/rejected": -242.54348754882812, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -2.090466260910034, "rewards/margins": 4.710127830505371, "rewards/rejected": -6.800593852996826, "step": 8928 }, { "epoch": 1.98, "learning_rate": 7.898470336601456e-06, "logits/chosen": -1.305309772491455, "logits/rejected": -1.2939682006835938, "logps/chosen": -140.31173706054688, "logps/rejected": -191.92596435546875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.06670074909925461, "rewards/margins": 8.821565628051758, "rewards/rejected": -8.888266563415527, "step": 8929 }, { "epoch": 1.98, "learning_rate": 7.897009700178331e-06, "logits/chosen": -1.371304988861084, "logits/rejected": -1.4266811609268188, "logps/chosen": -96.03369140625, "logps/rejected": -77.15321350097656, "loss": 0.4359, "rewards/accuracies": 0.0, "rewards/chosen": -3.6345291137695312, "rewards/margins": -0.32947659492492676, "rewards/rejected": -3.3050525188446045, "step": 8930 }, { "epoch": 1.98, "learning_rate": 7.895548691496421e-06, "logits/chosen": -1.4684041738510132, "logits/rejected": -1.4165974855422974, "logps/chosen": -109.36614990234375, "logps/rejected": -187.3763427734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.2014694213867188, "rewards/margins": 6.365194797515869, "rewards/rejected": -7.566664218902588, "step": 8931 }, { "epoch": 1.98, "learning_rate": 7.894087310743468e-06, "logits/chosen": -1.6193501949310303, "logits/rejected": -1.5998013019561768, "logps/chosen": -119.82101440429688, "logps/rejected": -182.56979370117188, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -1.7423492670059204, "rewards/margins": 3.5812225341796875, "rewards/rejected": -5.323571681976318, "step": 8932 }, { "epoch": 1.98, "learning_rate": 7.892625558107252e-06, "logits/chosen": -1.410520315170288, "logits/rejected": -1.3692171573638916, "logps/chosen": -97.26512145996094, "logps/rejected": -115.03569030761719, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/chosen": -2.995032548904419, "rewards/margins": 2.5198943614959717, "rewards/rejected": -5.514926910400391, "step": 8933 }, { "epoch": 1.98, "learning_rate": 7.891163433775605e-06, "logits/chosen": -1.785319447517395, "logits/rejected": -1.6844326257705688, "logps/chosen": -144.04367065429688, "logps/rejected": -341.3746032714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.60333251953125, "rewards/margins": 10.500640869140625, "rewards/rejected": -13.103973388671875, "step": 8934 }, { "epoch": 1.98, "learning_rate": 7.889700937936408e-06, "logits/chosen": -1.3519991636276245, "logits/rejected": -1.3519991636276245, "logps/chosen": -111.54497528076172, "logps/rejected": -111.54497528076172, "loss": 0.3506, "rewards/accuracies": 0.0, "rewards/chosen": -5.345061779022217, "rewards/margins": 0.0, "rewards/rejected": -5.345061779022217, "step": 8935 }, { "epoch": 1.98, "learning_rate": 7.888238070777586e-06, "logits/chosen": -1.4119223356246948, "logits/rejected": -1.4119223356246948, "logps/chosen": -128.35960388183594, "logps/rejected": -128.35960388183594, "loss": 0.3555, "rewards/accuracies": 0.0, "rewards/chosen": -4.116031169891357, "rewards/margins": 0.0, "rewards/rejected": -4.116031169891357, "step": 8936 }, { "epoch": 1.98, "learning_rate": 7.886774832487116e-06, "logits/chosen": -1.9718295335769653, "logits/rejected": -2.100487470626831, "logps/chosen": -222.12887573242188, "logps/rejected": -196.68923950195312, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.7036163210868835, "rewards/margins": 6.613850116729736, "rewards/rejected": -5.910233974456787, "step": 8937 }, { "epoch": 1.98, "learning_rate": 7.885311223253018e-06, "logits/chosen": -1.3769373893737793, "logits/rejected": -1.3531091213226318, "logps/chosen": -101.89811706542969, "logps/rejected": -250.96934509277344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.265860080718994, "rewards/margins": 10.625423431396484, "rewards/rejected": -12.89128303527832, "step": 8938 }, { "epoch": 1.98, "learning_rate": 7.883847243263366e-06, "logits/chosen": -1.32322359085083, "logits/rejected": -1.263671875, "logps/chosen": -207.38214111328125, "logps/rejected": -329.29150390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.8589813113212585, "rewards/margins": 7.484097003936768, "rewards/rejected": -6.625115871429443, "step": 8939 }, { "epoch": 1.98, "learning_rate": 7.882382892706273e-06, "logits/chosen": -1.9063808917999268, "logits/rejected": -1.846269130706787, "logps/chosen": -105.04058837890625, "logps/rejected": -130.39715576171875, "loss": 0.3925, "rewards/accuracies": 0.0, "rewards/chosen": -2.676561117172241, "rewards/margins": -0.17596220970153809, "rewards/rejected": -2.500598907470703, "step": 8940 }, { "epoch": 1.98, "learning_rate": 7.88091817176991e-06, "logits/chosen": -1.668502688407898, "logits/rejected": -1.1804566383361816, "logps/chosen": -70.58235168457031, "logps/rejected": -612.03271484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.7941265106201172, "rewards/margins": 51.19487762451172, "rewards/rejected": -51.9890022277832, "step": 8941 }, { "epoch": 1.98, "learning_rate": 7.879453080642486e-06, "logits/chosen": -1.494646668434143, "logits/rejected": -1.494646668434143, "logps/chosen": -122.16397094726562, "logps/rejected": -122.16397094726562, "loss": 0.3475, "rewards/accuracies": 0.0, "rewards/chosen": -3.9244918823242188, "rewards/margins": 0.0, "rewards/rejected": -3.9244918823242188, "step": 8942 }, { "epoch": 1.98, "learning_rate": 7.877987619512263e-06, "logits/chosen": -1.3910926580429077, "logits/rejected": -1.3916583061218262, "logps/chosen": -124.53941345214844, "logps/rejected": -140.19683837890625, "loss": 0.2153, "rewards/accuracies": 1.0, "rewards/chosen": -4.4839935302734375, "rewards/margins": 0.645721435546875, "rewards/rejected": -5.1297149658203125, "step": 8943 }, { "epoch": 1.98, "learning_rate": 7.87652178856755e-06, "logits/chosen": -1.6717581748962402, "logits/rejected": -1.6059143543243408, "logps/chosen": -78.82064819335938, "logps/rejected": -157.98281860351562, "loss": 0.2546, "rewards/accuracies": 1.0, "rewards/chosen": -1.534646987915039, "rewards/margins": 0.6706540584564209, "rewards/rejected": -2.20530104637146, "step": 8944 }, { "epoch": 1.98, "learning_rate": 7.875055587996703e-06, "logits/chosen": -1.2854150533676147, "logits/rejected": -1.0736949443817139, "logps/chosen": -153.54766845703125, "logps/rejected": -292.9049072265625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.35727235674858093, "rewards/margins": 7.752938747406006, "rewards/rejected": -7.395666599273682, "step": 8945 }, { "epoch": 1.98, "learning_rate": 7.873589017988124e-06, "logits/chosen": -1.7241432666778564, "logits/rejected": -1.7251322269439697, "logps/chosen": -137.7912139892578, "logps/rejected": -148.15866088867188, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -6.702233791351318, "rewards/margins": 3.8014302253723145, "rewards/rejected": -10.503664016723633, "step": 8946 }, { "epoch": 1.98, "learning_rate": 7.872122078730263e-06, "logits/chosen": -1.6856695413589478, "logits/rejected": -1.6856695413589478, "logps/chosen": -195.7527313232422, "logps/rejected": -195.7527313232422, "loss": 0.3544, "rewards/accuracies": 0.0, "rewards/chosen": -4.3712358474731445, "rewards/margins": 0.0, "rewards/rejected": -4.3712358474731445, "step": 8947 }, { "epoch": 1.98, "learning_rate": 7.87065477041162e-06, "logits/chosen": -1.779126524925232, "logits/rejected": -1.7878634929656982, "logps/chosen": -64.71725463867188, "logps/rejected": -86.53822326660156, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -0.1492973417043686, "rewards/margins": 4.365094184875488, "rewards/rejected": -4.5143914222717285, "step": 8948 }, { "epoch": 1.98, "learning_rate": 7.86918709322074e-06, "logits/chosen": -1.1220424175262451, "logits/rejected": -1.0122510194778442, "logps/chosen": -152.7320556640625, "logps/rejected": -197.87242126464844, "loss": 0.0867, "rewards/accuracies": 1.0, "rewards/chosen": 0.8355880975723267, "rewards/margins": 2.0340590476989746, "rewards/rejected": -1.1984710693359375, "step": 8949 }, { "epoch": 1.98, "learning_rate": 7.867719047346216e-06, "logits/chosen": -1.3802056312561035, "logits/rejected": -1.387179970741272, "logps/chosen": -115.29519653320312, "logps/rejected": -139.55825805664062, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": -1.7369431257247925, "rewards/margins": 2.887584686279297, "rewards/rejected": -4.624527931213379, "step": 8950 }, { "epoch": 1.98, "learning_rate": 7.86625063297669e-06, "logits/chosen": -1.1867836713790894, "logits/rejected": -1.1614874601364136, "logps/chosen": -89.56538391113281, "logps/rejected": -84.52731323242188, "loss": 0.12, "rewards/accuracies": 1.0, "rewards/chosen": -1.3086906671524048, "rewards/margins": 1.3212167024612427, "rewards/rejected": -2.6299073696136475, "step": 8951 }, { "epoch": 1.98, "learning_rate": 7.864781850300844e-06, "logits/chosen": -1.3242223262786865, "logits/rejected": -1.3242223262786865, "logps/chosen": -59.62852478027344, "logps/rejected": -59.62852478027344, "loss": 0.6959, "rewards/accuracies": 0.0, "rewards/chosen": -1.2659084796905518, "rewards/margins": 0.0, "rewards/rejected": -1.2659084796905518, "step": 8952 }, { "epoch": 1.98, "learning_rate": 7.863312699507419e-06, "logits/chosen": -1.2671129703521729, "logits/rejected": -1.2629555463790894, "logps/chosen": -212.45501708984375, "logps/rejected": -238.25833129882812, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.9134933948516846, "rewards/margins": 7.287799835205078, "rewards/rejected": -10.201292991638184, "step": 8953 }, { "epoch": 1.98, "learning_rate": 7.861843180785196e-06, "logits/chosen": -1.7742565870285034, "logits/rejected": -1.7742565870285034, "logps/chosen": -72.4216079711914, "logps/rejected": -72.4216079711914, "loss": 0.3469, "rewards/accuracies": 0.0, "rewards/chosen": -1.503975749015808, "rewards/margins": 0.0, "rewards/rejected": -1.503975749015808, "step": 8954 }, { "epoch": 1.98, "learning_rate": 7.860373294323002e-06, "logits/chosen": -1.4495688676834106, "logits/rejected": -0.9360308647155762, "logps/chosen": -171.34803771972656, "logps/rejected": -1127.2374267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -14.584782600402832, "rewards/margins": 83.08439636230469, "rewards/rejected": -97.66918182373047, "step": 8955 }, { "epoch": 1.98, "learning_rate": 7.858903040309717e-06, "logits/chosen": -1.6086859703063965, "logits/rejected": -1.5235999822616577, "logps/chosen": -106.36131286621094, "logps/rejected": -260.9439392089844, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": 0.6146003603935242, "rewards/margins": 10.317193031311035, "rewards/rejected": -9.702592849731445, "step": 8956 }, { "epoch": 1.98, "learning_rate": 7.857432418934264e-06, "logits/chosen": -1.4953429698944092, "logits/rejected": -1.4690006971359253, "logps/chosen": -89.72547149658203, "logps/rejected": -91.81532287597656, "loss": 0.2758, "rewards/accuracies": 1.0, "rewards/chosen": -2.4361398220062256, "rewards/margins": 0.6872167587280273, "rewards/rejected": -3.123356580734253, "step": 8957 }, { "epoch": 1.98, "learning_rate": 7.855961430385615e-06, "logits/chosen": -1.3631166219711304, "logits/rejected": -0.9723915457725525, "logps/chosen": -268.965576171875, "logps/rejected": -796.013671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6747161746025085, "rewards/margins": 69.77977752685547, "rewards/rejected": -70.4544906616211, "step": 8958 }, { "epoch": 1.98, "learning_rate": 7.854490074852784e-06, "logits/chosen": -1.4185680150985718, "logits/rejected": -1.259979009628296, "logps/chosen": -102.64472961425781, "logps/rejected": -266.5142822265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3319565057754517, "rewards/margins": 7.245425701141357, "rewards/rejected": -8.57738208770752, "step": 8959 }, { "epoch": 1.98, "learning_rate": 7.853018352524845e-06, "logits/chosen": -1.7681978940963745, "logits/rejected": -1.8293395042419434, "logps/chosen": -129.14227294921875, "logps/rejected": -116.0063247680664, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.2600388526916504, "rewards/margins": 5.681086540222168, "rewards/rejected": -7.941125392913818, "step": 8960 }, { "epoch": 1.98, "learning_rate": 7.851546263590905e-06, "logits/chosen": -1.3519026041030884, "logits/rejected": -1.1884617805480957, "logps/chosen": -149.89613342285156, "logps/rejected": -276.35174560546875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.9651992321014404, "rewards/margins": 6.8351945877075195, "rewards/rejected": -9.800394058227539, "step": 8961 }, { "epoch": 1.98, "learning_rate": 7.850073808240125e-06, "logits/chosen": -1.3731026649475098, "logits/rejected": -1.3798696994781494, "logps/chosen": -91.23560333251953, "logps/rejected": -63.399627685546875, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": -2.1312196254730225, "rewards/margins": 2.7660419940948486, "rewards/rejected": -4.897261619567871, "step": 8962 }, { "epoch": 1.98, "learning_rate": 7.84860098666171e-06, "logits/chosen": -1.7575117349624634, "logits/rejected": -1.7136849164962769, "logps/chosen": -116.36754608154297, "logps/rejected": -189.60256958007812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6047286987304688, "rewards/margins": 8.231980323791504, "rewards/rejected": -9.836709022521973, "step": 8963 }, { "epoch": 1.98, "learning_rate": 7.847127799044918e-06, "logits/chosen": -1.878937005996704, "logits/rejected": -1.8212448358535767, "logps/chosen": -106.00565338134766, "logps/rejected": -190.892333984375, "loss": 0.3586, "rewards/accuracies": 1.0, "rewards/chosen": -0.47370681166648865, "rewards/margins": 3.7190589904785156, "rewards/rejected": -4.192765712738037, "step": 8964 }, { "epoch": 1.98, "learning_rate": 7.845654245579047e-06, "logits/chosen": -1.5551592111587524, "logits/rejected": -1.5469626188278198, "logps/chosen": -203.7928924560547, "logps/rejected": -226.67996215820312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.4867355823516846, "rewards/margins": 11.006502151489258, "rewards/rejected": -8.519766807556152, "step": 8965 }, { "epoch": 1.98, "learning_rate": 7.844180326453447e-06, "logits/chosen": -1.3572347164154053, "logits/rejected": -1.3139121532440186, "logps/chosen": -109.02182006835938, "logps/rejected": -186.5440673828125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.361093282699585, "rewards/margins": 7.348099708557129, "rewards/rejected": -9.709193229675293, "step": 8966 }, { "epoch": 1.98, "learning_rate": 7.842706041857512e-06, "logits/chosen": -1.75462806224823, "logits/rejected": -1.8065063953399658, "logps/chosen": -159.27267456054688, "logps/rejected": -132.35215759277344, "loss": 0.6529, "rewards/accuracies": 0.0, "rewards/chosen": -4.568051338195801, "rewards/margins": -0.9898972511291504, "rewards/rejected": -3.5781540870666504, "step": 8967 }, { "epoch": 1.98, "learning_rate": 7.841231391980687e-06, "logits/chosen": -1.3727130889892578, "logits/rejected": -1.387841820716858, "logps/chosen": -114.07966613769531, "logps/rejected": -111.00291442871094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.4676834046840668, "rewards/margins": 8.674601554870605, "rewards/rejected": -8.206917762756348, "step": 8968 }, { "epoch": 1.99, "learning_rate": 7.839756377012453e-06, "logits/chosen": -1.9224599599838257, "logits/rejected": -1.9607539176940918, "logps/chosen": -104.1143798828125, "logps/rejected": -104.81045532226562, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.335003614425659, "rewards/margins": 5.869595527648926, "rewards/rejected": -8.204599380493164, "step": 8969 }, { "epoch": 1.99, "learning_rate": 7.838280997142355e-06, "logits/chosen": -1.5760327577590942, "logits/rejected": -1.5314773321151733, "logps/chosen": -172.51040649414062, "logps/rejected": -220.25230407714844, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.9663101434707642, "rewards/margins": 6.817956447601318, "rewards/rejected": -8.784266471862793, "step": 8970 }, { "epoch": 1.99, "learning_rate": 7.836805252559971e-06, "logits/chosen": -1.773934245109558, "logits/rejected": -1.7809432744979858, "logps/chosen": -166.92095947265625, "logps/rejected": -132.62557983398438, "loss": 0.126, "rewards/accuracies": 1.0, "rewards/chosen": -4.328887939453125, "rewards/margins": 1.2621855735778809, "rewards/rejected": -5.591073513031006, "step": 8971 }, { "epoch": 1.99, "learning_rate": 7.83532914345493e-06, "logits/chosen": -1.7614262104034424, "logits/rejected": -1.9736565351486206, "logps/chosen": -194.80047607421875, "logps/rejected": -205.81837463378906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 3.5891144275665283, "rewards/margins": 11.319005012512207, "rewards/rejected": -7.7298903465271, "step": 8972 }, { "epoch": 1.99, "learning_rate": 7.833852670016912e-06, "logits/chosen": -1.4909586906433105, "logits/rejected": -1.557405948638916, "logps/chosen": -178.90679931640625, "logps/rejected": -177.90916442871094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.73663330078125, "rewards/margins": 7.766565322875977, "rewards/rejected": -10.503198623657227, "step": 8973 }, { "epoch": 1.99, "learning_rate": 7.832375832435637e-06, "logits/chosen": -1.4955426454544067, "logits/rejected": -1.4622098207473755, "logps/chosen": -134.05039978027344, "logps/rejected": -321.2535095214844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.848186016082764, "rewards/margins": 11.547296524047852, "rewards/rejected": -17.395483016967773, "step": 8974 }, { "epoch": 1.99, "learning_rate": 7.830898630900877e-06, "logits/chosen": -1.8545706272125244, "logits/rejected": -1.834876537322998, "logps/chosen": -90.48896789550781, "logps/rejected": -190.35972595214844, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 1.4071648120880127, "rewards/margins": 11.94009017944336, "rewards/rejected": -10.532925605773926, "step": 8975 }, { "epoch": 1.99, "learning_rate": 7.829421065602448e-06, "logits/chosen": -1.5622979402542114, "logits/rejected": -0.8846942186355591, "logps/chosen": -232.30258178710938, "logps/rejected": -498.1629943847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6705291867256165, "rewards/margins": 21.425281524658203, "rewards/rejected": -22.095809936523438, "step": 8976 }, { "epoch": 1.99, "learning_rate": 7.827943136730214e-06, "logits/chosen": -1.4856476783752441, "logits/rejected": -1.4440466165542603, "logps/chosen": -168.03396606445312, "logps/rejected": -215.2259521484375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.8343505859375, "rewards/margins": 5.074659824371338, "rewards/rejected": -7.909010410308838, "step": 8977 }, { "epoch": 1.99, "learning_rate": 7.826464844474086e-06, "logits/chosen": -1.5403358936309814, "logits/rejected": -1.5403358936309814, "logps/chosen": -92.5002670288086, "logps/rejected": -92.5002670288086, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -0.8639335632324219, "rewards/margins": 0.0, "rewards/rejected": -0.8639335632324219, "step": 8978 }, { "epoch": 1.99, "learning_rate": 7.82498618902402e-06, "logits/chosen": -1.0943572521209717, "logits/rejected": -1.1352914571762085, "logps/chosen": -162.95079040527344, "logps/rejected": -176.1329803466797, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.418653964996338, "rewards/margins": 6.5622334480285645, "rewards/rejected": -9.980887413024902, "step": 8979 }, { "epoch": 1.99, "learning_rate": 7.823507170570018e-06, "logits/chosen": -1.7665166854858398, "logits/rejected": -1.7756627798080444, "logps/chosen": -108.98482513427734, "logps/rejected": -202.49252319335938, "loss": 0.3468, "rewards/accuracies": 1.0, "rewards/chosen": -4.48848819732666, "rewards/margins": 7.729045867919922, "rewards/rejected": -12.217534065246582, "step": 8980 }, { "epoch": 1.99, "learning_rate": 7.822027789302134e-06, "logits/chosen": -1.922357201576233, "logits/rejected": -1.9007539749145508, "logps/chosen": -148.35939025878906, "logps/rejected": -173.3043975830078, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -3.326754093170166, "rewards/margins": 4.0391035079956055, "rewards/rejected": -7.3658576011657715, "step": 8981 }, { "epoch": 1.99, "learning_rate": 7.820548045410462e-06, "logits/chosen": -1.4205175638198853, "logits/rejected": -1.3616023063659668, "logps/chosen": -138.0840301513672, "logps/rejected": -204.89932250976562, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -0.24045410752296448, "rewards/margins": 6.724215507507324, "rewards/rejected": -6.964669704437256, "step": 8982 }, { "epoch": 1.99, "learning_rate": 7.819067939085145e-06, "logits/chosen": -1.3504526615142822, "logits/rejected": -1.39315927028656, "logps/chosen": -111.76716613769531, "logps/rejected": -155.049072265625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.6851685047149658, "rewards/margins": 5.063419342041016, "rewards/rejected": -6.7485880851745605, "step": 8983 }, { "epoch": 1.99, "learning_rate": 7.817587470516378e-06, "logits/chosen": -1.6273740530014038, "logits/rejected": -1.5386781692504883, "logps/chosen": -116.49358367919922, "logps/rejected": -235.11431884765625, "loss": 0.1063, "rewards/accuracies": 1.0, "rewards/chosen": -3.6221015453338623, "rewards/margins": 8.506827354431152, "rewards/rejected": -12.128929138183594, "step": 8984 }, { "epoch": 1.99, "learning_rate": 7.816106639894392e-06, "logits/chosen": -1.7448923587799072, "logits/rejected": -1.7740567922592163, "logps/chosen": -144.29714965820312, "logps/rejected": -172.31939697265625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.36929628252983093, "rewards/margins": 5.448757648468018, "rewards/rejected": -5.079461574554443, "step": 8985 }, { "epoch": 1.99, "learning_rate": 7.814625447409474e-06, "logits/chosen": -1.3939814567565918, "logits/rejected": -1.39165461063385, "logps/chosen": -172.9390869140625, "logps/rejected": -174.883056640625, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": -2.864239454269409, "rewards/margins": 6.368046760559082, "rewards/rejected": -9.23228645324707, "step": 8986 }, { "epoch": 1.99, "learning_rate": 7.813143893251951e-06, "logits/chosen": -1.45589017868042, "logits/rejected": -1.3863407373428345, "logps/chosen": -174.9842529296875, "logps/rejected": -210.35614013671875, "loss": 0.3414, "rewards/accuracies": 1.0, "rewards/chosen": -7.365609169006348, "rewards/margins": 0.021500110626220703, "rewards/rejected": -7.387109279632568, "step": 8987 }, { "epoch": 1.99, "learning_rate": 7.811661977612202e-06, "logits/chosen": -1.5981221199035645, "logits/rejected": -1.596215844154358, "logps/chosen": -105.67721557617188, "logps/rejected": -115.23434448242188, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -3.935115098953247, "rewards/margins": 4.470725059509277, "rewards/rejected": -8.405839920043945, "step": 8988 }, { "epoch": 1.99, "learning_rate": 7.810179700680646e-06, "logits/chosen": -1.4941710233688354, "logits/rejected": -1.4440454244613647, "logps/chosen": -110.97852325439453, "logps/rejected": -223.12533569335938, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.8021323680877686, "rewards/margins": 7.620506286621094, "rewards/rejected": -11.422638893127441, "step": 8989 }, { "epoch": 1.99, "learning_rate": 7.808697062647755e-06, "logits/chosen": -1.6096875667572021, "logits/rejected": -1.5677870512008667, "logps/chosen": -68.49958801269531, "logps/rejected": -182.7918701171875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.040609121322632, "rewards/margins": 7.4832658767700195, "rewards/rejected": -9.52387523651123, "step": 8990 }, { "epoch": 1.99, "learning_rate": 7.807214063704042e-06, "logits/chosen": -1.6396058797836304, "logits/rejected": -1.6039204597473145, "logps/chosen": -187.35638427734375, "logps/rejected": -249.86355590820312, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -4.4177093505859375, "rewards/margins": 3.369720458984375, "rewards/rejected": -7.7874298095703125, "step": 8991 }, { "epoch": 1.99, "learning_rate": 7.805730704040072e-06, "logits/chosen": -1.7543306350708008, "logits/rejected": -1.7688865661621094, "logps/chosen": -175.9267578125, "logps/rejected": -160.08633422851562, "loss": 0.1816, "rewards/accuracies": 1.0, "rewards/chosen": -3.6566162109375, "rewards/margins": 2.7910232543945312, "rewards/rejected": -6.447639465332031, "step": 8992 }, { "epoch": 1.99, "learning_rate": 7.804246983846449e-06, "logits/chosen": -1.4209197759628296, "logits/rejected": -1.3310481309890747, "logps/chosen": -90.99574279785156, "logps/rejected": -186.28375244140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.10231476277112961, "rewards/margins": 8.478846549987793, "rewards/rejected": -8.581161499023438, "step": 8993 }, { "epoch": 1.99, "learning_rate": 7.802762903313831e-06, "logits/chosen": -1.7798014879226685, "logits/rejected": -1.760327935218811, "logps/chosen": -78.15750122070312, "logps/rejected": -161.921875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.02419128455221653, "rewards/margins": 12.134163856506348, "rewards/rejected": -12.109972953796387, "step": 8994 }, { "epoch": 1.99, "learning_rate": 7.80127846263292e-06, "logits/chosen": -1.840654730796814, "logits/rejected": -1.8268845081329346, "logps/chosen": -155.29058837890625, "logps/rejected": -274.7343444824219, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/chosen": -3.3527092933654785, "rewards/margins": 5.427902698516846, "rewards/rejected": -8.780611991882324, "step": 8995 }, { "epoch": 1.99, "learning_rate": 7.799793661994457e-06, "logits/chosen": -1.6710494756698608, "logits/rejected": -1.6729599237442017, "logps/chosen": -150.18743896484375, "logps/rejected": -161.55210876464844, "loss": 0.5008, "rewards/accuracies": 0.0, "rewards/chosen": -5.29629373550415, "rewards/margins": -0.5244812965393066, "rewards/rejected": -4.771812438964844, "step": 8996 }, { "epoch": 1.99, "learning_rate": 7.79830850158924e-06, "logits/chosen": -1.5757375955581665, "logits/rejected": -1.5592896938323975, "logps/chosen": -109.35371398925781, "logps/rejected": -146.78482055664062, "loss": 0.1886, "rewards/accuracies": 1.0, "rewards/chosen": -1.5915863513946533, "rewards/margins": 0.7805466651916504, "rewards/rejected": -2.3721330165863037, "step": 8997 }, { "epoch": 1.99, "learning_rate": 7.796822981608109e-06, "logits/chosen": -1.6855207681655884, "logits/rejected": -1.6520342826843262, "logps/chosen": -100.18508911132812, "logps/rejected": -145.86288452148438, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 1.0711029767990112, "rewards/margins": 5.6353983879089355, "rewards/rejected": -4.564295291900635, "step": 8998 }, { "epoch": 1.99, "learning_rate": 7.795337102241948e-06, "logits/chosen": -1.7014468908309937, "logits/rejected": -1.7898414134979248, "logps/chosen": -229.2153778076172, "logps/rejected": -241.47515869140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.104412794113159, "rewards/margins": 9.073884963989258, "rewards/rejected": -11.178297996520996, "step": 8999 }, { "epoch": 1.99, "learning_rate": 7.793850863681688e-06, "logits/chosen": -1.572535753250122, "logits/rejected": -1.6041396856307983, "logps/chosen": -192.21939086914062, "logps/rejected": -170.48431396484375, "loss": 0.078, "rewards/accuracies": 1.0, "rewards/chosen": -10.863332748413086, "rewards/margins": 1.7791557312011719, "rewards/rejected": -12.642488479614258, "step": 9000 }, { "epoch": 1.99, "learning_rate": 7.79236426611831e-06, "logits/chosen": -1.354056477546692, "logits/rejected": -1.2746553421020508, "logps/chosen": -126.3466796875, "logps/rejected": -260.8793029785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8087798953056335, "rewards/margins": 12.211870193481445, "rewards/rejected": -13.020649909973145, "step": 9001 }, { "epoch": 1.99, "learning_rate": 7.790877309742833e-06, "logits/chosen": -1.769835352897644, "logits/rejected": -1.6521378755569458, "logps/chosen": -156.05264282226562, "logps/rejected": -335.7601623535156, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 1.6739777326583862, "rewards/margins": 14.891498565673828, "rewards/rejected": -13.217520713806152, "step": 9002 }, { "epoch": 1.99, "learning_rate": 7.789389994746334e-06, "logits/chosen": -1.6580047607421875, "logits/rejected": -1.6444356441497803, "logps/chosen": -82.2529067993164, "logps/rejected": -127.22114562988281, "loss": 0.0351, "rewards/accuracies": 1.0, "rewards/chosen": -1.4176292419433594, "rewards/margins": 2.7213058471679688, "rewards/rejected": -4.138935089111328, "step": 9003 }, { "epoch": 1.99, "learning_rate": 7.787902321319925e-06, "logits/chosen": -1.7736552953720093, "logits/rejected": -1.846632719039917, "logps/chosen": -175.40789794921875, "logps/rejected": -162.38783264160156, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 1.1056259870529175, "rewards/margins": 5.014041423797607, "rewards/rejected": -3.9084153175354004, "step": 9004 }, { "epoch": 1.99, "learning_rate": 7.786414289654768e-06, "logits/chosen": -1.389710545539856, "logits/rejected": -1.3891069889068604, "logps/chosen": -187.56912231445312, "logps/rejected": -300.9922790527344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7345550656318665, "rewards/margins": 14.496968269348145, "rewards/rejected": -15.231523513793945, "step": 9005 }, { "epoch": 1.99, "learning_rate": 7.784925899942075e-06, "logits/chosen": -1.5662589073181152, "logits/rejected": -1.5662589073181152, "logps/chosen": -194.27462768554688, "logps/rejected": -194.27462768554688, "loss": 0.3477, "rewards/accuracies": 0.0, "rewards/chosen": -4.287236213684082, "rewards/margins": 0.0, "rewards/rejected": -4.287236213684082, "step": 9006 }, { "epoch": 1.99, "learning_rate": 7.7834371523731e-06, "logits/chosen": -1.3589025735855103, "logits/rejected": -1.3871525526046753, "logps/chosen": -197.839599609375, "logps/rejected": -268.94879150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.02596588246524334, "rewards/margins": 11.798675537109375, "rewards/rejected": -11.772709846496582, "step": 9007 }, { "epoch": 1.99, "learning_rate": 7.781948047139139e-06, "logits/chosen": -1.4626662731170654, "logits/rejected": -1.0948209762573242, "logps/chosen": -156.01429748535156, "logps/rejected": -577.3641357421875, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -3.0486228466033936, "rewards/margins": 42.15311813354492, "rewards/rejected": -45.20174026489258, "step": 9008 }, { "epoch": 1.99, "learning_rate": 7.780458584431545e-06, "logits/chosen": -1.5893402099609375, "logits/rejected": -1.5105564594268799, "logps/chosen": -75.796630859375, "logps/rejected": -141.10687255859375, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": 0.8828026056289673, "rewards/margins": 6.045769691467285, "rewards/rejected": -5.162967205047607, "step": 9009 }, { "epoch": 1.99, "learning_rate": 7.778968764441704e-06, "logits/chosen": -1.6620765924453735, "logits/rejected": -1.6291301250457764, "logps/chosen": -90.16209411621094, "logps/rejected": -287.26513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1741478443145752, "rewards/margins": 13.038877487182617, "rewards/rejected": -14.213025093078613, "step": 9010 }, { "epoch": 1.99, "learning_rate": 7.777478587361058e-06, "logits/chosen": -1.8367549180984497, "logits/rejected": -1.8054686784744263, "logps/chosen": -229.31993103027344, "logps/rejected": -283.361328125, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": -9.344377517700195, "rewards/margins": 2.459254264831543, "rewards/rejected": -11.803631782531738, "step": 9011 }, { "epoch": 1.99, "learning_rate": 7.775988053381092e-06, "logits/chosen": -1.6943100690841675, "logits/rejected": -1.6144206523895264, "logps/chosen": -165.09231567382812, "logps/rejected": -338.5149230957031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.058453321456909, "rewards/margins": 10.39190673828125, "rewards/rejected": -13.450360298156738, "step": 9012 }, { "epoch": 1.99, "learning_rate": 7.774497162693333e-06, "logits/chosen": -1.2715609073638916, "logits/rejected": -1.1126657724380493, "logps/chosen": -240.86639404296875, "logps/rejected": -301.2021179199219, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -3.532423496246338, "rewards/margins": 4.110954284667969, "rewards/rejected": -7.643377780914307, "step": 9013 }, { "epoch": 2.0, "learning_rate": 7.773005915489358e-06, "logits/chosen": -1.4537991285324097, "logits/rejected": -1.4267765283584595, "logps/chosen": -84.44063568115234, "logps/rejected": -178.30026245117188, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -2.342729330062866, "rewards/margins": 6.92153263092041, "rewards/rejected": -9.264262199401855, "step": 9014 }, { "epoch": 2.0, "learning_rate": 7.77151431196079e-06, "logits/chosen": -1.715998888015747, "logits/rejected": -1.715998888015747, "logps/chosen": -147.2008056640625, "logps/rejected": -147.2008056640625, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.508182048797607, "rewards/margins": 0.0, "rewards/rejected": -4.508182048797607, "step": 9015 }, { "epoch": 2.0, "learning_rate": 7.770022352299294e-06, "logits/chosen": -1.8109689950942993, "logits/rejected": -1.9083569049835205, "logps/chosen": -250.34130859375, "logps/rejected": -333.4288330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.439749151468277, "rewards/margins": 16.04233741760254, "rewards/rejected": -16.482086181640625, "step": 9016 }, { "epoch": 2.0, "learning_rate": 7.768530036696585e-06, "logits/chosen": -1.5495996475219727, "logits/rejected": -1.487097144126892, "logps/chosen": -98.6622085571289, "logps/rejected": -206.9527587890625, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -2.0746498107910156, "rewards/margins": 3.2538094520568848, "rewards/rejected": -5.3284592628479, "step": 9017 }, { "epoch": 2.0, "learning_rate": 7.767037365344422e-06, "logits/chosen": -1.2766826152801514, "logits/rejected": -1.4193158149719238, "logps/chosen": -285.00634765625, "logps/rejected": -156.19113159179688, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.0989502668380737, "rewards/margins": 4.322659015655518, "rewards/rejected": -5.421609401702881, "step": 9018 }, { "epoch": 2.0, "learning_rate": 7.76554433843461e-06, "logits/chosen": -1.718942642211914, "logits/rejected": -1.767071008682251, "logps/chosen": -114.4547119140625, "logps/rejected": -129.74465942382812, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.4446624517440796, "rewards/margins": 8.961740493774414, "rewards/rejected": -10.406402587890625, "step": 9019 }, { "epoch": 2.0, "learning_rate": 7.764050956159e-06, "logits/chosen": -1.120179295539856, "logits/rejected": -1.120179295539856, "logps/chosen": -135.81045532226562, "logps/rejected": -135.81045532226562, "loss": 0.3466, "rewards/accuracies": 0.0, "rewards/chosen": -4.8143630027771, "rewards/margins": 0.0, "rewards/rejected": -4.8143630027771, "step": 9020 }, { "epoch": 2.0, "learning_rate": 7.762557218709484e-06, "logits/chosen": -1.7508186101913452, "logits/rejected": -1.8003311157226562, "logps/chosen": -210.91098022460938, "logps/rejected": -266.24090576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3573334217071533, "rewards/margins": 10.480298042297363, "rewards/rejected": -11.837631225585938, "step": 9021 }, { "epoch": 2.0, "learning_rate": 7.761063126278006e-06, "logits/chosen": -1.37409508228302, "logits/rejected": -1.191686749458313, "logps/chosen": -188.54026794433594, "logps/rejected": -428.8830871582031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7284683585166931, "rewards/margins": 12.39201831817627, "rewards/rejected": -13.12048625946045, "step": 9022 }, { "epoch": 2.0, "learning_rate": 7.759568679056554e-06, "logits/chosen": -2.02500057220459, "logits/rejected": -1.9806917905807495, "logps/chosen": -107.51082611083984, "logps/rejected": -148.0419921875, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": -2.7407853603363037, "rewards/margins": 3.5132863521575928, "rewards/rejected": -6.2540717124938965, "step": 9023 }, { "epoch": 2.0, "learning_rate": 7.758073877237164e-06, "logits/chosen": -1.6295201778411865, "logits/rejected": -0.99442058801651, "logps/chosen": -144.81201171875, "logps/rejected": -810.868896484375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.801853895187378, "rewards/margins": 65.4170913696289, "rewards/rejected": -68.21894836425781, "step": 9024 }, { "epoch": 2.0, "learning_rate": 7.756578721011908e-06, "logits/chosen": -1.5928081274032593, "logits/rejected": -1.517360806465149, "logps/chosen": -72.6846923828125, "logps/rejected": -231.05178833007812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.36955726146698, "rewards/margins": 11.55962085723877, "rewards/rejected": -12.929178237915039, "step": 9025 }, { "epoch": 2.0, "learning_rate": 7.755083210572914e-06, "logits/chosen": -1.5119359493255615, "logits/rejected": -1.4708620309829712, "logps/chosen": -139.7261505126953, "logps/rejected": -229.73287963867188, "loss": 0.3499, "rewards/accuracies": 1.0, "rewards/chosen": -0.9533371329307556, "rewards/margins": 5.0068864822387695, "rewards/rejected": -5.96022367477417, "step": 9026 }, { "epoch": 2.0, "learning_rate": 7.75358734611235e-06, "logits/chosen": -1.728896141052246, "logits/rejected": -1.7132538557052612, "logps/chosen": -152.58193969726562, "logps/rejected": -138.91006469726562, "loss": 0.5482, "rewards/accuracies": 0.0, "rewards/chosen": -2.94500732421875, "rewards/margins": -0.6897232532501221, "rewards/rejected": -2.255284070968628, "step": 9027 }, { "epoch": 2.0, "learning_rate": 7.75209112782243e-06, "logits/chosen": -1.700520396232605, "logits/rejected": -1.7099392414093018, "logps/chosen": -139.364501953125, "logps/rejected": -102.19466400146484, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -5.2775774002075195, "rewards/margins": 3.2263545989990234, "rewards/rejected": -8.503931999206543, "step": 9028 }, { "epoch": 2.0, "learning_rate": 7.75059455589542e-06, "logits/chosen": -1.4882599115371704, "logits/rejected": -1.3646498918533325, "logps/chosen": -94.17606353759766, "logps/rejected": -186.25503540039062, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.6695541739463806, "rewards/margins": 6.201042175292969, "rewards/rejected": -5.531487941741943, "step": 9029 }, { "epoch": 2.0, "learning_rate": 7.749097630523618e-06, "logits/chosen": -1.5804080963134766, "logits/rejected": -1.5896217823028564, "logps/chosen": -101.70114135742188, "logps/rejected": -163.0420684814453, "loss": 0.0774, "rewards/accuracies": 1.0, "rewards/chosen": -1.1536056995391846, "rewards/margins": 9.474496841430664, "rewards/rejected": -10.62810230255127, "step": 9030 }, { "epoch": 2.0, "learning_rate": 7.74760035189938e-06, "logits/chosen": -1.2616230249404907, "logits/rejected": -1.1603689193725586, "logps/chosen": -238.31124877929688, "logps/rejected": -302.4507141113281, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.43444520235061646, "rewards/margins": 7.12568998336792, "rewards/rejected": -6.691244602203369, "step": 9031 }, { "epoch": 2.0, "learning_rate": 7.746102720215102e-06, "logits/chosen": -1.8220466375350952, "logits/rejected": -1.7559796571731567, "logps/chosen": -88.40319061279297, "logps/rejected": -202.0885009765625, "loss": 0.3561, "rewards/accuracies": 1.0, "rewards/chosen": -3.9238698482513428, "rewards/margins": 3.951293706893921, "rewards/rejected": -7.875163555145264, "step": 9032 }, { "epoch": 2.0, "learning_rate": 7.744604735663227e-06, "logits/chosen": -1.5721627473831177, "logits/rejected": -1.5672143697738647, "logps/chosen": -74.00320434570312, "logps/rejected": -106.58928680419922, "loss": 0.2, "rewards/accuracies": 1.0, "rewards/chosen": -0.716076672077179, "rewards/margins": 0.709844172000885, "rewards/rejected": -1.425920844078064, "step": 9033 }, { "epoch": 2.0, "learning_rate": 7.74310639843624e-06, "logits/chosen": -1.5945672988891602, "logits/rejected": -1.5945672988891602, "logps/chosen": -190.19129943847656, "logps/rejected": -190.19129943847656, "loss": 0.3469, "rewards/accuracies": 0.0, "rewards/chosen": -11.346068382263184, "rewards/margins": 0.0, "rewards/rejected": -11.346068382263184, "step": 9034 }, { "epoch": 2.0, "learning_rate": 7.741607708726675e-06, "logits/chosen": -1.188664197921753, "logits/rejected": -1.188664197921753, "logps/chosen": -241.50880432128906, "logps/rejected": -241.50880432128906, "loss": 0.3609, "rewards/accuracies": 0.0, "rewards/chosen": -14.1033353805542, "rewards/margins": 0.0, "rewards/rejected": -14.1033353805542, "step": 9035 }, { "epoch": 2.0, "learning_rate": 7.740108666727111e-06, "logits/chosen": -2.2703890800476074, "logits/rejected": -1.211271047592163, "logps/chosen": -162.66363525390625, "logps/rejected": -748.7705078125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.322998046875, "rewards/margins": 60.80078887939453, "rewards/rejected": -62.12378692626953, "step": 9036 }, { "epoch": 2.0, "step": 9036, "total_flos": 0.0, "train_loss": 0.03461877462709143, "train_runtime": 4342.0228, "train_samples_per_second": 4.162, "train_steps_per_second": 2.081 } ], "logging_steps": 1.0, "max_steps": 9036, "num_train_epochs": 2, "save_steps": 3000, "total_flos": 0.0, "trial_name": null, "trial_params": null }