RoyJoy commited on
Commit
eea7365
·
verified ·
1 Parent(s): f5f3efe

Training in progress, step 2780, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c1d40c61f50049679166c8670f167ee8eec009dc1451b0836dd6ac8b5960920
3
  size 4286680
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d36d2e186207b84b2c45712ab427f0ad2a1151e2ecdc7f8e268aa968c72d1ab0
3
  size 4286680
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:468a023458887a6a83bd49071226f7c5d9b0d6b387c6d708f52e3174eed61e23
3
  size 8583659
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ad69c4bbfc0217d682e3dd97cf9a007ca44f2d30692ffba700086ed8a8d7b37
3
  size 8583659
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eefe44a6755636697078a4868989894ccb2a677e6810ecfd50681e082a727be9
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fd9548f5ebcc619ea12bff0f8c85684dd6b490d5e7a15ed245e3fc1b6caa5d3
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c18ce6361aace6924f9f770c3b13702e11bd95c6d550d95d5c9c36e74f3b863
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f7dda6252d67e1f22ef854ad6e21045a9793d2a4a2d5fc69ea513e82b91e37d
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70469f58c7cc5d283fe45c271b4ae5caeabf9236e26b21e5ac156bd9d1337f25
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23fef5b8ea6e3f479d8dce05e71bf47c577836cecdc4e49e2ce1aca8b5c7da16
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e861d20feb9c6d53b41bcd90bb5cb53106357b1ac2f809b7e4aed8fd1286b2fb
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18091ccb9bbda4cc1c7f4d4c03537d6fe4375355face5ebdf3f896691a1ae0d1
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0738a5ecd838b0fa4bd9f9a651e01259796f4e6a7624feb03017a7c8b9f73d98
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc5a87cbe042a37e049f17d5d373d66e08f75e1ca959dda159534184475e9857
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9454283916209656,
5
  "eval_steps": 500,
6
- "global_step": 2629,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -18410,6 +18410,1063 @@
18410
  "learning_rate": 7.812496433630467e-07,
18411
  "loss": 35.0426,
18412
  "step": 2629
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18413
  }
18414
  ],
18415
  "logging_steps": 1,
@@ -18424,12 +19481,12 @@
18424
  "should_evaluate": false,
18425
  "should_log": false,
18426
  "should_save": true,
18427
- "should_training_stop": false
18428
  },
18429
  "attributes": {}
18430
  }
18431
  },
18432
- "total_flos": 4825322313744384.0,
18433
  "train_batch_size": 4,
18434
  "trial_name": null,
18435
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9997302885912074,
5
  "eval_steps": 500,
6
+ "global_step": 2780,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
18410
  "learning_rate": 7.812496433630467e-07,
18411
  "loss": 35.0426,
18412
  "step": 2629
18413
+ },
18414
+ {
18415
+ "epoch": 0.945788006832689,
18416
+ "grad_norm": 6.844422817230225,
18417
+ "learning_rate": 7.709628166416128e-07,
18418
+ "loss": 35.0489,
18419
+ "step": 2630
18420
+ },
18421
+ {
18422
+ "epoch": 0.9461476220444125,
18423
+ "grad_norm": 6.938287734985352,
18424
+ "learning_rate": 7.607436373731736e-07,
18425
+ "loss": 35.0326,
18426
+ "step": 2631
18427
+ },
18428
+ {
18429
+ "epoch": 0.946507237256136,
18430
+ "grad_norm": 6.78154993057251,
18431
+ "learning_rate": 7.505921196002796e-07,
18432
+ "loss": 35.0827,
18433
+ "step": 2632
18434
+ },
18435
+ {
18436
+ "epoch": 0.9468668524678594,
18437
+ "grad_norm": 6.781467914581299,
18438
+ "learning_rate": 7.405082772725225e-07,
18439
+ "loss": 35.0993,
18440
+ "step": 2633
18441
+ },
18442
+ {
18443
+ "epoch": 0.9472264676795829,
18444
+ "grad_norm": 6.844005584716797,
18445
+ "learning_rate": 7.30492124246479e-07,
18446
+ "loss": 35.0527,
18447
+ "step": 2634
18448
+ },
18449
+ {
18450
+ "epoch": 0.9475860828913063,
18451
+ "grad_norm": 7.000504016876221,
18452
+ "learning_rate": 7.205436742857119e-07,
18453
+ "loss": 35.0167,
18454
+ "step": 2635
18455
+ },
18456
+ {
18457
+ "epoch": 0.9479456981030298,
18458
+ "grad_norm": 7.001081466674805,
18459
+ "learning_rate": 7.106629410607691e-07,
18460
+ "loss": 34.9904,
18461
+ "step": 2636
18462
+ },
18463
+ {
18464
+ "epoch": 0.9483053133147532,
18465
+ "grad_norm": 6.970365524291992,
18466
+ "learning_rate": 7.008499381491284e-07,
18467
+ "loss": 35.0496,
18468
+ "step": 2637
18469
+ },
18470
+ {
18471
+ "epoch": 0.9486649285264767,
18472
+ "grad_norm": 6.876420497894287,
18473
+ "learning_rate": 6.911046790351983e-07,
18474
+ "loss": 35.0639,
18475
+ "step": 2638
18476
+ },
18477
+ {
18478
+ "epoch": 0.9490245437382001,
18479
+ "grad_norm": 6.784447193145752,
18480
+ "learning_rate": 6.814271771103109e-07,
18481
+ "loss": 35.156,
18482
+ "step": 2639
18483
+ },
18484
+ {
18485
+ "epoch": 0.9493841589499236,
18486
+ "grad_norm": 7.000516891479492,
18487
+ "learning_rate": 6.718174456726789e-07,
18488
+ "loss": 34.9821,
18489
+ "step": 2640
18490
+ },
18491
+ {
18492
+ "epoch": 0.949743774161647,
18493
+ "grad_norm": 6.875356674194336,
18494
+ "learning_rate": 6.622754979273837e-07,
18495
+ "loss": 35.0327,
18496
+ "step": 2641
18497
+ },
18498
+ {
18499
+ "epoch": 0.9501033893733705,
18500
+ "grad_norm": 6.719158172607422,
18501
+ "learning_rate": 6.528013469863814e-07,
18502
+ "loss": 35.0713,
18503
+ "step": 2642
18504
+ },
18505
+ {
18506
+ "epoch": 0.9504630045850939,
18507
+ "grad_norm": 6.750500679016113,
18508
+ "learning_rate": 6.433950058684468e-07,
18509
+ "loss": 35.1472,
18510
+ "step": 2643
18511
+ },
18512
+ {
18513
+ "epoch": 0.9508226197968174,
18514
+ "grad_norm": 7.063511371612549,
18515
+ "learning_rate": 6.340564874991905e-07,
18516
+ "loss": 34.9856,
18517
+ "step": 2644
18518
+ },
18519
+ {
18520
+ "epoch": 0.9511822350085408,
18521
+ "grad_norm": 6.9381561279296875,
18522
+ "learning_rate": 6.247858047110145e-07,
18523
+ "loss": 34.9976,
18524
+ "step": 2645
18525
+ },
18526
+ {
18527
+ "epoch": 0.9515418502202643,
18528
+ "grad_norm": 6.844141960144043,
18529
+ "learning_rate": 6.15582970243117e-07,
18530
+ "loss": 35.0957,
18531
+ "step": 2646
18532
+ },
18533
+ {
18534
+ "epoch": 0.9519014654319877,
18535
+ "grad_norm": 6.937661170959473,
18536
+ "learning_rate": 6.064479967414383e-07,
18537
+ "loss": 35.0179,
18538
+ "step": 2647
18539
+ },
18540
+ {
18541
+ "epoch": 0.9522610806437112,
18542
+ "grad_norm": 6.937982082366943,
18543
+ "learning_rate": 5.973808967587092e-07,
18544
+ "loss": 35.0541,
18545
+ "step": 2648
18546
+ },
18547
+ {
18548
+ "epoch": 0.9526206958554347,
18549
+ "grad_norm": 6.782608509063721,
18550
+ "learning_rate": 5.88381682754352e-07,
18551
+ "loss": 35.1271,
18552
+ "step": 2649
18553
+ },
18554
+ {
18555
+ "epoch": 0.9529803110671582,
18556
+ "grad_norm": 6.848582744598389,
18557
+ "learning_rate": 5.794503670945195e-07,
18558
+ "loss": 35.0158,
18559
+ "step": 2650
18560
+ },
18561
+ {
18562
+ "epoch": 0.9533399262788816,
18563
+ "grad_norm": 6.90962553024292,
18564
+ "learning_rate": 5.705869620520831e-07,
18565
+ "loss": 35.0575,
18566
+ "step": 2651
18567
+ },
18568
+ {
18569
+ "epoch": 0.9536995414906051,
18570
+ "grad_norm": 6.564623832702637,
18571
+ "learning_rate": 5.617914798065616e-07,
18572
+ "loss": 35.1439,
18573
+ "step": 2652
18574
+ },
18575
+ {
18576
+ "epoch": 0.9540591567023285,
18577
+ "grad_norm": 6.875448226928711,
18578
+ "learning_rate": 5.530639324441589e-07,
18579
+ "loss": 35.0336,
18580
+ "step": 2653
18581
+ },
18582
+ {
18583
+ "epoch": 0.954418771914052,
18584
+ "grad_norm": 6.93787956237793,
18585
+ "learning_rate": 5.444043319577264e-07,
18586
+ "loss": 35.0034,
18587
+ "step": 2654
18588
+ },
18589
+ {
18590
+ "epoch": 0.9547783871257755,
18591
+ "grad_norm": 6.781844139099121,
18592
+ "learning_rate": 5.358126902467397e-07,
18593
+ "loss": 35.1012,
18594
+ "step": 2655
18595
+ },
18596
+ {
18597
+ "epoch": 0.9551380023374989,
18598
+ "grad_norm": 6.938012599945068,
18599
+ "learning_rate": 5.272890191172942e-07,
18600
+ "loss": 35.009,
18601
+ "step": 2656
18602
+ },
18603
+ {
18604
+ "epoch": 0.9554976175492224,
18605
+ "grad_norm": 6.750193119049072,
18606
+ "learning_rate": 5.188333302820814e-07,
18607
+ "loss": 35.1228,
18608
+ "step": 2657
18609
+ },
18610
+ {
18611
+ "epoch": 0.9558572327609458,
18612
+ "grad_norm": 6.750398635864258,
18613
+ "learning_rate": 5.104456353603848e-07,
18614
+ "loss": 35.1784,
18615
+ "step": 2658
18616
+ },
18617
+ {
18618
+ "epoch": 0.9562168479726693,
18619
+ "grad_norm": 6.719043731689453,
18620
+ "learning_rate": 5.021259458780403e-07,
18621
+ "loss": 35.2097,
18622
+ "step": 2659
18623
+ },
18624
+ {
18625
+ "epoch": 0.9565764631843927,
18626
+ "grad_norm": 6.906541347503662,
18627
+ "learning_rate": 4.938742732674529e-07,
18628
+ "loss": 35.0454,
18629
+ "step": 2660
18630
+ },
18631
+ {
18632
+ "epoch": 0.9569360783961162,
18633
+ "grad_norm": 6.938282489776611,
18634
+ "learning_rate": 4.856906288675412e-07,
18635
+ "loss": 35.0274,
18636
+ "step": 2661
18637
+ },
18638
+ {
18639
+ "epoch": 0.9572956936078396,
18640
+ "grad_norm": 7.034068584442139,
18641
+ "learning_rate": 4.775750239237709e-07,
18642
+ "loss": 34.9938,
18643
+ "step": 2662
18644
+ },
18645
+ {
18646
+ "epoch": 0.9576553088195631,
18647
+ "grad_norm": 7.031942844390869,
18648
+ "learning_rate": 4.6952746958808246e-07,
18649
+ "loss": 34.9907,
18650
+ "step": 2663
18651
+ },
18652
+ {
18653
+ "epoch": 0.9580149240312865,
18654
+ "grad_norm": 6.726756572723389,
18655
+ "learning_rate": 4.615479769189246e-07,
18656
+ "loss": 35.2139,
18657
+ "step": 2664
18658
+ },
18659
+ {
18660
+ "epoch": 0.95837453924301,
18661
+ "grad_norm": 6.844308853149414,
18662
+ "learning_rate": 4.5363655688122066e-07,
18663
+ "loss": 35.0669,
18664
+ "step": 2665
18665
+ },
18666
+ {
18667
+ "epoch": 0.9587341544547334,
18668
+ "grad_norm": 6.938045024871826,
18669
+ "learning_rate": 4.457932203463411e-07,
18670
+ "loss": 34.9841,
18671
+ "step": 2666
18672
+ },
18673
+ {
18674
+ "epoch": 0.9590937696664569,
18675
+ "grad_norm": 6.75029182434082,
18676
+ "learning_rate": 4.380179780921034e-07,
18677
+ "loss": 35.1466,
18678
+ "step": 2667
18679
+ },
18680
+ {
18681
+ "epoch": 0.9594533848781803,
18682
+ "grad_norm": 6.876071929931641,
18683
+ "learning_rate": 4.303108408027667e-07,
18684
+ "loss": 35.0857,
18685
+ "step": 2668
18686
+ },
18687
+ {
18688
+ "epoch": 0.9598130000899038,
18689
+ "grad_norm": 6.781948089599609,
18690
+ "learning_rate": 4.226718190689927e-07,
18691
+ "loss": 35.1294,
18692
+ "step": 2669
18693
+ },
18694
+ {
18695
+ "epoch": 0.9601726153016272,
18696
+ "grad_norm": 6.8751606941223145,
18697
+ "learning_rate": 4.1510092338784e-07,
18698
+ "loss": 35.0227,
18699
+ "step": 2670
18700
+ },
18701
+ {
18702
+ "epoch": 0.9605322305133507,
18703
+ "grad_norm": 6.937859058380127,
18704
+ "learning_rate": 4.075981641627646e-07,
18705
+ "loss": 35.0112,
18706
+ "step": 2671
18707
+ },
18708
+ {
18709
+ "epoch": 0.9608918457250741,
18710
+ "grad_norm": 6.812950134277344,
18711
+ "learning_rate": 4.001635517035862e-07,
18712
+ "loss": 35.0691,
18713
+ "step": 2672
18714
+ },
18715
+ {
18716
+ "epoch": 0.9612514609367976,
18717
+ "grad_norm": 6.8443450927734375,
18718
+ "learning_rate": 3.9279709622648266e-07,
18719
+ "loss": 35.0248,
18720
+ "step": 2673
18721
+ },
18722
+ {
18723
+ "epoch": 0.961611076148521,
18724
+ "grad_norm": 7.0011515617370605,
18725
+ "learning_rate": 3.854988078539734e-07,
18726
+ "loss": 34.9897,
18727
+ "step": 2674
18728
+ },
18729
+ {
18730
+ "epoch": 0.9619706913602445,
18731
+ "grad_norm": 7.034828186035156,
18732
+ "learning_rate": 3.782686966149085e-07,
18733
+ "loss": 34.9971,
18734
+ "step": 2675
18735
+ },
18736
+ {
18737
+ "epoch": 0.9623303065719679,
18738
+ "grad_norm": 7.001497268676758,
18739
+ "learning_rate": 3.7110677244445167e-07,
18740
+ "loss": 35.0989,
18741
+ "step": 2676
18742
+ },
18743
+ {
18744
+ "epoch": 0.9626899217836915,
18745
+ "grad_norm": 6.408331394195557,
18746
+ "learning_rate": 3.6401304518406955e-07,
18747
+ "loss": 35.3291,
18748
+ "step": 2677
18749
+ },
18750
+ {
18751
+ "epoch": 0.963049536995415,
18752
+ "grad_norm": 6.8754987716674805,
18753
+ "learning_rate": 3.569875245815202e-07,
18754
+ "loss": 35.0203,
18755
+ "step": 2678
18756
+ },
18757
+ {
18758
+ "epoch": 0.9634091522071384,
18759
+ "grad_norm": 6.937820911407471,
18760
+ "learning_rate": 3.500302202908312e-07,
18761
+ "loss": 35.0325,
18762
+ "step": 2679
18763
+ },
18764
+ {
18765
+ "epoch": 0.9637687674188619,
18766
+ "grad_norm": 6.96920108795166,
18767
+ "learning_rate": 3.4314114187229406e-07,
18768
+ "loss": 35.0312,
18769
+ "step": 2680
18770
+ },
18771
+ {
18772
+ "epoch": 0.9641283826305853,
18773
+ "grad_norm": 6.563116073608398,
18774
+ "learning_rate": 3.363202987924474e-07,
18775
+ "loss": 35.287,
18776
+ "step": 2681
18777
+ },
18778
+ {
18779
+ "epoch": 0.9644879978423088,
18780
+ "grad_norm": 7.000607967376709,
18781
+ "learning_rate": 3.295677004240605e-07,
18782
+ "loss": 34.9819,
18783
+ "step": 2682
18784
+ },
18785
+ {
18786
+ "epoch": 0.9648476130540322,
18787
+ "grad_norm": 6.875179290771484,
18788
+ "learning_rate": 3.228833560461386e-07,
18789
+ "loss": 35.0564,
18790
+ "step": 2683
18791
+ },
18792
+ {
18793
+ "epoch": 0.9652072282657557,
18794
+ "grad_norm": 6.937688827514648,
18795
+ "learning_rate": 3.162672748438844e-07,
18796
+ "loss": 35.0283,
18797
+ "step": 2684
18798
+ },
18799
+ {
18800
+ "epoch": 0.9655668434774791,
18801
+ "grad_norm": 6.844105243682861,
18802
+ "learning_rate": 3.097194659086977e-07,
18803
+ "loss": 35.046,
18804
+ "step": 2685
18805
+ },
18806
+ {
18807
+ "epoch": 0.9659264586892026,
18808
+ "grad_norm": 6.907447814941406,
18809
+ "learning_rate": 3.032399382381812e-07,
18810
+ "loss": 35.0243,
18811
+ "step": 2686
18812
+ },
18813
+ {
18814
+ "epoch": 0.966286073900926,
18815
+ "grad_norm": 6.939781665802002,
18816
+ "learning_rate": 2.9682870073607924e-07,
18817
+ "loss": 35.0202,
18818
+ "step": 2687
18819
+ },
18820
+ {
18821
+ "epoch": 0.9666456891126495,
18822
+ "grad_norm": 6.969338893890381,
18823
+ "learning_rate": 2.904857622123114e-07,
18824
+ "loss": 34.9846,
18825
+ "step": 2688
18826
+ },
18827
+ {
18828
+ "epoch": 0.9670053043243729,
18829
+ "grad_norm": 7.066370487213135,
18830
+ "learning_rate": 2.8421113138296096e-07,
18831
+ "loss": 35.0785,
18832
+ "step": 2689
18833
+ },
18834
+ {
18835
+ "epoch": 0.9673649195360964,
18836
+ "grad_norm": 6.65691614151001,
18837
+ "learning_rate": 2.7800481687021987e-07,
18838
+ "loss": 35.2234,
18839
+ "step": 2690
18840
+ },
18841
+ {
18842
+ "epoch": 0.9677245347478198,
18843
+ "grad_norm": 6.906788349151611,
18844
+ "learning_rate": 2.7186682720241053e-07,
18845
+ "loss": 35.0046,
18846
+ "step": 2691
18847
+ },
18848
+ {
18849
+ "epoch": 0.9680841499595433,
18850
+ "grad_norm": 6.875458717346191,
18851
+ "learning_rate": 2.657971708139917e-07,
18852
+ "loss": 35.0472,
18853
+ "step": 2692
18854
+ },
18855
+ {
18856
+ "epoch": 0.9684437651712667,
18857
+ "grad_norm": 6.875387191772461,
18858
+ "learning_rate": 2.5979585604549164e-07,
18859
+ "loss": 35.0196,
18860
+ "step": 2693
18861
+ },
18862
+ {
18863
+ "epoch": 0.9688033803829902,
18864
+ "grad_norm": 6.938167095184326,
18865
+ "learning_rate": 2.538628911435359e-07,
18866
+ "loss": 35.0231,
18867
+ "step": 2694
18868
+ },
18869
+ {
18870
+ "epoch": 0.9691629955947136,
18871
+ "grad_norm": 7.031561374664307,
18872
+ "learning_rate": 2.4799828426084747e-07,
18873
+ "loss": 34.9725,
18874
+ "step": 2695
18875
+ },
18876
+ {
18877
+ "epoch": 0.9695226108064371,
18878
+ "grad_norm": 6.750325679779053,
18879
+ "learning_rate": 2.4220204345618557e-07,
18880
+ "loss": 35.1077,
18881
+ "step": 2696
18882
+ },
18883
+ {
18884
+ "epoch": 0.9698822260181605,
18885
+ "grad_norm": 6.937902927398682,
18886
+ "learning_rate": 2.364741766943901e-07,
18887
+ "loss": 35.0435,
18888
+ "step": 2697
18889
+ },
18890
+ {
18891
+ "epoch": 0.970241841229884,
18892
+ "grad_norm": 6.844454765319824,
18893
+ "learning_rate": 2.3081469184633164e-07,
18894
+ "loss": 35.1286,
18895
+ "step": 2698
18896
+ },
18897
+ {
18898
+ "epoch": 0.9706014564416074,
18899
+ "grad_norm": 6.876379013061523,
18900
+ "learning_rate": 2.2522359668892268e-07,
18901
+ "loss": 35.0179,
18902
+ "step": 2699
18903
+ },
18904
+ {
18905
+ "epoch": 0.9709610716533309,
18906
+ "grad_norm": 7.064993381500244,
18907
+ "learning_rate": 2.1970089890509527e-07,
18908
+ "loss": 35.0263,
18909
+ "step": 2700
18910
+ },
18911
+ {
18912
+ "epoch": 0.9713206868650544,
18913
+ "grad_norm": 6.846558570861816,
18914
+ "learning_rate": 2.1424660608378998e-07,
18915
+ "loss": 35.0245,
18916
+ "step": 2701
18917
+ },
18918
+ {
18919
+ "epoch": 0.9716803020767778,
18920
+ "grad_norm": 6.1268630027771,
18921
+ "learning_rate": 2.0886072571995597e-07,
18922
+ "loss": 35.5075,
18923
+ "step": 2702
18924
+ },
18925
+ {
18926
+ "epoch": 0.9720399172885013,
18927
+ "grad_norm": 6.87537145614624,
18928
+ "learning_rate": 2.0354326521453414e-07,
18929
+ "loss": 35.0149,
18930
+ "step": 2703
18931
+ },
18932
+ {
18933
+ "epoch": 0.9723995325002247,
18934
+ "grad_norm": 6.844267845153809,
18935
+ "learning_rate": 1.9829423187444074e-07,
18936
+ "loss": 35.0315,
18937
+ "step": 2704
18938
+ },
18939
+ {
18940
+ "epoch": 0.9727591477119483,
18941
+ "grad_norm": 6.875933647155762,
18942
+ "learning_rate": 1.9311363291257268e-07,
18943
+ "loss": 35.0775,
18944
+ "step": 2705
18945
+ },
18946
+ {
18947
+ "epoch": 0.9731187629236717,
18948
+ "grad_norm": 6.884557247161865,
18949
+ "learning_rate": 1.8800147544777435e-07,
18950
+ "loss": 35.081,
18951
+ "step": 2706
18952
+ },
18953
+ {
18954
+ "epoch": 0.9734783781353952,
18955
+ "grad_norm": 6.84420919418335,
18956
+ "learning_rate": 1.829577665048654e-07,
18957
+ "loss": 35.0697,
18958
+ "step": 2707
18959
+ },
18960
+ {
18961
+ "epoch": 0.9738379933471186,
18962
+ "grad_norm": 6.656634330749512,
18963
+ "learning_rate": 1.7798251301458513e-07,
18964
+ "loss": 35.1576,
18965
+ "step": 2708
18966
+ },
18967
+ {
18968
+ "epoch": 0.9741976085588421,
18969
+ "grad_norm": 6.719245433807373,
18970
+ "learning_rate": 1.7307572181361475e-07,
18971
+ "loss": 35.1254,
18972
+ "step": 2709
18973
+ },
18974
+ {
18975
+ "epoch": 0.9745572237705655,
18976
+ "grad_norm": 6.782287120819092,
18977
+ "learning_rate": 1.6823739964456075e-07,
18978
+ "loss": 35.0883,
18979
+ "step": 2710
18980
+ },
18981
+ {
18982
+ "epoch": 0.974916838982289,
18983
+ "grad_norm": 6.750965118408203,
18984
+ "learning_rate": 1.6346755315594375e-07,
18985
+ "loss": 35.0792,
18986
+ "step": 2711
18987
+ },
18988
+ {
18989
+ "epoch": 0.9752764541940124,
18990
+ "grad_norm": 6.970799922943115,
18991
+ "learning_rate": 1.5876618890218186e-07,
18992
+ "loss": 34.9827,
18993
+ "step": 2712
18994
+ },
18995
+ {
18996
+ "epoch": 0.9756360694057359,
18997
+ "grad_norm": 6.938418865203857,
18998
+ "learning_rate": 1.5413331334360182e-07,
18999
+ "loss": 34.9946,
19000
+ "step": 2713
19001
+ },
19002
+ {
19003
+ "epoch": 0.9759956846174593,
19004
+ "grad_norm": 6.911016941070557,
19005
+ "learning_rate": 1.495689328464056e-07,
19006
+ "loss": 35.18,
19007
+ "step": 2714
19008
+ },
19009
+ {
19010
+ "epoch": 0.9763552998291828,
19011
+ "grad_norm": 6.906633377075195,
19012
+ "learning_rate": 1.4507305368268166e-07,
19013
+ "loss": 35.0348,
19014
+ "step": 2715
19015
+ },
19016
+ {
19017
+ "epoch": 0.9767149150409062,
19018
+ "grad_norm": 6.938003063201904,
19019
+ "learning_rate": 1.4064568203037697e-07,
19020
+ "loss": 35.009,
19021
+ "step": 2716
19022
+ },
19023
+ {
19024
+ "epoch": 0.9770745302526297,
19025
+ "grad_norm": 6.781818866729736,
19026
+ "learning_rate": 1.362868239733195e-07,
19027
+ "loss": 35.0737,
19028
+ "step": 2717
19029
+ },
19030
+ {
19031
+ "epoch": 0.9774341454643531,
19032
+ "grad_norm": 6.969637870788574,
19033
+ "learning_rate": 1.3199648550116795e-07,
19034
+ "loss": 35.055,
19035
+ "step": 2718
19036
+ },
19037
+ {
19038
+ "epoch": 0.9777937606760766,
19039
+ "grad_norm": 6.906702995300293,
19040
+ "learning_rate": 1.277746725094453e-07,
19041
+ "loss": 35.0483,
19042
+ "step": 2719
19043
+ },
19044
+ {
19045
+ "epoch": 0.9781533758878,
19046
+ "grad_norm": 7.000565528869629,
19047
+ "learning_rate": 1.236213907994943e-07,
19048
+ "loss": 35.0131,
19049
+ "step": 2720
19050
+ },
19051
+ {
19052
+ "epoch": 0.9785129910995235,
19053
+ "grad_norm": 6.750234603881836,
19054
+ "learning_rate": 1.1953664607849968e-07,
19055
+ "loss": 35.1448,
19056
+ "step": 2721
19057
+ },
19058
+ {
19059
+ "epoch": 0.978872606311247,
19060
+ "grad_norm": 6.8440260887146,
19061
+ "learning_rate": 1.1552044395945482e-07,
19062
+ "loss": 35.1569,
19063
+ "step": 2722
19064
+ },
19065
+ {
19066
+ "epoch": 0.9792322215229704,
19067
+ "grad_norm": 6.937960147857666,
19068
+ "learning_rate": 1.1157278996118404e-07,
19069
+ "loss": 35.0192,
19070
+ "step": 2723
19071
+ },
19072
+ {
19073
+ "epoch": 0.9795918367346939,
19074
+ "grad_norm": 6.845226764678955,
19075
+ "learning_rate": 1.076936895082925e-07,
19076
+ "loss": 35.0441,
19077
+ "step": 2724
19078
+ },
19079
+ {
19080
+ "epoch": 0.9799514519464173,
19081
+ "grad_norm": 7.003016471862793,
19082
+ "learning_rate": 1.0388314793119968e-07,
19083
+ "loss": 35.0005,
19084
+ "step": 2725
19085
+ },
19086
+ {
19087
+ "epoch": 0.9803110671581408,
19088
+ "grad_norm": 6.627377986907959,
19089
+ "learning_rate": 1.0014117046612259e-07,
19090
+ "loss": 35.1722,
19091
+ "step": 2726
19092
+ },
19093
+ {
19094
+ "epoch": 0.9806706823698642,
19095
+ "grad_norm": 7.000593185424805,
19096
+ "learning_rate": 9.646776225503696e-08,
19097
+ "loss": 34.9891,
19098
+ "step": 2727
19099
+ },
19100
+ {
19101
+ "epoch": 0.9810302975815877,
19102
+ "grad_norm": 6.906768798828125,
19103
+ "learning_rate": 9.286292834572164e-08,
19104
+ "loss": 35.0124,
19105
+ "step": 2728
19106
+ },
19107
+ {
19108
+ "epoch": 0.9813899127933111,
19109
+ "grad_norm": 6.9691667556762695,
19110
+ "learning_rate": 8.932667369170866e-08,
19111
+ "loss": 34.9706,
19112
+ "step": 2729
19113
+ },
19114
+ {
19115
+ "epoch": 0.9817495280050346,
19116
+ "grad_norm": 6.844206809997559,
19117
+ "learning_rate": 8.585900315229434e-08,
19118
+ "loss": 35.0817,
19119
+ "step": 2730
19120
+ },
19121
+ {
19122
+ "epoch": 0.982109143216758,
19123
+ "grad_norm": 7.031874656677246,
19124
+ "learning_rate": 8.245992149253923e-08,
19125
+ "loss": 34.9827,
19126
+ "step": 2731
19127
+ },
19128
+ {
19129
+ "epoch": 0.9824687584284815,
19130
+ "grad_norm": 6.781763553619385,
19131
+ "learning_rate": 7.912943338324596e-08,
19132
+ "loss": 35.0802,
19133
+ "step": 2732
19134
+ },
19135
+ {
19136
+ "epoch": 0.982828373640205,
19137
+ "grad_norm": 6.844075679779053,
19138
+ "learning_rate": 7.58675434009648e-08,
19139
+ "loss": 35.017,
19140
+ "step": 2733
19141
+ },
19142
+ {
19143
+ "epoch": 0.9831879888519285,
19144
+ "grad_norm": 6.843966484069824,
19145
+ "learning_rate": 7.26742560279714e-08,
19146
+ "loss": 35.0268,
19147
+ "step": 2734
19148
+ },
19149
+ {
19150
+ "epoch": 0.9835476040636519,
19151
+ "grad_norm": 6.8442463874816895,
19152
+ "learning_rate": 6.954957565228904e-08,
19153
+ "loss": 35.0883,
19154
+ "step": 2735
19155
+ },
19156
+ {
19157
+ "epoch": 0.9839072192753754,
19158
+ "grad_norm": 6.9076337814331055,
19159
+ "learning_rate": 6.64935065676553e-08,
19160
+ "loss": 35.006,
19161
+ "step": 2736
19162
+ },
19163
+ {
19164
+ "epoch": 0.9842668344870988,
19165
+ "grad_norm": 6.93890905380249,
19166
+ "learning_rate": 6.350605297352763e-08,
19167
+ "loss": 35.03,
19168
+ "step": 2737
19169
+ },
19170
+ {
19171
+ "epoch": 0.9846264496988223,
19172
+ "grad_norm": 6.9072394371032715,
19173
+ "learning_rate": 6.05872189750778e-08,
19174
+ "loss": 35.0858,
19175
+ "step": 2738
19176
+ },
19177
+ {
19178
+ "epoch": 0.9849860649105457,
19179
+ "grad_norm": 6.504227638244629,
19180
+ "learning_rate": 5.773700858318631e-08,
19181
+ "loss": 35.4658,
19182
+ "step": 2739
19183
+ },
19184
+ {
19185
+ "epoch": 0.9853456801222692,
19186
+ "grad_norm": 6.875583171844482,
19187
+ "learning_rate": 5.4955425714431353e-08,
19188
+ "loss": 35.0411,
19189
+ "step": 2740
19190
+ },
19191
+ {
19192
+ "epoch": 0.9857052953339926,
19193
+ "grad_norm": 6.938041687011719,
19194
+ "learning_rate": 5.224247419108319e-08,
19195
+ "loss": 34.9976,
19196
+ "step": 2741
19197
+ },
19198
+ {
19199
+ "epoch": 0.9860649105457161,
19200
+ "grad_norm": 6.844311714172363,
19201
+ "learning_rate": 4.9598157741120866e-08,
19202
+ "loss": 35.0794,
19203
+ "step": 2742
19204
+ },
19205
+ {
19206
+ "epoch": 0.9864245257574396,
19207
+ "grad_norm": 6.844622611999512,
19208
+ "learning_rate": 4.702247999819887e-08,
19209
+ "loss": 35.0342,
19210
+ "step": 2743
19211
+ },
19212
+ {
19213
+ "epoch": 0.986784140969163,
19214
+ "grad_norm": 6.906755447387695,
19215
+ "learning_rate": 4.451544450163603e-08,
19216
+ "loss": 35.0091,
19217
+ "step": 2744
19218
+ },
19219
+ {
19220
+ "epoch": 0.9871437561808865,
19221
+ "grad_norm": 6.844326496124268,
19222
+ "learning_rate": 4.207705469645995e-08,
19223
+ "loss": 35.0878,
19224
+ "step": 2745
19225
+ },
19226
+ {
19227
+ "epoch": 0.9875033713926099,
19228
+ "grad_norm": 6.9065752029418945,
19229
+ "learning_rate": 3.9707313933345926e-08,
19230
+ "loss": 35.1009,
19231
+ "step": 2746
19232
+ },
19233
+ {
19234
+ "epoch": 0.9878629866043334,
19235
+ "grad_norm": 6.875674724578857,
19236
+ "learning_rate": 3.740622546863914e-08,
19237
+ "loss": 35.0409,
19238
+ "step": 2747
19239
+ },
19240
+ {
19241
+ "epoch": 0.9882226018160568,
19242
+ "grad_norm": 6.906862735748291,
19243
+ "learning_rate": 3.517379246436026e-08,
19244
+ "loss": 35.026,
19245
+ "step": 2748
19246
+ },
19247
+ {
19248
+ "epoch": 0.9885822170277803,
19249
+ "grad_norm": 6.876607418060303,
19250
+ "learning_rate": 3.3010017988166495e-08,
19251
+ "loss": 35.0271,
19252
+ "step": 2749
19253
+ },
19254
+ {
19255
+ "epoch": 0.9889418322395037,
19256
+ "grad_norm": 7.0033369064331055,
19257
+ "learning_rate": 3.091490501339611e-08,
19258
+ "loss": 35.0037,
19259
+ "step": 2750
19260
+ },
19261
+ {
19262
+ "epoch": 0.9893014474512272,
19263
+ "grad_norm": 6.878208637237549,
19264
+ "learning_rate": 2.888845641900728e-08,
19265
+ "loss": 35.1367,
19266
+ "step": 2751
19267
+ },
19268
+ {
19269
+ "epoch": 0.9896610626629506,
19270
+ "grad_norm": 6.6880035400390625,
19271
+ "learning_rate": 2.6930674989628092e-08,
19272
+ "loss": 35.1422,
19273
+ "step": 2752
19274
+ },
19275
+ {
19276
+ "epoch": 0.9900206778746741,
19277
+ "grad_norm": 6.8129658699035645,
19278
+ "learning_rate": 2.5041563415512115e-08,
19279
+ "loss": 35.1067,
19280
+ "step": 2753
19281
+ },
19282
+ {
19283
+ "epoch": 0.9903802930863975,
19284
+ "grad_norm": 6.875419616699219,
19285
+ "learning_rate": 2.3221124292566176e-08,
19286
+ "loss": 35.0506,
19287
+ "step": 2754
19288
+ },
19289
+ {
19290
+ "epoch": 0.990739908298121,
19291
+ "grad_norm": 6.781773090362549,
19292
+ "learning_rate": 2.146936012231704e-08,
19293
+ "loss": 35.061,
19294
+ "step": 2755
19295
+ },
19296
+ {
19297
+ "epoch": 0.9910995235098444,
19298
+ "grad_norm": 6.813143253326416,
19299
+ "learning_rate": 1.9786273311928062e-08,
19300
+ "loss": 35.0469,
19301
+ "step": 2756
19302
+ },
19303
+ {
19304
+ "epoch": 0.9914591387215679,
19305
+ "grad_norm": 7.0006608963012695,
19306
+ "learning_rate": 1.817186617419364e-08,
19307
+ "loss": 35.0013,
19308
+ "step": 2757
19309
+ },
19310
+ {
19311
+ "epoch": 0.9918187539332913,
19312
+ "grad_norm": 6.781820297241211,
19313
+ "learning_rate": 1.6626140927533673e-08,
19314
+ "loss": 35.1017,
19315
+ "step": 2758
19316
+ },
19317
+ {
19318
+ "epoch": 0.9921783691450148,
19319
+ "grad_norm": 6.781604290008545,
19320
+ "learning_rate": 1.5149099695987988e-08,
19321
+ "loss": 35.0649,
19322
+ "step": 2759
19323
+ },
19324
+ {
19325
+ "epoch": 0.9925379843567383,
19326
+ "grad_norm": 6.844202995300293,
19327
+ "learning_rate": 1.3740744509205261e-08,
19328
+ "loss": 35.0584,
19329
+ "step": 2760
19330
+ },
19331
+ {
19332
+ "epoch": 0.9928975995684618,
19333
+ "grad_norm": 6.813276767730713,
19334
+ "learning_rate": 1.2401077302465203e-08,
19335
+ "loss": 35.0444,
19336
+ "step": 2761
19337
+ },
19338
+ {
19339
+ "epoch": 0.9932572147801852,
19340
+ "grad_norm": 6.9080119132995605,
19341
+ "learning_rate": 1.1130099916650816e-08,
19342
+ "loss": 35.029,
19343
+ "step": 2762
19344
+ },
19345
+ {
19346
+ "epoch": 0.9936168299919087,
19347
+ "grad_norm": 6.939767360687256,
19348
+ "learning_rate": 9.927814098265043e-09,
19349
+ "loss": 35.1383,
19350
+ "step": 2763
19351
+ },
19352
+ {
19353
+ "epoch": 0.9939764452036322,
19354
+ "grad_norm": 6.628198146820068,
19355
+ "learning_rate": 8.794221499408561e-09,
19356
+ "loss": 35.2006,
19357
+ "step": 2764
19358
+ },
19359
+ {
19360
+ "epoch": 0.9943360604153556,
19361
+ "grad_norm": 6.875469207763672,
19362
+ "learning_rate": 7.72932367779089e-09,
19363
+ "loss": 35.0291,
19364
+ "step": 2765
19365
+ },
19366
+ {
19367
+ "epoch": 0.9946956756270791,
19368
+ "grad_norm": 6.812909126281738,
19369
+ "learning_rate": 6.7331220967359336e-09,
19370
+ "loss": 35.1033,
19371
+ "step": 2766
19372
+ },
19373
+ {
19374
+ "epoch": 0.9950552908388025,
19375
+ "grad_norm": 6.937972068786621,
19376
+ "learning_rate": 5.805618125159784e-09,
19377
+ "loss": 35.0225,
19378
+ "step": 2767
19379
+ },
19380
+ {
19381
+ "epoch": 0.995414906050526,
19382
+ "grad_norm": 6.8758111000061035,
19383
+ "learning_rate": 4.94681303757627e-09,
19384
+ "loss": 35.0932,
19385
+ "step": 2768
19386
+ },
19387
+ {
19388
+ "epoch": 0.9957745212622494,
19389
+ "grad_norm": 6.969333648681641,
19390
+ "learning_rate": 4.156708014096955e-09,
19391
+ "loss": 35.0095,
19392
+ "step": 2769
19393
+ },
19394
+ {
19395
+ "epoch": 0.9961341364739729,
19396
+ "grad_norm": 6.812990188598633,
19397
+ "learning_rate": 3.4353041404477927e-09,
19398
+ "loss": 35.0702,
19399
+ "step": 2770
19400
+ },
19401
+ {
19402
+ "epoch": 0.9964937516856963,
19403
+ "grad_norm": 6.8440117835998535,
19404
+ "learning_rate": 2.7826024079247172e-09,
19405
+ "loss": 35.0216,
19406
+ "step": 2771
19407
+ },
19408
+ {
19409
+ "epoch": 0.9968533668974198,
19410
+ "grad_norm": 6.812954902648926,
19411
+ "learning_rate": 2.198603713432501e-09,
19412
+ "loss": 35.0711,
19413
+ "step": 2772
19414
+ },
19415
+ {
19416
+ "epoch": 0.9972129821091432,
19417
+ "grad_norm": 6.7817559242248535,
19418
+ "learning_rate": 1.6833088594736535e-09,
19419
+ "loss": 35.0771,
19420
+ "step": 2773
19421
+ },
19422
+ {
19423
+ "epoch": 0.9975725973208667,
19424
+ "grad_norm": 6.969820022583008,
19425
+ "learning_rate": 1.236718554120664e-09,
19426
+ "loss": 34.9983,
19427
+ "step": 2774
19428
+ },
19429
+ {
19430
+ "epoch": 0.9979322125325901,
19431
+ "grad_norm": 7.004218101501465,
19432
+ "learning_rate": 8.588334110604112e-10,
19433
+ "loss": 35.0482,
19434
+ "step": 2775
19435
+ },
19436
+ {
19437
+ "epoch": 0.9982918277443136,
19438
+ "grad_norm": 6.938352584838867,
19439
+ "learning_rate": 5.496539495553065e-10,
19440
+ "loss": 35.0865,
19441
+ "step": 2776
19442
+ },
19443
+ {
19444
+ "epoch": 0.998651442956037,
19445
+ "grad_norm": 6.906884670257568,
19446
+ "learning_rate": 3.0918059445439464e-10,
19447
+ "loss": 35.0426,
19448
+ "step": 2777
19449
+ },
19450
+ {
19451
+ "epoch": 0.9990110581677605,
19452
+ "grad_norm": 6.906632900238037,
19453
+ "learning_rate": 1.3741367621555866e-10,
19454
+ "loss": 34.9973,
19455
+ "step": 2778
19456
+ },
19457
+ {
19458
+ "epoch": 0.9993706733794839,
19459
+ "grad_norm": 6.71948766708374,
19460
+ "learning_rate": 3.435343085556042e-11,
19461
+ "loss": 35.1191,
19462
+ "step": 2779
19463
+ },
19464
+ {
19465
+ "epoch": 0.9997302885912074,
19466
+ "grad_norm": 6.9692182540893555,
19467
+ "learning_rate": 0.0,
19468
+ "loss": 35.0035,
19469
+ "step": 2780
19470
  }
19471
  ],
19472
  "logging_steps": 1,
 
19481
  "should_evaluate": false,
19482
  "should_log": false,
19483
  "should_save": true,
19484
+ "should_training_stop": true
19485
  },
19486
  "attributes": {}
19487
  }
19488
  },
19489
+ "total_flos": 5100266764369920.0,
19490
  "train_batch_size": 4,
19491
  "trial_name": null,
19492
  "trial_params": null