Training in progress, step 2780, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +1061 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4286680
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d36d2e186207b84b2c45712ab427f0ad2a1151e2ecdc7f8e268aa968c72d1ab0
|
3 |
size 4286680
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 8583659
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ad69c4bbfc0217d682e3dd97cf9a007ca44f2d30692ffba700086ed8a8d7b37
|
3 |
size 8583659
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fd9548f5ebcc619ea12bff0f8c85684dd6b490d5e7a15ed245e3fc1b6caa5d3
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f7dda6252d67e1f22ef854ad6e21045a9793d2a4a2d5fc69ea513e82b91e37d
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23fef5b8ea6e3f479d8dce05e71bf47c577836cecdc4e49e2ce1aca8b5c7da16
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18091ccb9bbda4cc1c7f4d4c03537d6fe4375355face5ebdf3f896691a1ae0d1
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc5a87cbe042a37e049f17d5d373d66e08f75e1ca959dda159534184475e9857
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -18410,6 +18410,1063 @@
|
|
18410 |
"learning_rate": 7.812496433630467e-07,
|
18411 |
"loss": 35.0426,
|
18412 |
"step": 2629
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18413 |
}
|
18414 |
],
|
18415 |
"logging_steps": 1,
|
@@ -18424,12 +19481,12 @@
|
|
18424 |
"should_evaluate": false,
|
18425 |
"should_log": false,
|
18426 |
"should_save": true,
|
18427 |
-
"should_training_stop":
|
18428 |
},
|
18429 |
"attributes": {}
|
18430 |
}
|
18431 |
},
|
18432 |
-
"total_flos":
|
18433 |
"train_batch_size": 4,
|
18434 |
"trial_name": null,
|
18435 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9997302885912074,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2780,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
18410 |
"learning_rate": 7.812496433630467e-07,
|
18411 |
"loss": 35.0426,
|
18412 |
"step": 2629
|
18413 |
+
},
|
18414 |
+
{
|
18415 |
+
"epoch": 0.945788006832689,
|
18416 |
+
"grad_norm": 6.844422817230225,
|
18417 |
+
"learning_rate": 7.709628166416128e-07,
|
18418 |
+
"loss": 35.0489,
|
18419 |
+
"step": 2630
|
18420 |
+
},
|
18421 |
+
{
|
18422 |
+
"epoch": 0.9461476220444125,
|
18423 |
+
"grad_norm": 6.938287734985352,
|
18424 |
+
"learning_rate": 7.607436373731736e-07,
|
18425 |
+
"loss": 35.0326,
|
18426 |
+
"step": 2631
|
18427 |
+
},
|
18428 |
+
{
|
18429 |
+
"epoch": 0.946507237256136,
|
18430 |
+
"grad_norm": 6.78154993057251,
|
18431 |
+
"learning_rate": 7.505921196002796e-07,
|
18432 |
+
"loss": 35.0827,
|
18433 |
+
"step": 2632
|
18434 |
+
},
|
18435 |
+
{
|
18436 |
+
"epoch": 0.9468668524678594,
|
18437 |
+
"grad_norm": 6.781467914581299,
|
18438 |
+
"learning_rate": 7.405082772725225e-07,
|
18439 |
+
"loss": 35.0993,
|
18440 |
+
"step": 2633
|
18441 |
+
},
|
18442 |
+
{
|
18443 |
+
"epoch": 0.9472264676795829,
|
18444 |
+
"grad_norm": 6.844005584716797,
|
18445 |
+
"learning_rate": 7.30492124246479e-07,
|
18446 |
+
"loss": 35.0527,
|
18447 |
+
"step": 2634
|
18448 |
+
},
|
18449 |
+
{
|
18450 |
+
"epoch": 0.9475860828913063,
|
18451 |
+
"grad_norm": 7.000504016876221,
|
18452 |
+
"learning_rate": 7.205436742857119e-07,
|
18453 |
+
"loss": 35.0167,
|
18454 |
+
"step": 2635
|
18455 |
+
},
|
18456 |
+
{
|
18457 |
+
"epoch": 0.9479456981030298,
|
18458 |
+
"grad_norm": 7.001081466674805,
|
18459 |
+
"learning_rate": 7.106629410607691e-07,
|
18460 |
+
"loss": 34.9904,
|
18461 |
+
"step": 2636
|
18462 |
+
},
|
18463 |
+
{
|
18464 |
+
"epoch": 0.9483053133147532,
|
18465 |
+
"grad_norm": 6.970365524291992,
|
18466 |
+
"learning_rate": 7.008499381491284e-07,
|
18467 |
+
"loss": 35.0496,
|
18468 |
+
"step": 2637
|
18469 |
+
},
|
18470 |
+
{
|
18471 |
+
"epoch": 0.9486649285264767,
|
18472 |
+
"grad_norm": 6.876420497894287,
|
18473 |
+
"learning_rate": 6.911046790351983e-07,
|
18474 |
+
"loss": 35.0639,
|
18475 |
+
"step": 2638
|
18476 |
+
},
|
18477 |
+
{
|
18478 |
+
"epoch": 0.9490245437382001,
|
18479 |
+
"grad_norm": 6.784447193145752,
|
18480 |
+
"learning_rate": 6.814271771103109e-07,
|
18481 |
+
"loss": 35.156,
|
18482 |
+
"step": 2639
|
18483 |
+
},
|
18484 |
+
{
|
18485 |
+
"epoch": 0.9493841589499236,
|
18486 |
+
"grad_norm": 7.000516891479492,
|
18487 |
+
"learning_rate": 6.718174456726789e-07,
|
18488 |
+
"loss": 34.9821,
|
18489 |
+
"step": 2640
|
18490 |
+
},
|
18491 |
+
{
|
18492 |
+
"epoch": 0.949743774161647,
|
18493 |
+
"grad_norm": 6.875356674194336,
|
18494 |
+
"learning_rate": 6.622754979273837e-07,
|
18495 |
+
"loss": 35.0327,
|
18496 |
+
"step": 2641
|
18497 |
+
},
|
18498 |
+
{
|
18499 |
+
"epoch": 0.9501033893733705,
|
18500 |
+
"grad_norm": 6.719158172607422,
|
18501 |
+
"learning_rate": 6.528013469863814e-07,
|
18502 |
+
"loss": 35.0713,
|
18503 |
+
"step": 2642
|
18504 |
+
},
|
18505 |
+
{
|
18506 |
+
"epoch": 0.9504630045850939,
|
18507 |
+
"grad_norm": 6.750500679016113,
|
18508 |
+
"learning_rate": 6.433950058684468e-07,
|
18509 |
+
"loss": 35.1472,
|
18510 |
+
"step": 2643
|
18511 |
+
},
|
18512 |
+
{
|
18513 |
+
"epoch": 0.9508226197968174,
|
18514 |
+
"grad_norm": 7.063511371612549,
|
18515 |
+
"learning_rate": 6.340564874991905e-07,
|
18516 |
+
"loss": 34.9856,
|
18517 |
+
"step": 2644
|
18518 |
+
},
|
18519 |
+
{
|
18520 |
+
"epoch": 0.9511822350085408,
|
18521 |
+
"grad_norm": 6.9381561279296875,
|
18522 |
+
"learning_rate": 6.247858047110145e-07,
|
18523 |
+
"loss": 34.9976,
|
18524 |
+
"step": 2645
|
18525 |
+
},
|
18526 |
+
{
|
18527 |
+
"epoch": 0.9515418502202643,
|
18528 |
+
"grad_norm": 6.844141960144043,
|
18529 |
+
"learning_rate": 6.15582970243117e-07,
|
18530 |
+
"loss": 35.0957,
|
18531 |
+
"step": 2646
|
18532 |
+
},
|
18533 |
+
{
|
18534 |
+
"epoch": 0.9519014654319877,
|
18535 |
+
"grad_norm": 6.937661170959473,
|
18536 |
+
"learning_rate": 6.064479967414383e-07,
|
18537 |
+
"loss": 35.0179,
|
18538 |
+
"step": 2647
|
18539 |
+
},
|
18540 |
+
{
|
18541 |
+
"epoch": 0.9522610806437112,
|
18542 |
+
"grad_norm": 6.937982082366943,
|
18543 |
+
"learning_rate": 5.973808967587092e-07,
|
18544 |
+
"loss": 35.0541,
|
18545 |
+
"step": 2648
|
18546 |
+
},
|
18547 |
+
{
|
18548 |
+
"epoch": 0.9526206958554347,
|
18549 |
+
"grad_norm": 6.782608509063721,
|
18550 |
+
"learning_rate": 5.88381682754352e-07,
|
18551 |
+
"loss": 35.1271,
|
18552 |
+
"step": 2649
|
18553 |
+
},
|
18554 |
+
{
|
18555 |
+
"epoch": 0.9529803110671582,
|
18556 |
+
"grad_norm": 6.848582744598389,
|
18557 |
+
"learning_rate": 5.794503670945195e-07,
|
18558 |
+
"loss": 35.0158,
|
18559 |
+
"step": 2650
|
18560 |
+
},
|
18561 |
+
{
|
18562 |
+
"epoch": 0.9533399262788816,
|
18563 |
+
"grad_norm": 6.90962553024292,
|
18564 |
+
"learning_rate": 5.705869620520831e-07,
|
18565 |
+
"loss": 35.0575,
|
18566 |
+
"step": 2651
|
18567 |
+
},
|
18568 |
+
{
|
18569 |
+
"epoch": 0.9536995414906051,
|
18570 |
+
"grad_norm": 6.564623832702637,
|
18571 |
+
"learning_rate": 5.617914798065616e-07,
|
18572 |
+
"loss": 35.1439,
|
18573 |
+
"step": 2652
|
18574 |
+
},
|
18575 |
+
{
|
18576 |
+
"epoch": 0.9540591567023285,
|
18577 |
+
"grad_norm": 6.875448226928711,
|
18578 |
+
"learning_rate": 5.530639324441589e-07,
|
18579 |
+
"loss": 35.0336,
|
18580 |
+
"step": 2653
|
18581 |
+
},
|
18582 |
+
{
|
18583 |
+
"epoch": 0.954418771914052,
|
18584 |
+
"grad_norm": 6.93787956237793,
|
18585 |
+
"learning_rate": 5.444043319577264e-07,
|
18586 |
+
"loss": 35.0034,
|
18587 |
+
"step": 2654
|
18588 |
+
},
|
18589 |
+
{
|
18590 |
+
"epoch": 0.9547783871257755,
|
18591 |
+
"grad_norm": 6.781844139099121,
|
18592 |
+
"learning_rate": 5.358126902467397e-07,
|
18593 |
+
"loss": 35.1012,
|
18594 |
+
"step": 2655
|
18595 |
+
},
|
18596 |
+
{
|
18597 |
+
"epoch": 0.9551380023374989,
|
18598 |
+
"grad_norm": 6.938012599945068,
|
18599 |
+
"learning_rate": 5.272890191172942e-07,
|
18600 |
+
"loss": 35.009,
|
18601 |
+
"step": 2656
|
18602 |
+
},
|
18603 |
+
{
|
18604 |
+
"epoch": 0.9554976175492224,
|
18605 |
+
"grad_norm": 6.750193119049072,
|
18606 |
+
"learning_rate": 5.188333302820814e-07,
|
18607 |
+
"loss": 35.1228,
|
18608 |
+
"step": 2657
|
18609 |
+
},
|
18610 |
+
{
|
18611 |
+
"epoch": 0.9558572327609458,
|
18612 |
+
"grad_norm": 6.750398635864258,
|
18613 |
+
"learning_rate": 5.104456353603848e-07,
|
18614 |
+
"loss": 35.1784,
|
18615 |
+
"step": 2658
|
18616 |
+
},
|
18617 |
+
{
|
18618 |
+
"epoch": 0.9562168479726693,
|
18619 |
+
"grad_norm": 6.719043731689453,
|
18620 |
+
"learning_rate": 5.021259458780403e-07,
|
18621 |
+
"loss": 35.2097,
|
18622 |
+
"step": 2659
|
18623 |
+
},
|
18624 |
+
{
|
18625 |
+
"epoch": 0.9565764631843927,
|
18626 |
+
"grad_norm": 6.906541347503662,
|
18627 |
+
"learning_rate": 4.938742732674529e-07,
|
18628 |
+
"loss": 35.0454,
|
18629 |
+
"step": 2660
|
18630 |
+
},
|
18631 |
+
{
|
18632 |
+
"epoch": 0.9569360783961162,
|
18633 |
+
"grad_norm": 6.938282489776611,
|
18634 |
+
"learning_rate": 4.856906288675412e-07,
|
18635 |
+
"loss": 35.0274,
|
18636 |
+
"step": 2661
|
18637 |
+
},
|
18638 |
+
{
|
18639 |
+
"epoch": 0.9572956936078396,
|
18640 |
+
"grad_norm": 7.034068584442139,
|
18641 |
+
"learning_rate": 4.775750239237709e-07,
|
18642 |
+
"loss": 34.9938,
|
18643 |
+
"step": 2662
|
18644 |
+
},
|
18645 |
+
{
|
18646 |
+
"epoch": 0.9576553088195631,
|
18647 |
+
"grad_norm": 7.031942844390869,
|
18648 |
+
"learning_rate": 4.6952746958808246e-07,
|
18649 |
+
"loss": 34.9907,
|
18650 |
+
"step": 2663
|
18651 |
+
},
|
18652 |
+
{
|
18653 |
+
"epoch": 0.9580149240312865,
|
18654 |
+
"grad_norm": 6.726756572723389,
|
18655 |
+
"learning_rate": 4.615479769189246e-07,
|
18656 |
+
"loss": 35.2139,
|
18657 |
+
"step": 2664
|
18658 |
+
},
|
18659 |
+
{
|
18660 |
+
"epoch": 0.95837453924301,
|
18661 |
+
"grad_norm": 6.844308853149414,
|
18662 |
+
"learning_rate": 4.5363655688122066e-07,
|
18663 |
+
"loss": 35.0669,
|
18664 |
+
"step": 2665
|
18665 |
+
},
|
18666 |
+
{
|
18667 |
+
"epoch": 0.9587341544547334,
|
18668 |
+
"grad_norm": 6.938045024871826,
|
18669 |
+
"learning_rate": 4.457932203463411e-07,
|
18670 |
+
"loss": 34.9841,
|
18671 |
+
"step": 2666
|
18672 |
+
},
|
18673 |
+
{
|
18674 |
+
"epoch": 0.9590937696664569,
|
18675 |
+
"grad_norm": 6.75029182434082,
|
18676 |
+
"learning_rate": 4.380179780921034e-07,
|
18677 |
+
"loss": 35.1466,
|
18678 |
+
"step": 2667
|
18679 |
+
},
|
18680 |
+
{
|
18681 |
+
"epoch": 0.9594533848781803,
|
18682 |
+
"grad_norm": 6.876071929931641,
|
18683 |
+
"learning_rate": 4.303108408027667e-07,
|
18684 |
+
"loss": 35.0857,
|
18685 |
+
"step": 2668
|
18686 |
+
},
|
18687 |
+
{
|
18688 |
+
"epoch": 0.9598130000899038,
|
18689 |
+
"grad_norm": 6.781948089599609,
|
18690 |
+
"learning_rate": 4.226718190689927e-07,
|
18691 |
+
"loss": 35.1294,
|
18692 |
+
"step": 2669
|
18693 |
+
},
|
18694 |
+
{
|
18695 |
+
"epoch": 0.9601726153016272,
|
18696 |
+
"grad_norm": 6.8751606941223145,
|
18697 |
+
"learning_rate": 4.1510092338784e-07,
|
18698 |
+
"loss": 35.0227,
|
18699 |
+
"step": 2670
|
18700 |
+
},
|
18701 |
+
{
|
18702 |
+
"epoch": 0.9605322305133507,
|
18703 |
+
"grad_norm": 6.937859058380127,
|
18704 |
+
"learning_rate": 4.075981641627646e-07,
|
18705 |
+
"loss": 35.0112,
|
18706 |
+
"step": 2671
|
18707 |
+
},
|
18708 |
+
{
|
18709 |
+
"epoch": 0.9608918457250741,
|
18710 |
+
"grad_norm": 6.812950134277344,
|
18711 |
+
"learning_rate": 4.001635517035862e-07,
|
18712 |
+
"loss": 35.0691,
|
18713 |
+
"step": 2672
|
18714 |
+
},
|
18715 |
+
{
|
18716 |
+
"epoch": 0.9612514609367976,
|
18717 |
+
"grad_norm": 6.8443450927734375,
|
18718 |
+
"learning_rate": 3.9279709622648266e-07,
|
18719 |
+
"loss": 35.0248,
|
18720 |
+
"step": 2673
|
18721 |
+
},
|
18722 |
+
{
|
18723 |
+
"epoch": 0.961611076148521,
|
18724 |
+
"grad_norm": 7.0011515617370605,
|
18725 |
+
"learning_rate": 3.854988078539734e-07,
|
18726 |
+
"loss": 34.9897,
|
18727 |
+
"step": 2674
|
18728 |
+
},
|
18729 |
+
{
|
18730 |
+
"epoch": 0.9619706913602445,
|
18731 |
+
"grad_norm": 7.034828186035156,
|
18732 |
+
"learning_rate": 3.782686966149085e-07,
|
18733 |
+
"loss": 34.9971,
|
18734 |
+
"step": 2675
|
18735 |
+
},
|
18736 |
+
{
|
18737 |
+
"epoch": 0.9623303065719679,
|
18738 |
+
"grad_norm": 7.001497268676758,
|
18739 |
+
"learning_rate": 3.7110677244445167e-07,
|
18740 |
+
"loss": 35.0989,
|
18741 |
+
"step": 2676
|
18742 |
+
},
|
18743 |
+
{
|
18744 |
+
"epoch": 0.9626899217836915,
|
18745 |
+
"grad_norm": 6.408331394195557,
|
18746 |
+
"learning_rate": 3.6401304518406955e-07,
|
18747 |
+
"loss": 35.3291,
|
18748 |
+
"step": 2677
|
18749 |
+
},
|
18750 |
+
{
|
18751 |
+
"epoch": 0.963049536995415,
|
18752 |
+
"grad_norm": 6.8754987716674805,
|
18753 |
+
"learning_rate": 3.569875245815202e-07,
|
18754 |
+
"loss": 35.0203,
|
18755 |
+
"step": 2678
|
18756 |
+
},
|
18757 |
+
{
|
18758 |
+
"epoch": 0.9634091522071384,
|
18759 |
+
"grad_norm": 6.937820911407471,
|
18760 |
+
"learning_rate": 3.500302202908312e-07,
|
18761 |
+
"loss": 35.0325,
|
18762 |
+
"step": 2679
|
18763 |
+
},
|
18764 |
+
{
|
18765 |
+
"epoch": 0.9637687674188619,
|
18766 |
+
"grad_norm": 6.96920108795166,
|
18767 |
+
"learning_rate": 3.4314114187229406e-07,
|
18768 |
+
"loss": 35.0312,
|
18769 |
+
"step": 2680
|
18770 |
+
},
|
18771 |
+
{
|
18772 |
+
"epoch": 0.9641283826305853,
|
18773 |
+
"grad_norm": 6.563116073608398,
|
18774 |
+
"learning_rate": 3.363202987924474e-07,
|
18775 |
+
"loss": 35.287,
|
18776 |
+
"step": 2681
|
18777 |
+
},
|
18778 |
+
{
|
18779 |
+
"epoch": 0.9644879978423088,
|
18780 |
+
"grad_norm": 7.000607967376709,
|
18781 |
+
"learning_rate": 3.295677004240605e-07,
|
18782 |
+
"loss": 34.9819,
|
18783 |
+
"step": 2682
|
18784 |
+
},
|
18785 |
+
{
|
18786 |
+
"epoch": 0.9648476130540322,
|
18787 |
+
"grad_norm": 6.875179290771484,
|
18788 |
+
"learning_rate": 3.228833560461386e-07,
|
18789 |
+
"loss": 35.0564,
|
18790 |
+
"step": 2683
|
18791 |
+
},
|
18792 |
+
{
|
18793 |
+
"epoch": 0.9652072282657557,
|
18794 |
+
"grad_norm": 6.937688827514648,
|
18795 |
+
"learning_rate": 3.162672748438844e-07,
|
18796 |
+
"loss": 35.0283,
|
18797 |
+
"step": 2684
|
18798 |
+
},
|
18799 |
+
{
|
18800 |
+
"epoch": 0.9655668434774791,
|
18801 |
+
"grad_norm": 6.844105243682861,
|
18802 |
+
"learning_rate": 3.097194659086977e-07,
|
18803 |
+
"loss": 35.046,
|
18804 |
+
"step": 2685
|
18805 |
+
},
|
18806 |
+
{
|
18807 |
+
"epoch": 0.9659264586892026,
|
18808 |
+
"grad_norm": 6.907447814941406,
|
18809 |
+
"learning_rate": 3.032399382381812e-07,
|
18810 |
+
"loss": 35.0243,
|
18811 |
+
"step": 2686
|
18812 |
+
},
|
18813 |
+
{
|
18814 |
+
"epoch": 0.966286073900926,
|
18815 |
+
"grad_norm": 6.939781665802002,
|
18816 |
+
"learning_rate": 2.9682870073607924e-07,
|
18817 |
+
"loss": 35.0202,
|
18818 |
+
"step": 2687
|
18819 |
+
},
|
18820 |
+
{
|
18821 |
+
"epoch": 0.9666456891126495,
|
18822 |
+
"grad_norm": 6.969338893890381,
|
18823 |
+
"learning_rate": 2.904857622123114e-07,
|
18824 |
+
"loss": 34.9846,
|
18825 |
+
"step": 2688
|
18826 |
+
},
|
18827 |
+
{
|
18828 |
+
"epoch": 0.9670053043243729,
|
18829 |
+
"grad_norm": 7.066370487213135,
|
18830 |
+
"learning_rate": 2.8421113138296096e-07,
|
18831 |
+
"loss": 35.0785,
|
18832 |
+
"step": 2689
|
18833 |
+
},
|
18834 |
+
{
|
18835 |
+
"epoch": 0.9673649195360964,
|
18836 |
+
"grad_norm": 6.65691614151001,
|
18837 |
+
"learning_rate": 2.7800481687021987e-07,
|
18838 |
+
"loss": 35.2234,
|
18839 |
+
"step": 2690
|
18840 |
+
},
|
18841 |
+
{
|
18842 |
+
"epoch": 0.9677245347478198,
|
18843 |
+
"grad_norm": 6.906788349151611,
|
18844 |
+
"learning_rate": 2.7186682720241053e-07,
|
18845 |
+
"loss": 35.0046,
|
18846 |
+
"step": 2691
|
18847 |
+
},
|
18848 |
+
{
|
18849 |
+
"epoch": 0.9680841499595433,
|
18850 |
+
"grad_norm": 6.875458717346191,
|
18851 |
+
"learning_rate": 2.657971708139917e-07,
|
18852 |
+
"loss": 35.0472,
|
18853 |
+
"step": 2692
|
18854 |
+
},
|
18855 |
+
{
|
18856 |
+
"epoch": 0.9684437651712667,
|
18857 |
+
"grad_norm": 6.875387191772461,
|
18858 |
+
"learning_rate": 2.5979585604549164e-07,
|
18859 |
+
"loss": 35.0196,
|
18860 |
+
"step": 2693
|
18861 |
+
},
|
18862 |
+
{
|
18863 |
+
"epoch": 0.9688033803829902,
|
18864 |
+
"grad_norm": 6.938167095184326,
|
18865 |
+
"learning_rate": 2.538628911435359e-07,
|
18866 |
+
"loss": 35.0231,
|
18867 |
+
"step": 2694
|
18868 |
+
},
|
18869 |
+
{
|
18870 |
+
"epoch": 0.9691629955947136,
|
18871 |
+
"grad_norm": 7.031561374664307,
|
18872 |
+
"learning_rate": 2.4799828426084747e-07,
|
18873 |
+
"loss": 34.9725,
|
18874 |
+
"step": 2695
|
18875 |
+
},
|
18876 |
+
{
|
18877 |
+
"epoch": 0.9695226108064371,
|
18878 |
+
"grad_norm": 6.750325679779053,
|
18879 |
+
"learning_rate": 2.4220204345618557e-07,
|
18880 |
+
"loss": 35.1077,
|
18881 |
+
"step": 2696
|
18882 |
+
},
|
18883 |
+
{
|
18884 |
+
"epoch": 0.9698822260181605,
|
18885 |
+
"grad_norm": 6.937902927398682,
|
18886 |
+
"learning_rate": 2.364741766943901e-07,
|
18887 |
+
"loss": 35.0435,
|
18888 |
+
"step": 2697
|
18889 |
+
},
|
18890 |
+
{
|
18891 |
+
"epoch": 0.970241841229884,
|
18892 |
+
"grad_norm": 6.844454765319824,
|
18893 |
+
"learning_rate": 2.3081469184633164e-07,
|
18894 |
+
"loss": 35.1286,
|
18895 |
+
"step": 2698
|
18896 |
+
},
|
18897 |
+
{
|
18898 |
+
"epoch": 0.9706014564416074,
|
18899 |
+
"grad_norm": 6.876379013061523,
|
18900 |
+
"learning_rate": 2.2522359668892268e-07,
|
18901 |
+
"loss": 35.0179,
|
18902 |
+
"step": 2699
|
18903 |
+
},
|
18904 |
+
{
|
18905 |
+
"epoch": 0.9709610716533309,
|
18906 |
+
"grad_norm": 7.064993381500244,
|
18907 |
+
"learning_rate": 2.1970089890509527e-07,
|
18908 |
+
"loss": 35.0263,
|
18909 |
+
"step": 2700
|
18910 |
+
},
|
18911 |
+
{
|
18912 |
+
"epoch": 0.9713206868650544,
|
18913 |
+
"grad_norm": 6.846558570861816,
|
18914 |
+
"learning_rate": 2.1424660608378998e-07,
|
18915 |
+
"loss": 35.0245,
|
18916 |
+
"step": 2701
|
18917 |
+
},
|
18918 |
+
{
|
18919 |
+
"epoch": 0.9716803020767778,
|
18920 |
+
"grad_norm": 6.1268630027771,
|
18921 |
+
"learning_rate": 2.0886072571995597e-07,
|
18922 |
+
"loss": 35.5075,
|
18923 |
+
"step": 2702
|
18924 |
+
},
|
18925 |
+
{
|
18926 |
+
"epoch": 0.9720399172885013,
|
18927 |
+
"grad_norm": 6.87537145614624,
|
18928 |
+
"learning_rate": 2.0354326521453414e-07,
|
18929 |
+
"loss": 35.0149,
|
18930 |
+
"step": 2703
|
18931 |
+
},
|
18932 |
+
{
|
18933 |
+
"epoch": 0.9723995325002247,
|
18934 |
+
"grad_norm": 6.844267845153809,
|
18935 |
+
"learning_rate": 1.9829423187444074e-07,
|
18936 |
+
"loss": 35.0315,
|
18937 |
+
"step": 2704
|
18938 |
+
},
|
18939 |
+
{
|
18940 |
+
"epoch": 0.9727591477119483,
|
18941 |
+
"grad_norm": 6.875933647155762,
|
18942 |
+
"learning_rate": 1.9311363291257268e-07,
|
18943 |
+
"loss": 35.0775,
|
18944 |
+
"step": 2705
|
18945 |
+
},
|
18946 |
+
{
|
18947 |
+
"epoch": 0.9731187629236717,
|
18948 |
+
"grad_norm": 6.884557247161865,
|
18949 |
+
"learning_rate": 1.8800147544777435e-07,
|
18950 |
+
"loss": 35.081,
|
18951 |
+
"step": 2706
|
18952 |
+
},
|
18953 |
+
{
|
18954 |
+
"epoch": 0.9734783781353952,
|
18955 |
+
"grad_norm": 6.84420919418335,
|
18956 |
+
"learning_rate": 1.829577665048654e-07,
|
18957 |
+
"loss": 35.0697,
|
18958 |
+
"step": 2707
|
18959 |
+
},
|
18960 |
+
{
|
18961 |
+
"epoch": 0.9738379933471186,
|
18962 |
+
"grad_norm": 6.656634330749512,
|
18963 |
+
"learning_rate": 1.7798251301458513e-07,
|
18964 |
+
"loss": 35.1576,
|
18965 |
+
"step": 2708
|
18966 |
+
},
|
18967 |
+
{
|
18968 |
+
"epoch": 0.9741976085588421,
|
18969 |
+
"grad_norm": 6.719245433807373,
|
18970 |
+
"learning_rate": 1.7307572181361475e-07,
|
18971 |
+
"loss": 35.1254,
|
18972 |
+
"step": 2709
|
18973 |
+
},
|
18974 |
+
{
|
18975 |
+
"epoch": 0.9745572237705655,
|
18976 |
+
"grad_norm": 6.782287120819092,
|
18977 |
+
"learning_rate": 1.6823739964456075e-07,
|
18978 |
+
"loss": 35.0883,
|
18979 |
+
"step": 2710
|
18980 |
+
},
|
18981 |
+
{
|
18982 |
+
"epoch": 0.974916838982289,
|
18983 |
+
"grad_norm": 6.750965118408203,
|
18984 |
+
"learning_rate": 1.6346755315594375e-07,
|
18985 |
+
"loss": 35.0792,
|
18986 |
+
"step": 2711
|
18987 |
+
},
|
18988 |
+
{
|
18989 |
+
"epoch": 0.9752764541940124,
|
18990 |
+
"grad_norm": 6.970799922943115,
|
18991 |
+
"learning_rate": 1.5876618890218186e-07,
|
18992 |
+
"loss": 34.9827,
|
18993 |
+
"step": 2712
|
18994 |
+
},
|
18995 |
+
{
|
18996 |
+
"epoch": 0.9756360694057359,
|
18997 |
+
"grad_norm": 6.938418865203857,
|
18998 |
+
"learning_rate": 1.5413331334360182e-07,
|
18999 |
+
"loss": 34.9946,
|
19000 |
+
"step": 2713
|
19001 |
+
},
|
19002 |
+
{
|
19003 |
+
"epoch": 0.9759956846174593,
|
19004 |
+
"grad_norm": 6.911016941070557,
|
19005 |
+
"learning_rate": 1.495689328464056e-07,
|
19006 |
+
"loss": 35.18,
|
19007 |
+
"step": 2714
|
19008 |
+
},
|
19009 |
+
{
|
19010 |
+
"epoch": 0.9763552998291828,
|
19011 |
+
"grad_norm": 6.906633377075195,
|
19012 |
+
"learning_rate": 1.4507305368268166e-07,
|
19013 |
+
"loss": 35.0348,
|
19014 |
+
"step": 2715
|
19015 |
+
},
|
19016 |
+
{
|
19017 |
+
"epoch": 0.9767149150409062,
|
19018 |
+
"grad_norm": 6.938003063201904,
|
19019 |
+
"learning_rate": 1.4064568203037697e-07,
|
19020 |
+
"loss": 35.009,
|
19021 |
+
"step": 2716
|
19022 |
+
},
|
19023 |
+
{
|
19024 |
+
"epoch": 0.9770745302526297,
|
19025 |
+
"grad_norm": 6.781818866729736,
|
19026 |
+
"learning_rate": 1.362868239733195e-07,
|
19027 |
+
"loss": 35.0737,
|
19028 |
+
"step": 2717
|
19029 |
+
},
|
19030 |
+
{
|
19031 |
+
"epoch": 0.9774341454643531,
|
19032 |
+
"grad_norm": 6.969637870788574,
|
19033 |
+
"learning_rate": 1.3199648550116795e-07,
|
19034 |
+
"loss": 35.055,
|
19035 |
+
"step": 2718
|
19036 |
+
},
|
19037 |
+
{
|
19038 |
+
"epoch": 0.9777937606760766,
|
19039 |
+
"grad_norm": 6.906702995300293,
|
19040 |
+
"learning_rate": 1.277746725094453e-07,
|
19041 |
+
"loss": 35.0483,
|
19042 |
+
"step": 2719
|
19043 |
+
},
|
19044 |
+
{
|
19045 |
+
"epoch": 0.9781533758878,
|
19046 |
+
"grad_norm": 7.000565528869629,
|
19047 |
+
"learning_rate": 1.236213907994943e-07,
|
19048 |
+
"loss": 35.0131,
|
19049 |
+
"step": 2720
|
19050 |
+
},
|
19051 |
+
{
|
19052 |
+
"epoch": 0.9785129910995235,
|
19053 |
+
"grad_norm": 6.750234603881836,
|
19054 |
+
"learning_rate": 1.1953664607849968e-07,
|
19055 |
+
"loss": 35.1448,
|
19056 |
+
"step": 2721
|
19057 |
+
},
|
19058 |
+
{
|
19059 |
+
"epoch": 0.978872606311247,
|
19060 |
+
"grad_norm": 6.8440260887146,
|
19061 |
+
"learning_rate": 1.1552044395945482e-07,
|
19062 |
+
"loss": 35.1569,
|
19063 |
+
"step": 2722
|
19064 |
+
},
|
19065 |
+
{
|
19066 |
+
"epoch": 0.9792322215229704,
|
19067 |
+
"grad_norm": 6.937960147857666,
|
19068 |
+
"learning_rate": 1.1157278996118404e-07,
|
19069 |
+
"loss": 35.0192,
|
19070 |
+
"step": 2723
|
19071 |
+
},
|
19072 |
+
{
|
19073 |
+
"epoch": 0.9795918367346939,
|
19074 |
+
"grad_norm": 6.845226764678955,
|
19075 |
+
"learning_rate": 1.076936895082925e-07,
|
19076 |
+
"loss": 35.0441,
|
19077 |
+
"step": 2724
|
19078 |
+
},
|
19079 |
+
{
|
19080 |
+
"epoch": 0.9799514519464173,
|
19081 |
+
"grad_norm": 7.003016471862793,
|
19082 |
+
"learning_rate": 1.0388314793119968e-07,
|
19083 |
+
"loss": 35.0005,
|
19084 |
+
"step": 2725
|
19085 |
+
},
|
19086 |
+
{
|
19087 |
+
"epoch": 0.9803110671581408,
|
19088 |
+
"grad_norm": 6.627377986907959,
|
19089 |
+
"learning_rate": 1.0014117046612259e-07,
|
19090 |
+
"loss": 35.1722,
|
19091 |
+
"step": 2726
|
19092 |
+
},
|
19093 |
+
{
|
19094 |
+
"epoch": 0.9806706823698642,
|
19095 |
+
"grad_norm": 7.000593185424805,
|
19096 |
+
"learning_rate": 9.646776225503696e-08,
|
19097 |
+
"loss": 34.9891,
|
19098 |
+
"step": 2727
|
19099 |
+
},
|
19100 |
+
{
|
19101 |
+
"epoch": 0.9810302975815877,
|
19102 |
+
"grad_norm": 6.906768798828125,
|
19103 |
+
"learning_rate": 9.286292834572164e-08,
|
19104 |
+
"loss": 35.0124,
|
19105 |
+
"step": 2728
|
19106 |
+
},
|
19107 |
+
{
|
19108 |
+
"epoch": 0.9813899127933111,
|
19109 |
+
"grad_norm": 6.9691667556762695,
|
19110 |
+
"learning_rate": 8.932667369170866e-08,
|
19111 |
+
"loss": 34.9706,
|
19112 |
+
"step": 2729
|
19113 |
+
},
|
19114 |
+
{
|
19115 |
+
"epoch": 0.9817495280050346,
|
19116 |
+
"grad_norm": 6.844206809997559,
|
19117 |
+
"learning_rate": 8.585900315229434e-08,
|
19118 |
+
"loss": 35.0817,
|
19119 |
+
"step": 2730
|
19120 |
+
},
|
19121 |
+
{
|
19122 |
+
"epoch": 0.982109143216758,
|
19123 |
+
"grad_norm": 7.031874656677246,
|
19124 |
+
"learning_rate": 8.245992149253923e-08,
|
19125 |
+
"loss": 34.9827,
|
19126 |
+
"step": 2731
|
19127 |
+
},
|
19128 |
+
{
|
19129 |
+
"epoch": 0.9824687584284815,
|
19130 |
+
"grad_norm": 6.781763553619385,
|
19131 |
+
"learning_rate": 7.912943338324596e-08,
|
19132 |
+
"loss": 35.0802,
|
19133 |
+
"step": 2732
|
19134 |
+
},
|
19135 |
+
{
|
19136 |
+
"epoch": 0.982828373640205,
|
19137 |
+
"grad_norm": 6.844075679779053,
|
19138 |
+
"learning_rate": 7.58675434009648e-08,
|
19139 |
+
"loss": 35.017,
|
19140 |
+
"step": 2733
|
19141 |
+
},
|
19142 |
+
{
|
19143 |
+
"epoch": 0.9831879888519285,
|
19144 |
+
"grad_norm": 6.843966484069824,
|
19145 |
+
"learning_rate": 7.26742560279714e-08,
|
19146 |
+
"loss": 35.0268,
|
19147 |
+
"step": 2734
|
19148 |
+
},
|
19149 |
+
{
|
19150 |
+
"epoch": 0.9835476040636519,
|
19151 |
+
"grad_norm": 6.8442463874816895,
|
19152 |
+
"learning_rate": 6.954957565228904e-08,
|
19153 |
+
"loss": 35.0883,
|
19154 |
+
"step": 2735
|
19155 |
+
},
|
19156 |
+
{
|
19157 |
+
"epoch": 0.9839072192753754,
|
19158 |
+
"grad_norm": 6.9076337814331055,
|
19159 |
+
"learning_rate": 6.64935065676553e-08,
|
19160 |
+
"loss": 35.006,
|
19161 |
+
"step": 2736
|
19162 |
+
},
|
19163 |
+
{
|
19164 |
+
"epoch": 0.9842668344870988,
|
19165 |
+
"grad_norm": 6.93890905380249,
|
19166 |
+
"learning_rate": 6.350605297352763e-08,
|
19167 |
+
"loss": 35.03,
|
19168 |
+
"step": 2737
|
19169 |
+
},
|
19170 |
+
{
|
19171 |
+
"epoch": 0.9846264496988223,
|
19172 |
+
"grad_norm": 6.9072394371032715,
|
19173 |
+
"learning_rate": 6.05872189750778e-08,
|
19174 |
+
"loss": 35.0858,
|
19175 |
+
"step": 2738
|
19176 |
+
},
|
19177 |
+
{
|
19178 |
+
"epoch": 0.9849860649105457,
|
19179 |
+
"grad_norm": 6.504227638244629,
|
19180 |
+
"learning_rate": 5.773700858318631e-08,
|
19181 |
+
"loss": 35.4658,
|
19182 |
+
"step": 2739
|
19183 |
+
},
|
19184 |
+
{
|
19185 |
+
"epoch": 0.9853456801222692,
|
19186 |
+
"grad_norm": 6.875583171844482,
|
19187 |
+
"learning_rate": 5.4955425714431353e-08,
|
19188 |
+
"loss": 35.0411,
|
19189 |
+
"step": 2740
|
19190 |
+
},
|
19191 |
+
{
|
19192 |
+
"epoch": 0.9857052953339926,
|
19193 |
+
"grad_norm": 6.938041687011719,
|
19194 |
+
"learning_rate": 5.224247419108319e-08,
|
19195 |
+
"loss": 34.9976,
|
19196 |
+
"step": 2741
|
19197 |
+
},
|
19198 |
+
{
|
19199 |
+
"epoch": 0.9860649105457161,
|
19200 |
+
"grad_norm": 6.844311714172363,
|
19201 |
+
"learning_rate": 4.9598157741120866e-08,
|
19202 |
+
"loss": 35.0794,
|
19203 |
+
"step": 2742
|
19204 |
+
},
|
19205 |
+
{
|
19206 |
+
"epoch": 0.9864245257574396,
|
19207 |
+
"grad_norm": 6.844622611999512,
|
19208 |
+
"learning_rate": 4.702247999819887e-08,
|
19209 |
+
"loss": 35.0342,
|
19210 |
+
"step": 2743
|
19211 |
+
},
|
19212 |
+
{
|
19213 |
+
"epoch": 0.986784140969163,
|
19214 |
+
"grad_norm": 6.906755447387695,
|
19215 |
+
"learning_rate": 4.451544450163603e-08,
|
19216 |
+
"loss": 35.0091,
|
19217 |
+
"step": 2744
|
19218 |
+
},
|
19219 |
+
{
|
19220 |
+
"epoch": 0.9871437561808865,
|
19221 |
+
"grad_norm": 6.844326496124268,
|
19222 |
+
"learning_rate": 4.207705469645995e-08,
|
19223 |
+
"loss": 35.0878,
|
19224 |
+
"step": 2745
|
19225 |
+
},
|
19226 |
+
{
|
19227 |
+
"epoch": 0.9875033713926099,
|
19228 |
+
"grad_norm": 6.9065752029418945,
|
19229 |
+
"learning_rate": 3.9707313933345926e-08,
|
19230 |
+
"loss": 35.1009,
|
19231 |
+
"step": 2746
|
19232 |
+
},
|
19233 |
+
{
|
19234 |
+
"epoch": 0.9878629866043334,
|
19235 |
+
"grad_norm": 6.875674724578857,
|
19236 |
+
"learning_rate": 3.740622546863914e-08,
|
19237 |
+
"loss": 35.0409,
|
19238 |
+
"step": 2747
|
19239 |
+
},
|
19240 |
+
{
|
19241 |
+
"epoch": 0.9882226018160568,
|
19242 |
+
"grad_norm": 6.906862735748291,
|
19243 |
+
"learning_rate": 3.517379246436026e-08,
|
19244 |
+
"loss": 35.026,
|
19245 |
+
"step": 2748
|
19246 |
+
},
|
19247 |
+
{
|
19248 |
+
"epoch": 0.9885822170277803,
|
19249 |
+
"grad_norm": 6.876607418060303,
|
19250 |
+
"learning_rate": 3.3010017988166495e-08,
|
19251 |
+
"loss": 35.0271,
|
19252 |
+
"step": 2749
|
19253 |
+
},
|
19254 |
+
{
|
19255 |
+
"epoch": 0.9889418322395037,
|
19256 |
+
"grad_norm": 7.0033369064331055,
|
19257 |
+
"learning_rate": 3.091490501339611e-08,
|
19258 |
+
"loss": 35.0037,
|
19259 |
+
"step": 2750
|
19260 |
+
},
|
19261 |
+
{
|
19262 |
+
"epoch": 0.9893014474512272,
|
19263 |
+
"grad_norm": 6.878208637237549,
|
19264 |
+
"learning_rate": 2.888845641900728e-08,
|
19265 |
+
"loss": 35.1367,
|
19266 |
+
"step": 2751
|
19267 |
+
},
|
19268 |
+
{
|
19269 |
+
"epoch": 0.9896610626629506,
|
19270 |
+
"grad_norm": 6.6880035400390625,
|
19271 |
+
"learning_rate": 2.6930674989628092e-08,
|
19272 |
+
"loss": 35.1422,
|
19273 |
+
"step": 2752
|
19274 |
+
},
|
19275 |
+
{
|
19276 |
+
"epoch": 0.9900206778746741,
|
19277 |
+
"grad_norm": 6.8129658699035645,
|
19278 |
+
"learning_rate": 2.5041563415512115e-08,
|
19279 |
+
"loss": 35.1067,
|
19280 |
+
"step": 2753
|
19281 |
+
},
|
19282 |
+
{
|
19283 |
+
"epoch": 0.9903802930863975,
|
19284 |
+
"grad_norm": 6.875419616699219,
|
19285 |
+
"learning_rate": 2.3221124292566176e-08,
|
19286 |
+
"loss": 35.0506,
|
19287 |
+
"step": 2754
|
19288 |
+
},
|
19289 |
+
{
|
19290 |
+
"epoch": 0.990739908298121,
|
19291 |
+
"grad_norm": 6.781773090362549,
|
19292 |
+
"learning_rate": 2.146936012231704e-08,
|
19293 |
+
"loss": 35.061,
|
19294 |
+
"step": 2755
|
19295 |
+
},
|
19296 |
+
{
|
19297 |
+
"epoch": 0.9910995235098444,
|
19298 |
+
"grad_norm": 6.813143253326416,
|
19299 |
+
"learning_rate": 1.9786273311928062e-08,
|
19300 |
+
"loss": 35.0469,
|
19301 |
+
"step": 2756
|
19302 |
+
},
|
19303 |
+
{
|
19304 |
+
"epoch": 0.9914591387215679,
|
19305 |
+
"grad_norm": 7.0006608963012695,
|
19306 |
+
"learning_rate": 1.817186617419364e-08,
|
19307 |
+
"loss": 35.0013,
|
19308 |
+
"step": 2757
|
19309 |
+
},
|
19310 |
+
{
|
19311 |
+
"epoch": 0.9918187539332913,
|
19312 |
+
"grad_norm": 6.781820297241211,
|
19313 |
+
"learning_rate": 1.6626140927533673e-08,
|
19314 |
+
"loss": 35.1017,
|
19315 |
+
"step": 2758
|
19316 |
+
},
|
19317 |
+
{
|
19318 |
+
"epoch": 0.9921783691450148,
|
19319 |
+
"grad_norm": 6.781604290008545,
|
19320 |
+
"learning_rate": 1.5149099695987988e-08,
|
19321 |
+
"loss": 35.0649,
|
19322 |
+
"step": 2759
|
19323 |
+
},
|
19324 |
+
{
|
19325 |
+
"epoch": 0.9925379843567383,
|
19326 |
+
"grad_norm": 6.844202995300293,
|
19327 |
+
"learning_rate": 1.3740744509205261e-08,
|
19328 |
+
"loss": 35.0584,
|
19329 |
+
"step": 2760
|
19330 |
+
},
|
19331 |
+
{
|
19332 |
+
"epoch": 0.9928975995684618,
|
19333 |
+
"grad_norm": 6.813276767730713,
|
19334 |
+
"learning_rate": 1.2401077302465203e-08,
|
19335 |
+
"loss": 35.0444,
|
19336 |
+
"step": 2761
|
19337 |
+
},
|
19338 |
+
{
|
19339 |
+
"epoch": 0.9932572147801852,
|
19340 |
+
"grad_norm": 6.9080119132995605,
|
19341 |
+
"learning_rate": 1.1130099916650816e-08,
|
19342 |
+
"loss": 35.029,
|
19343 |
+
"step": 2762
|
19344 |
+
},
|
19345 |
+
{
|
19346 |
+
"epoch": 0.9936168299919087,
|
19347 |
+
"grad_norm": 6.939767360687256,
|
19348 |
+
"learning_rate": 9.927814098265043e-09,
|
19349 |
+
"loss": 35.1383,
|
19350 |
+
"step": 2763
|
19351 |
+
},
|
19352 |
+
{
|
19353 |
+
"epoch": 0.9939764452036322,
|
19354 |
+
"grad_norm": 6.628198146820068,
|
19355 |
+
"learning_rate": 8.794221499408561e-09,
|
19356 |
+
"loss": 35.2006,
|
19357 |
+
"step": 2764
|
19358 |
+
},
|
19359 |
+
{
|
19360 |
+
"epoch": 0.9943360604153556,
|
19361 |
+
"grad_norm": 6.875469207763672,
|
19362 |
+
"learning_rate": 7.72932367779089e-09,
|
19363 |
+
"loss": 35.0291,
|
19364 |
+
"step": 2765
|
19365 |
+
},
|
19366 |
+
{
|
19367 |
+
"epoch": 0.9946956756270791,
|
19368 |
+
"grad_norm": 6.812909126281738,
|
19369 |
+
"learning_rate": 6.7331220967359336e-09,
|
19370 |
+
"loss": 35.1033,
|
19371 |
+
"step": 2766
|
19372 |
+
},
|
19373 |
+
{
|
19374 |
+
"epoch": 0.9950552908388025,
|
19375 |
+
"grad_norm": 6.937972068786621,
|
19376 |
+
"learning_rate": 5.805618125159784e-09,
|
19377 |
+
"loss": 35.0225,
|
19378 |
+
"step": 2767
|
19379 |
+
},
|
19380 |
+
{
|
19381 |
+
"epoch": 0.995414906050526,
|
19382 |
+
"grad_norm": 6.8758111000061035,
|
19383 |
+
"learning_rate": 4.94681303757627e-09,
|
19384 |
+
"loss": 35.0932,
|
19385 |
+
"step": 2768
|
19386 |
+
},
|
19387 |
+
{
|
19388 |
+
"epoch": 0.9957745212622494,
|
19389 |
+
"grad_norm": 6.969333648681641,
|
19390 |
+
"learning_rate": 4.156708014096955e-09,
|
19391 |
+
"loss": 35.0095,
|
19392 |
+
"step": 2769
|
19393 |
+
},
|
19394 |
+
{
|
19395 |
+
"epoch": 0.9961341364739729,
|
19396 |
+
"grad_norm": 6.812990188598633,
|
19397 |
+
"learning_rate": 3.4353041404477927e-09,
|
19398 |
+
"loss": 35.0702,
|
19399 |
+
"step": 2770
|
19400 |
+
},
|
19401 |
+
{
|
19402 |
+
"epoch": 0.9964937516856963,
|
19403 |
+
"grad_norm": 6.8440117835998535,
|
19404 |
+
"learning_rate": 2.7826024079247172e-09,
|
19405 |
+
"loss": 35.0216,
|
19406 |
+
"step": 2771
|
19407 |
+
},
|
19408 |
+
{
|
19409 |
+
"epoch": 0.9968533668974198,
|
19410 |
+
"grad_norm": 6.812954902648926,
|
19411 |
+
"learning_rate": 2.198603713432501e-09,
|
19412 |
+
"loss": 35.0711,
|
19413 |
+
"step": 2772
|
19414 |
+
},
|
19415 |
+
{
|
19416 |
+
"epoch": 0.9972129821091432,
|
19417 |
+
"grad_norm": 6.7817559242248535,
|
19418 |
+
"learning_rate": 1.6833088594736535e-09,
|
19419 |
+
"loss": 35.0771,
|
19420 |
+
"step": 2773
|
19421 |
+
},
|
19422 |
+
{
|
19423 |
+
"epoch": 0.9975725973208667,
|
19424 |
+
"grad_norm": 6.969820022583008,
|
19425 |
+
"learning_rate": 1.236718554120664e-09,
|
19426 |
+
"loss": 34.9983,
|
19427 |
+
"step": 2774
|
19428 |
+
},
|
19429 |
+
{
|
19430 |
+
"epoch": 0.9979322125325901,
|
19431 |
+
"grad_norm": 7.004218101501465,
|
19432 |
+
"learning_rate": 8.588334110604112e-10,
|
19433 |
+
"loss": 35.0482,
|
19434 |
+
"step": 2775
|
19435 |
+
},
|
19436 |
+
{
|
19437 |
+
"epoch": 0.9982918277443136,
|
19438 |
+
"grad_norm": 6.938352584838867,
|
19439 |
+
"learning_rate": 5.496539495553065e-10,
|
19440 |
+
"loss": 35.0865,
|
19441 |
+
"step": 2776
|
19442 |
+
},
|
19443 |
+
{
|
19444 |
+
"epoch": 0.998651442956037,
|
19445 |
+
"grad_norm": 6.906884670257568,
|
19446 |
+
"learning_rate": 3.0918059445439464e-10,
|
19447 |
+
"loss": 35.0426,
|
19448 |
+
"step": 2777
|
19449 |
+
},
|
19450 |
+
{
|
19451 |
+
"epoch": 0.9990110581677605,
|
19452 |
+
"grad_norm": 6.906632900238037,
|
19453 |
+
"learning_rate": 1.3741367621555866e-10,
|
19454 |
+
"loss": 34.9973,
|
19455 |
+
"step": 2778
|
19456 |
+
},
|
19457 |
+
{
|
19458 |
+
"epoch": 0.9993706733794839,
|
19459 |
+
"grad_norm": 6.71948766708374,
|
19460 |
+
"learning_rate": 3.435343085556042e-11,
|
19461 |
+
"loss": 35.1191,
|
19462 |
+
"step": 2779
|
19463 |
+
},
|
19464 |
+
{
|
19465 |
+
"epoch": 0.9997302885912074,
|
19466 |
+
"grad_norm": 6.9692182540893555,
|
19467 |
+
"learning_rate": 0.0,
|
19468 |
+
"loss": 35.0035,
|
19469 |
+
"step": 2780
|
19470 |
}
|
19471 |
],
|
19472 |
"logging_steps": 1,
|
|
|
19481 |
"should_evaluate": false,
|
19482 |
"should_log": false,
|
19483 |
"should_save": true,
|
19484 |
+
"should_training_stop": true
|
19485 |
},
|
19486 |
"attributes": {}
|
19487 |
}
|
19488 |
},
|
19489 |
+
"total_flos": 5100266764369920.0,
|
19490 |
"train_batch_size": 4,
|
19491 |
"trial_name": null,
|
19492 |
"trial_params": null
|