Bul11k committed on
Commit
8d7b7a4
·
verified ·
1 Parent(s): 8eacda4

End of training

Browse files
README.md CHANGED
@@ -3,6 +3,8 @@ license: mit
3
  base_model: cointegrated/rubert-tiny
4
  tags:
5
  - generated_from_trainer
 
 
6
  model-index:
7
  - name: finetuned-tiny-bert_second_attempt
8
  results: []
@@ -14,6 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
14
  # finetuned-tiny-bert_second_attempt
15
 
16
  This model is a fine-tuned version of [cointegrated/rubert-tiny](https://huggingface.co/cointegrated/rubert-tiny) on an unknown dataset.
 
 
 
17
 
18
  ## Model description
19
 
 
3
  base_model: cointegrated/rubert-tiny
4
  tags:
5
  - generated_from_trainer
6
+ metrics:
7
+ - accuracy
8
  model-index:
9
  - name: finetuned-tiny-bert_second_attempt
10
  results: []
 
16
  # finetuned-tiny-bert_second_attempt
17
 
18
  This model is a fine-tuned version of [cointegrated/rubert-tiny](https://huggingface.co/cointegrated/rubert-tiny) on an unknown dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 2.2433
21
+ - Accuracy: 0.5666
22
 
23
  ## Model description
24
 
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_accuracy": 0.5167136043030395,
4
- "eval_loss": 2.5832159519195557,
5
- "eval_runtime": 62.4114,
6
  "eval_samples": 15263,
7
- "eval_samples_per_second": 244.555,
8
- "eval_steps_per_second": 7.643,
9
- "perplexity": 13.23964783936109,
10
- "train_loss": 3.04393067064311,
11
- "train_runtime": 388.0084,
12
  "train_samples": 35613,
13
- "train_samples_per_second": 91.784,
14
- "train_steps_per_second": 2.868
15
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.5665694910957602,
4
+ "eval_loss": 2.2432701587677,
5
+ "eval_runtime": 62.5137,
6
  "eval_samples": 15263,
7
+ "eval_samples_per_second": 244.155,
8
+ "eval_steps_per_second": 7.63,
9
+ "perplexity": 9.424099252756871,
10
+ "train_loss": 2.6927158644757037,
11
+ "train_runtime": 1168.1664,
12
  "train_samples": 35613,
13
+ "train_samples_per_second": 91.459,
14
+ "train_steps_per_second": 2.858
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_accuracy": 0.5167136043030395,
4
- "eval_loss": 2.5832159519195557,
5
- "eval_runtime": 62.4114,
6
  "eval_samples": 15263,
7
- "eval_samples_per_second": 244.555,
8
- "eval_steps_per_second": 7.643,
9
- "perplexity": 13.23964783936109
10
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.5665694910957602,
4
+ "eval_loss": 2.2432701587677,
5
+ "eval_runtime": 62.5137,
6
  "eval_samples": 15263,
7
+ "eval_samples_per_second": 244.155,
8
+ "eval_steps_per_second": 7.63,
9
+ "perplexity": 9.424099252756871
10
  }
runs/Feb25_11-49-43_e77a61bc3de0/events.out.tfevents.1708863022.e77a61bc3de0.27155.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d433545a77706cfba0591526c7e36429313a4a024a34d87675ad3714c3bde36f
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "train_loss": 3.04393067064311,
4
- "train_runtime": 388.0084,
5
  "train_samples": 35613,
6
- "train_samples_per_second": 91.784,
7
- "train_steps_per_second": 2.868
8
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "train_loss": 2.6927158644757037,
4
+ "train_runtime": 1168.1664,
5
  "train_samples": 35613,
6
+ "train_samples_per_second": 91.459,
7
+ "train_steps_per_second": 2.858
8
  }
trainer_state.json CHANGED
@@ -1,43 +1,71 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 1113,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.45,
13
- "grad_norm": 5.323787212371826,
14
- "learning_rate": 2.75381850853549e-05,
15
- "loss": 3.2259,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.9,
20
- "grad_norm": 5.575705528259277,
21
- "learning_rate": 5.0763701707097935e-06,
22
- "loss": 2.9056,
23
  "step": 1000
24
  },
25
  {
26
- "epoch": 1.0,
27
- "step": 1113,
28
- "total_flos": 159583189651080.0,
29
- "train_loss": 3.04393067064311,
30
- "train_runtime": 388.0084,
31
- "train_samples_per_second": 91.784,
32
- "train_steps_per_second": 2.868
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  }
34
  ],
35
  "logging_steps": 500,
36
- "max_steps": 1113,
37
  "num_input_tokens_seen": 0,
38
- "num_train_epochs": 1,
39
  "save_steps": 500,
40
- "total_flos": 159583189651080.0,
41
  "train_batch_size": 32,
42
  "trial_name": null,
43
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 3339,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.45,
13
+ "grad_norm": 5.29541015625,
14
+ "learning_rate": 4.251272836178497e-05,
15
+ "loss": 3.2051,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.9,
20
+ "grad_norm": 5.331775188446045,
21
+ "learning_rate": 3.502545672356993e-05,
22
+ "loss": 2.8264,
23
  "step": 1000
24
  },
25
  {
26
+ "epoch": 1.35,
27
+ "grad_norm": 6.496217250823975,
28
+ "learning_rate": 2.75381850853549e-05,
29
+ "loss": 2.6611,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 1.8,
34
+ "grad_norm": 5.267590522766113,
35
+ "learning_rate": 2.0050913447139864e-05,
36
+ "loss": 2.5819,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 2.25,
41
+ "grad_norm": 4.9951887130737305,
42
+ "learning_rate": 1.2563641808924828e-05,
43
+ "loss": 2.5289,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 2.7,
48
+ "grad_norm": 5.234454154968262,
49
+ "learning_rate": 5.0763701707097935e-06,
50
+ "loss": 2.5066,
51
+ "step": 3000
52
+ },
53
+ {
54
+ "epoch": 3.0,
55
+ "step": 3339,
56
+ "total_flos": 480295528237080.0,
57
+ "train_loss": 2.6927158644757037,
58
+ "train_runtime": 1168.1664,
59
+ "train_samples_per_second": 91.459,
60
+ "train_steps_per_second": 2.858
61
  }
62
  ],
63
  "logging_steps": 500,
64
+ "max_steps": 3339,
65
  "num_input_tokens_seen": 0,
66
+ "num_train_epochs": 3,
67
  "save_steps": 500,
68
+ "total_flos": 480295528237080.0,
69
  "train_batch_size": 32,
70
  "trial_name": null,
71
  "trial_params": null