wingo-dz committed on
Commit
16a6cd1
·
verified ·
1 Parent(s): 258f3fb

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - ar
4
+ - en
5
+ license: apache-2.0
6
+ base_model: Helsinki-NLP/opus-mt-ar-en
7
+ tags:
8
+ - generated_from_trainer
9
+ metrics:
10
+ - bleu
11
+ model-index:
12
+ - name: opus_2
13
+ results: []
14
+ ---
15
+
16
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
17
+ should probably proofread and complete it, then remove this comment. -->
18
+
19
+ # opus_2
20
+
21
+ This model is a fine-tuned version of [Helsinki-NLP/opus-mt-ar-en](https://huggingface.co/Helsinki-NLP/opus-mt-ar-en) on an unknown dataset.
22
+ It achieves the following results on the evaluation set:
23
+ - Loss: 0.0489
24
+ - Bleu: 94.8725
25
+ - Gen Len: 13.95
26
+
27
+ ## Model description
28
+
29
+ More information needed
30
+
31
+ ## Intended uses & limitations
32
+
33
+ More information needed
34
+
35
+ ## Training and evaluation data
36
+
37
+ More information needed
38
+
39
+ ## Training procedure
40
+
41
+ ### Training hyperparameters
42
+
43
+ The following hyperparameters were used during training:
44
+ - learning_rate: 5e-05
45
+ - train_batch_size: 4
46
+ - eval_batch_size: 4
47
+ - seed: 42
48
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
+ - lr_scheduler_type: linear
50
+ - num_epochs: 3.0
51
+
52
+ ### Training results
53
+
54
+
55
+
56
+ ### Framework versions
57
+
58
+ - Transformers 4.40.0.dev0
59
+ - Pytorch 2.2.1+cu121
60
+ - Datasets 2.18.0
61
+ - Tokenizers 0.15.2
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_bleu": 94.8725,
4
+ "eval_gen_len": 13.95,
5
+ "eval_loss": 0.04893746227025986,
6
+ "eval_runtime": 5.9575,
7
+ "eval_samples": 80,
8
+ "eval_samples_per_second": 13.428,
9
+ "eval_steps_per_second": 3.357,
10
+ "train_loss": 0.2005171725198215,
11
+ "train_runtime": 34.6596,
12
+ "train_samples": 627,
13
+ "train_samples_per_second": 54.271,
14
+ "train_steps_per_second": 13.589
15
+ }
config.json CHANGED
@@ -1,5 +1,6 @@
1
  {
2
- "_name_or_path": "Helsinki-NLP/opus-mt-en-ar",
 
3
  "activation_dropout": 0.0,
4
  "activation_function": "swish",
5
  "add_bias_logits": false,
@@ -10,7 +11,7 @@
10
  "attention_dropout": 0.0,
11
  "bad_words_ids": [
12
  [
13
- 62801
14
  ]
15
  ],
16
  "bos_token_id": 0,
@@ -21,15 +22,14 @@
21
  "decoder_ffn_dim": 2048,
22
  "decoder_layerdrop": 0.0,
23
  "decoder_layers": 6,
24
- "decoder_start_token_id": 62801,
25
- "decoder_vocab_size": 62802,
26
  "dropout": 0.1,
27
  "encoder_attention_heads": 8,
28
  "encoder_ffn_dim": 2048,
29
  "encoder_layerdrop": 0.0,
30
  "encoder_layers": 6,
31
  "eos_token_id": 0,
32
- "extra_pos_embeddings": 62802,
33
  "forced_eos_token_id": 0,
34
  "id2label": {
35
  "0": "LABEL_0",
@@ -50,12 +50,12 @@
50
  "normalize_embedding": false,
51
  "num_beams": 4,
52
  "num_hidden_layers": 6,
53
- "pad_token_id": 62801,
54
  "scale_embedding": true,
55
  "share_encoder_decoder_embeddings": true,
56
  "static_position_embeddings": true,
57
  "torch_dtype": "float32",
58
- "transformers_version": "4.39.0.dev0",
59
  "use_cache": true,
60
- "vocab_size": 62802
61
  }
 
1
  {
2
+ "_name_or_path": "Helsinki-NLP/opus-mt-ar-en",
3
+ "_num_labels": 3,
4
  "activation_dropout": 0.0,
5
  "activation_function": "swish",
6
  "add_bias_logits": false,
 
11
  "attention_dropout": 0.0,
12
  "bad_words_ids": [
13
  [
14
+ 62833
15
  ]
16
  ],
17
  "bos_token_id": 0,
 
22
  "decoder_ffn_dim": 2048,
23
  "decoder_layerdrop": 0.0,
24
  "decoder_layers": 6,
25
+ "decoder_start_token_id": 62833,
26
+ "decoder_vocab_size": 62834,
27
  "dropout": 0.1,
28
  "encoder_attention_heads": 8,
29
  "encoder_ffn_dim": 2048,
30
  "encoder_layerdrop": 0.0,
31
  "encoder_layers": 6,
32
  "eos_token_id": 0,
 
33
  "forced_eos_token_id": 0,
34
  "id2label": {
35
  "0": "LABEL_0",
 
50
  "normalize_embedding": false,
51
  "num_beams": 4,
52
  "num_hidden_layers": 6,
53
+ "pad_token_id": 62833,
54
  "scale_embedding": true,
55
  "share_encoder_decoder_embeddings": true,
56
  "static_position_embeddings": true,
57
  "torch_dtype": "float32",
58
+ "transformers_version": "4.40.0.dev0",
59
  "use_cache": true,
60
+ "vocab_size": 62834
61
  }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_bleu": 94.8725,
4
+ "eval_gen_len": 13.95,
5
+ "eval_loss": 0.04893746227025986,
6
+ "eval_runtime": 5.9575,
7
+ "eval_samples": 80,
8
+ "eval_samples_per_second": 13.428,
9
+ "eval_steps_per_second": 3.357
10
+ }
generation_config.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "bad_words_ids": [
3
  [
4
- 62801
5
  ]
6
  ],
7
  "bos_token_id": 0,
8
- "decoder_start_token_id": 62801,
9
  "eos_token_id": 0,
10
  "forced_eos_token_id": 0,
11
  "max_length": 512,
12
  "num_beams": 4,
13
- "pad_token_id": 62801,
14
  "renormalize_logits": true,
15
- "transformers_version": "4.39.0.dev0"
16
  }
 
1
  {
2
  "bad_words_ids": [
3
  [
4
+ 62833
5
  ]
6
  ],
7
  "bos_token_id": 0,
8
+ "decoder_start_token_id": 62833,
9
  "eos_token_id": 0,
10
  "forced_eos_token_id": 0,
11
  "max_length": 512,
12
  "num_beams": 4,
13
+ "pad_token_id": 62833,
14
  "renormalize_logits": true,
15
+ "transformers_version": "4.40.0.dev0"
16
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:876fdabd8de9b8a1552a39a5ab70a384fb6299b3fb723e27aae9d8bf5f5aaecc
3
- size 305452744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce5afd45fe23e5742c4ccff0cef4bfaa9940e5d0996d9a65ba4b2ff04552f216
3
+ size 305518408
runs/Mar24_11-48-02_999c6cbb45d4/events.out.tfevents.1711280895.999c6cbb45d4.1535.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bc4b9c03ac1ed08bbcd87e4eb57d3e264409bc40dd03a85cb5136f0b77863f8
3
+ size 5807
runs/Mar24_11-48-02_999c6cbb45d4/events.out.tfevents.1711280936.999c6cbb45d4.1535.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6caa06100a5d2ad216debb326372f15a594bc04038b40ebeb79fae47846c04c
3
+ size 458
source.spm CHANGED
Binary files a/source.spm and b/source.spm differ
 
target.spm CHANGED
Binary files a/target.spm and b/target.spm differ
 
tokenizer_config.json CHANGED
@@ -16,7 +16,7 @@
16
  "single_word": false,
17
  "special": true
18
  },
19
- "62801": {
20
  "content": "<pad>",
21
  "lstrip": false,
22
  "normalized": false,
@@ -30,9 +30,9 @@
30
  "model_max_length": 512,
31
  "pad_token": "<pad>",
32
  "separate_vocabs": false,
33
- "source_lang": "eng",
34
  "sp_model_kwargs": {},
35
- "target_lang": "ara",
36
  "tokenizer_class": "MarianTokenizer",
37
  "unk_token": "<unk>"
38
  }
 
16
  "single_word": false,
17
  "special": true
18
  },
19
+ "62833": {
20
  "content": "<pad>",
21
  "lstrip": false,
22
  "normalized": false,
 
30
  "model_max_length": 512,
31
  "pad_token": "<pad>",
32
  "separate_vocabs": false,
33
+ "source_lang": "ar",
34
  "sp_model_kwargs": {},
35
+ "target_lang": "en",
36
  "tokenizer_class": "MarianTokenizer",
37
  "unk_token": "<unk>"
38
  }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.2005171725198215,
4
+ "train_runtime": 34.6596,
5
+ "train_samples": 627,
6
+ "train_samples_per_second": 54.271,
7
+ "train_steps_per_second": 13.589
8
+ }
trainer_state.json CHANGED
@@ -1,27 +1,29 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.824858757062147,
5
  "eval_steps": 500,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 2.82,
13
- "grad_norm": 3.9365735054016113,
14
- "learning_rate": 2.919020715630885e-06,
15
- "loss": 0.1939,
16
- "step": 500
 
 
17
  }
18
  ],
19
  "logging_steps": 500,
20
- "max_steps": 531,
21
  "num_input_tokens_seen": 0,
22
  "num_train_epochs": 3,
23
  "save_steps": 500,
24
- "total_flos": 6335551438848.0,
25
  "train_batch_size": 4,
26
  "trial_name": null,
27
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 471,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 3.0,
13
+ "step": 471,
14
+ "total_flos": 7909441929216.0,
15
+ "train_loss": 0.2005171725198215,
16
+ "train_runtime": 34.6596,
17
+ "train_samples_per_second": 54.271,
18
+ "train_steps_per_second": 13.589
19
  }
20
  ],
21
  "logging_steps": 500,
22
+ "max_steps": 471,
23
  "num_input_tokens_seen": 0,
24
  "num_train_epochs": 3,
25
  "save_steps": 500,
26
+ "total_flos": 7909441929216.0,
27
  "train_batch_size": 4,
28
  "trial_name": null,
29
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da2d8a8c3cce0b00111924516d0009d42009475d9cc4facd034884be6d737922
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:357745389cc2e25c38c8fd420362968539e5493a2c89bb93ab767ccc444e9b64
3
  size 5048
vocab.json CHANGED
The diff for this file is too large to render. See raw diff