finetune-NLLB-600M-on-opus100-Ar2En-with-Qlora

Browse files

Files changed (5) hide show

README.md +22 -4
adapter_config.json +2 -2
adapter_model.safetensors +1 -1
tokenizer_config.json +0 -1
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -2,6 +2,9 @@
 base_model: facebook/nllb-200-distilled-600M
 library_name: peft
 license: cc-by-nc-4.0
 tags:
 - generated_from_trainer
 model-index:
@@ -12,11 +15,15 @@ model-index:
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/FinalProject_/NLLB_2/runs/4zuxh06b)
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/FinalProject_/NLLB_2/runs/4zuxh06b)
 # NLLB_QLoRA
 This model is a fine-tuned version of [facebook/nllb-200-distilled-600M](https://huggingface.co/facebook/nllb-200-distilled-600M) on an unknown dataset.
 ## Model description
@@ -36,13 +43,24 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 2e-05
-- train_batch_size: 1
-- eval_batch_size: 1
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - num_epochs: 3
 ### Framework versions
 - PEFT 0.12.0

 base_model: facebook/nllb-200-distilled-600M
 library_name: peft
 license: cc-by-nc-4.0
+metrics:
+- bleu
+- rouge
 tags:
 - generated_from_trainer
 model-index:
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/FinalProject_/NLLB/runs/li2er79u)
 # NLLB_QLoRA
 This model is a QLoRA fine-tuned version of [facebook/nllb-200-distilled-600M](https://huggingface.co/facebook/nllb-200-distilled-600M) on the OPUS-100 Arabic-to-English (ar-en) dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.3340
+- Bleu: 31.5945
+- Rouge: 0.5906
+- Gen Len: 17.338
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 2e-05
+- train_batch_size: 2
+- eval_batch_size: 2
 - seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 8
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - num_epochs: 3
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | Bleu    | Rouge  | Gen Len |
+|:-------------:|:-----:|:----:|:---------------:|:-------:|:------:|:-------:|
+| 2.858         | 1.0   | 875  | 1.4023          | 30.5493 | 0.5771 | 17.3705 |
+| 1.4649        | 2.0   | 1750 | 1.3447          | 31.343  | 0.5886 | 17.284  |
+| 1.4247        | 3.0   | 2625 | 1.3340          | 31.5945 | 0.5906 | 17.338  |
 ### Framework versions
 - PEFT 0.12.0

adapter_config.json CHANGED Viewed

@@ -20,8 +20,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "v_proj"
   ],
   "task_type": "SEQ_2_SEQ_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
+    "q_proj"
   ],
   "task_type": "SEQ_2_SEQ_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a26751dbfa8d885560b15be1cff4a1f08656bb2167688a39abf0562d9060ddd3
 size 4738744

 version https://git-lfs.github.com/spec/v1
+oid sha256:011351902334d673e7fe8b147f7d8ae89120a99995e71569b097f7019cdbb34a
 size 4738744

tokenizer_config.json CHANGED Viewed

@@ -1869,7 +1869,6 @@
   },
   "eos_token": "</s>",
   "legacy_behaviour": false,
-  "load_in_8bit": true,
   "mask_token": "<mask>",
   "model_max_length": 1024,
   "pad_token": "<pad>",

   },
   "eos_token": "</s>",
   "legacy_behaviour": false,
   "mask_token": "<mask>",
   "model_max_length": 1024,
   "pad_token": "<pad>",

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e790f46e5748df50ab36487d940ffc8dfc404426e5282d687489385e0967fb19
 size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:b94b51c0adeaef8dd9aa1b43760a5a2c72e0b5115b9a07da9847091ef443583a
 size 5304