ank087 committed
Commit 452267e · 1 Parent(s): 326371f

phi-2-mental-health

Files changed (2)
  1. README.md +13 -0
  2. config.json +45 -0
README.md CHANGED
@@ -6,6 +6,7 @@ tags:
 model-index:
 - name: results
   results: []
+library_name: peft
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -29,6 +30,17 @@ More information needed
 
 ## Training procedure
 
+
+The following `bitsandbytes` quantization config was used during training:
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: False
+- bnb_4bit_compute_dtype: float16
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
@@ -49,6 +61,7 @@ The following hyperparameters were used during training:
 
 ### Framework versions
 
+- PEFT 0.4.0
 - Transformers 4.31.0
 - Pytorch 2.1.0+cu118
 - Datasets 3.0.1
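
Since the diff spells out the exact `bitsandbytes` settings and the framework versions, a minimal loading sketch can mirror them. This is an illustration, not part of the commit: the adapter repo id `ank087/phi-2-mental-health` is assumed from the commit author and message, and PEFT 0.4.0 / Transformers 4.31.0 from the Framework versions list are assumed installed.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# 4-bit NF4 quantization matching the settings listed in the model card
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float16,
)

# trust_remote_code is required because phi-2 (at this Transformers version)
# ships its modeling code in the microsoft/phi-2 repo
base = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    quantization_config=bnb_config,
    trust_remote_code=True,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)

# Assumed adapter repo id (inferred from the commit, not confirmed)
model = PeftModel.from_pretrained(base, "ank087/phi-2-mental-health")
```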
config.json ADDED
@@ -0,0 +1,45 @@
+{
+  "_name_or_path": "microsoft/phi-2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "PhiForCausalLM"
+  ],
+  "attn_pdrop": 0.0,
+  "auto_map": {
+    "AutoConfig": "microsoft/phi-2--configuration_phi.PhiConfig",
+    "AutoModelForCausalLM": "microsoft/phi-2--modeling_phi.PhiForCausalLM"
+  },
+  "embd_pdrop": 0.0,
+  "flash_attn": true,
+  "flash_rotary": true,
+  "fused_dense": true,
+  "img_processor": null,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "phi-msft",
+  "n_embd": 2560,
+  "n_head": 32,
+  "n_head_kv": null,
+  "n_inner": null,
+  "n_layer": 32,
+  "n_positions": 2048,
+  "pretraining_tp": 1,
+  "quantization_config": {
+    "bnb_4bit_compute_dtype": "float16",
+    "bnb_4bit_quant_type": "nf4",
+    "bnb_4bit_use_double_quant": false,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false
+  },
+  "resid_pdrop": 0.1,
+  "rotary_dim": 32,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.31.0",
+  "use_cache": false,
+  "vocab_size": 51200
+}
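
The added config.json embeds the quantization settings alongside the phi-2 architecture fields (`quantization_config` is stored as a plain dict), so they can be inspected without downloading any weights. A small sketch, again assuming the hypothetical repo id `ank087/phi-2-mental-health`:

```python
from transformers import AutoConfig

# trust_remote_code is needed because auto_map points at microsoft/phi-2's custom code
cfg = AutoConfig.from_pretrained("ank087/phi-2-mental-health", trust_remote_code=True)

print(cfg.model_type)                                  # phi-msft
print(cfg.n_layer, cfg.n_head, cfg.n_embd)             # 32 32 2560
print(cfg.quantization_config["bnb_4bit_quant_type"])  # nf4
```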