pszemraj committed on
Commit
992bcdf
·
verified ·
1 Parent(s): 5c7e229

quantize per channel

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "pszemraj_nougat-base-onnx",
3
  "architectures": [
4
  "VisionEncoderDecoderModel"
5
  ],
@@ -21,7 +21,7 @@
21
  "decoder_attention_heads": 16,
22
  "decoder_ffn_dim": 4096,
23
  "decoder_layerdrop": 0.0,
24
- "decoder_layers": 10,
25
  "decoder_start_token_id": null,
26
  "diversity_penalty": 0.0,
27
  "do_sample": false,
@@ -50,7 +50,7 @@
50
  },
51
  "length_penalty": 1.0,
52
  "max_length": 20,
53
- "max_position_embeddings": 4096,
54
  "min_length": 0,
55
  "model_type": "mbart",
56
  "no_repeat_ngram_size": 0,
@@ -185,5 +185,5 @@
185
  "model_type": "vision-encoder-decoder",
186
  "tie_word_embeddings": false,
187
  "torch_dtype": "float32",
188
- "transformers_version": "4.35.2"
189
  }
 
1
  {
2
+ "_name_or_path": "pszemraj_nougat-small-onnx",
3
  "architectures": [
4
  "VisionEncoderDecoderModel"
5
  ],
 
21
  "decoder_attention_heads": 16,
22
  "decoder_ffn_dim": 4096,
23
  "decoder_layerdrop": 0.0,
24
+ "decoder_layers": 4,
25
  "decoder_start_token_id": null,
26
  "diversity_penalty": 0.0,
27
  "do_sample": false,
 
50
  },
51
  "length_penalty": 1.0,
52
  "max_length": 20,
53
+ "max_position_embeddings": 3584,
54
  "min_length": 0,
55
  "model_type": "mbart",
56
  "no_repeat_ngram_size": 0,
 
185
  "model_type": "vision-encoder-decoder",
186
  "tie_word_embeddings": false,
187
  "torch_dtype": "float32",
188
+ "transformers_version": "4.38.1"
189
  }
decoder_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fef9b60876d3101101fbbd076ddd51cb7d34ebf46756ed407f37cb5f19ed66d
3
- size 275952336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9c8f1bb7456441320245595f93fa052612b9f477b71f4822a46c070faf48e8d
3
+ size 174444105
decoder_with_past_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:828769fec141abee78ecd80c40fdc74876c409a179b080bca2dc52d62bd40ed5
3
- size 254784072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a68ddb99f8e476a6ba4e356145440de0fa70a5d2cff89c136feba9691f27c98
3
+ size 165923718
encoder_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e5a8b5be2bafd166b95f2bd0f387a457c8ba90a36d1c7bee422ea197d4fbae7
3
- size 81514091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bec906871770afe111f0f3474bd5cb143cfd7f659c242431ee5431be580684d
3
+ size 82052587
ort_config.json CHANGED
@@ -2,7 +2,7 @@
2
  "one_external_file": true,
3
  "opset": null,
4
  "optimization": {},
5
- "optimum_version": "1.14.1",
6
  "quantization": {
7
  "activations_dtype": "QUInt8",
8
  "activations_symmetric": false,
@@ -20,7 +20,7 @@
20
  "Transpose",
21
  "EmbedLayerNormalization"
22
  ],
23
- "per_channel": false,
24
  "qdq_add_pair_to_weight": false,
25
  "qdq_dedicated_pair": false,
26
  "qdq_op_type_per_channel_support_to_axis": {
@@ -30,6 +30,6 @@
30
  "weights_dtype": "QUInt8",
31
  "weights_symmetric": true
32
  },
33
- "transformers_version": "4.35.2",
34
  "use_external_data_format": false
35
  }
 
2
  "one_external_file": true,
3
  "opset": null,
4
  "optimization": {},
5
+ "optimum_version": "1.17.1",
6
  "quantization": {
7
  "activations_dtype": "QUInt8",
8
  "activations_symmetric": false,
 
20
  "Transpose",
21
  "EmbedLayerNormalization"
22
  ],
23
+ "per_channel": true,
24
  "qdq_add_pair_to_weight": false,
25
  "qdq_dedicated_pair": false,
26
  "qdq_op_type_per_channel_support_to_axis": {
 
30
  "weights_dtype": "QUInt8",
31
  "weights_symmetric": true
32
  },
33
+ "transformers_version": "4.38.1",
34
  "use_external_data_format": false
35
  }
tokenizer.json CHANGED
@@ -2,13 +2,13 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 4096,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
- "Fixed": 4096
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 3584,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
+ "Fixed": 3584
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
tokenizer_config.json CHANGED
@@ -189,7 +189,7 @@
189
  "bos_token": "<s>",
190
  "clean_up_tokenization_spaces": false,
191
  "eos_token": "</s>",
192
- "max_length": 4096,
193
  "model_max_length": 3584,
194
  "pad_to_multiple_of": null,
195
  "pad_token": "<pad>",
 
189
  "bos_token": "<s>",
190
  "clean_up_tokenization_spaces": false,
191
  "eos_token": "</s>",
192
+ "max_length": 3584,
193
  "model_max_length": 3584,
194
  "pad_to_multiple_of": null,
195
  "pad_token": "<pad>",