keremturgutlu committed on
Commit
95e7187
1 Parent(s): 7b1f25b

Upload folder using huggingface_hub

Browse files
Files changed (32) hide show
  1. vllm_gemlite_fp16/config.json +1 -0
  2. vllm_gemlite_fp16/model-00001-of-00030.safetensors +3 -0
  3. vllm_gemlite_fp16/model-00002-of-00030.safetensors +3 -0
  4. vllm_gemlite_fp16/model-00003-of-00030.safetensors +3 -0
  5. vllm_gemlite_fp16/model-00004-of-00030.safetensors +3 -0
  6. vllm_gemlite_fp16/model-00005-of-00030.safetensors +3 -0
  7. vllm_gemlite_fp16/model-00006-of-00030.safetensors +3 -0
  8. vllm_gemlite_fp16/model-00007-of-00030.safetensors +3 -0
  9. vllm_gemlite_fp16/model-00008-of-00030.safetensors +3 -0
  10. vllm_gemlite_fp16/model-00009-of-00030.safetensors +3 -0
  11. vllm_gemlite_fp16/model-00010-of-00030.safetensors +3 -0
  12. vllm_gemlite_fp16/model-00011-of-00030.safetensors +3 -0
  13. vllm_gemlite_fp16/model-00012-of-00030.safetensors +3 -0
  14. vllm_gemlite_fp16/model-00013-of-00030.safetensors +3 -0
  15. vllm_gemlite_fp16/model-00014-of-00030.safetensors +3 -0
  16. vllm_gemlite_fp16/model-00015-of-00030.safetensors +3 -0
  17. vllm_gemlite_fp16/model-00016-of-00030.safetensors +3 -0
  18. vllm_gemlite_fp16/model-00017-of-00030.safetensors +3 -0
  19. vllm_gemlite_fp16/model-00018-of-00030.safetensors +3 -0
  20. vllm_gemlite_fp16/model-00019-of-00030.safetensors +3 -0
  21. vllm_gemlite_fp16/model-00020-of-00030.safetensors +3 -0
  22. vllm_gemlite_fp16/model-00021-of-00030.safetensors +3 -0
  23. vllm_gemlite_fp16/model-00022-of-00030.safetensors +3 -0
  24. vllm_gemlite_fp16/model-00023-of-00030.safetensors +3 -0
  25. vllm_gemlite_fp16/model-00024-of-00030.safetensors +3 -0
  26. vllm_gemlite_fp16/model-00025-of-00030.safetensors +3 -0
  27. vllm_gemlite_fp16/model-00026-of-00030.safetensors +3 -0
  28. vllm_gemlite_fp16/model-00027-of-00030.safetensors +3 -0
  29. vllm_gemlite_fp16/model-00028-of-00030.safetensors +3 -0
  30. vllm_gemlite_fp16/model-00029-of-00030.safetensors +3 -0
  31. vllm_gemlite_fp16/model-00030-of-00030.safetensors +3 -0
  32. vllm_gemlite_fp16/quantize_config.json +1 -0
vllm_gemlite_fp16/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"vocab_size": 128256, "max_position_embeddings": 131072, "hidden_size": 8192, "intermediate_size": 28672, "num_hidden_layers": 80, "num_attention_heads": 64, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 500000.0, "rope_scaling": {"factor": 8.0, "low_freq_factor": 1.0, "high_freq_factor": 4.0, "original_max_position_embeddings": 8192, "rope_type": "llama3"}, "attention_bias": false, "attention_dropout": 0.0, "mlp_bias": false, "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": "bfloat16", "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": false, "chunk_size_feed_forward": 0, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": ["LlamaForCausalLM"], "finetuning_task": null, "id2label": {"0": "LABEL_0", "1": "LABEL_1"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1}, "tokenizer_class": null, "prefix": null, "bos_token_id": 128000, "pad_token_id": null, "eos_token_id": [128001, 128008, 128009], "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": 
"meta-llama/Meta-Llama-3.1-70B-Instruct", "transformers_version": "4.44.2", "model_type": "llama"}
vllm_gemlite_fp16/model-00001-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18f7442d62b4a487b590b7d51952bcb3330e0bd457107177b79db71ad95c3388
3
+ size 2756485456
vllm_gemlite_fp16/model-00002-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56f7eb909976c39807b9877e8192409ab4b0619a0f0f060a322053781f7d7496
3
+ size 1003259648
vllm_gemlite_fp16/model-00003-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7818939209d36a272d4bd0ff3b2d27c1e8d18ee6521a08660bc917d574a70bd0
3
+ size 1058350864
vllm_gemlite_fp16/model-00004-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e9f57dfbbd598bbc29fd6f1af192fd2aae659bca36f4d9ff2439f633e43120a
3
+ size 1047073216
vllm_gemlite_fp16/model-00005-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be3f6dd230f5d421172feea00a1f73fb5ab811fb564f838aaff99775f8a332f5
3
+ size 1003267744
vllm_gemlite_fp16/model-00006-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f63f8bf82134c83d0634bd0bd2aa84283ff47b993861c21a2d36d0eb1e42f2b2
3
+ size 1150060496
vllm_gemlite_fp16/model-00007-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab50c3d72509de5d62b180fff081bf2c35d2b61113585da829745e9282662c07
3
+ size 1186760760
vllm_gemlite_fp16/model-00008-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b45a65bd1b65632b65f24cac9cafd4a4f7c1a67813f451ae06cdcaf8dd9e06e8
3
+ size 1168451512
vllm_gemlite_fp16/model-00009-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c0c43df81ed0075b46dfa67a9023759bf1b762ecee90cec19b9f65567f3386e
3
+ size 1267274464
vllm_gemlite_fp16/model-00010-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8d471643f63ed772f0d84c13ea22237e13cbfea44585ff8fb067b5749bad470
3
+ size 1076668096
vllm_gemlite_fp16/model-00011-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:432061a6447cc02fbb8f1116eaf0f759bc11938b65f0cb46d25dd1e769277fda
3
+ size 1076660144
vllm_gemlite_fp16/model-00012-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10fa4536b6b287c97481def59eddae6e6a1147800bd7209cbf57db68fda26ae9
3
+ size 1186760760
vllm_gemlite_fp16/model-00013-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8874e8ec2e20114030266372a41410a7ffe0af64690745f8fe0f632d6240a3f2
3
+ size 1168451512
vllm_gemlite_fp16/model-00014-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efdc1bdb68c3fe0055f9f352ea2ef1fe1205eaca704783adb456e538a3c24824
3
+ size 1047073328
vllm_gemlite_fp16/model-00015-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:577fe73fe18bbdcfa8083a55a59fbc978d19917010288ebcc262ead7bd75c660
3
+ size 1003267744
vllm_gemlite_fp16/model-00016-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db40123be429fc4961932be83742d3cf3508eec8cd4d6b6123ff5253f084bdc7
3
+ size 1003259792
vllm_gemlite_fp16/model-00017-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6df0f659820e6337dd7ac1cb351e74c4a6de4653b3feceb15e531f5aaac6ecf
3
+ size 1003259776
vllm_gemlite_fp16/model-00018-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:596899b4741d0113fecae2c6de243d19db851d83478e6b6188dd985c760c3558
3
+ size 1058350992
vllm_gemlite_fp16/model-00019-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:453da070a89a727eb08f805673edd7acc5a68697ee77e196585678c6163cfcea
3
+ size 1047073328
vllm_gemlite_fp16/model-00020-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e33af68bdc473e51ae2105e1f935da5d42a62a471ec457df244e3d6aa049188
3
+ size 1003267744
vllm_gemlite_fp16/model-00021-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1583d8c335922d2a0213294760ac6e3be0c12c81b245215e20d8db93757e8f4e
3
+ size 1113360328
vllm_gemlite_fp16/model-00022-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b38f06b861dac95cc4ebfe356d6449fe1dcde1083fcee7008a85ba6b54068cf8
3
+ size 1113360320
vllm_gemlite_fp16/model-00023-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e344bea605a60b009b300a13b7da61a3a13c9adc6be7ac5427c724d2c9feb4bb
3
+ size 1058350992
vllm_gemlite_fp16/model-00024-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67d5794205d58c53d6d5b58a4517a53f5e1183b55de2a7b3c30d9ff6c4ac15ae
3
+ size 1047073328
vllm_gemlite_fp16/model-00025-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91ddcfa4d21189a7dfdb322c27234908b6ce7574a5aaa6346e05ca16e8f768f2
3
+ size 1076668096
vllm_gemlite_fp16/model-00026-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54164e51ab1f2505a2903c63234a07ba6cb499ca89ef11d1d0c5c8cbaac938fe
3
+ size 1076660144
vllm_gemlite_fp16/model-00027-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ede65a7f1c243c0f5e1e0119b6d99951f23b45a873cbaeff98604b67494e97bf
3
+ size 1076660128
vllm_gemlite_fp16/model-00028-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:669be245bac815fe623a0dfd753b1c8f7221e1c5b4a09de1bd3f8b63153d8210
3
+ size 1058350992
vllm_gemlite_fp16/model-00029-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f731269c5d5e573fda1abe03e67dcd548df2bae7ff31794834872a131d141113
3
+ size 1157190320
vllm_gemlite_fp16/model-00030-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c845d7ccba30e2e0eb8ab20aa313d5ed45dca6634e214a62ed323b70462fa828
3
+ size 2101346432
vllm_gemlite_fp16/quantize_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"group_size": {"gate_up_proj": 32, "qkv_proj": 128, "o_proj": 128, "down_proj": 32}, "nbits": {"gate_up_proj": 2, "qkv_proj": 4, "o_proj": 4, "down_proj": 2}, "lora_rank": 64, "skipped_dora_layers": [], "block_influence_layers": ["layers.0", "layers.13", "layers.15", "layers.17", "layers.19", "layers.21", "layers.23", "layers.26", "layers.29", "layers.31", "layers.33", "layers.56", "layers.59", "layers.68", "layers.71", "layers.79"], "groupsize_4bit": 128, "bitblas_dtype": null}