XAgentLlama-7B-preview-q4f32_1-MLC / ndarray-cache-b16.json
gatepoet's picture
Initial commit
8b95013 verified
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 4065166592.0,
"BitsPerParam": 4.826164192631324
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 65952960,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65952960,
"byteOffset": 0
}
],
"md5sum": "3e9f0d8f4906d14e7ebb1bfd6d76b1df"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 65952960,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65952960,
"byteOffset": 0
}
],
"md5sum": "f2788857360e466befebe30e5c7503e3"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "f46f09330ca90fabc7d9ac8afb05c109"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "6a580f3f776072e15f2598ce5f0cb4ec"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "cd631a9d2ef60ac84f454b41579da555"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "30737a8c65153e14512a94f6b0610238"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 31832512,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
32016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6595296,
"byteOffset": 0
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 6595296
},
{
"name": "lm_head.q_scale",
"shape": [
32016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6595296,
"byteOffset": 6603488
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 13198784
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 13206976
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 15738304
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 24176064
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 25019840
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 25028032
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 29563328
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 31824320
}
],
"md5sum": "5ca000c3ed8c29be30f888fcc68cb19d"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "606750825d756ff72d308731d7999516"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "ac04a3deadf9baebf017e8d75fa7ce35"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "ead0e5361f59a0866644d3e56d289533"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "d19d5560674d55c540cc730e5f935bcb"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 30446592,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 2531328
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 10969088
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 11812864
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 11821056
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 16356352
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 18617344
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 18625536
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 21156864
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 29594624
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 30438400
}
],
"md5sum": "32efd138d9211984f5fd9ae8dec1ae11"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "cdd1724d334b2a249f164ba6a661d640"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 31945728,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 4535296
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 27145216
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 29406208
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 29414400
}
],
"md5sum": "42a816c729a3cd6b1d4fe8f22b9b3181"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "332329a61a6a6a9616ebb3ddf2d8ef24"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "563d28dda288cb5aab62425bcaebaf1b"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "a39447b95c746ca915301ba628c967de"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "c2cd37e6497384a1ce1c2588840cefe7"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "4cdc8b60338e88a8b07d92123afc283f"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 32450560,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 8437760
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9281536
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 9289728
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 13825024
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 16086016
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 16094208
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 18625536
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 27063296
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27907072
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27915264
}
],
"md5sum": "ed7474fb35769d0ee9a2bb46dc50479f"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 30113792,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 0
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 2260992
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 2269184
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 27582464
}
],
"md5sum": "d8f0abf86bc7a2a530808e0051ca16c9"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "a9ce8f115ffbb0644f0e3183ad53914e"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "c3df7675caf4c3e5410a7fb78ea0057c"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "845a0a8137a01afb3d52c73e83d292d2"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "511c318725f0b294441e80a687cd5a2f"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "4bfe587d4632d4a904fb8f6764a23e3e"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 32450560,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 8437760
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9281536
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 9289728
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 13825024
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 16086016
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 16094208
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 18625536
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 27063296
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27907072
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27915264
}
],
"md5sum": "0bed92fdf24bafa29e49f2e690bed37f"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 30113792,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 0
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 2260992
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 2269184
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 27582464
}
],
"md5sum": "685e87e1c511185f404d135910914b84"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "d76b1a3017657f506ac390f0f49c637b"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "c888eb31a05cb69e2579c02f64d8585d"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "aaaa271ed5bdbf69204752bfcae23977"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "f24031b8a6b6ceb9f76e0e306a3f8fe4"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "467eae9a5071ead0928785399b8cbb05"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 32450560,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 8437760
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9281536
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 9289728
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 13825024
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 16086016
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 16094208
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 18625536
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 27063296
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27907072
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27915264
}
],
"md5sum": "2cdbba1aab18b69e3650b3c55218ad3b"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 30113792,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 0
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 2260992
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 2269184
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 27582464
}
],
"md5sum": "c24932f73f3c31c359a1965bfa559fd0"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "881916c8a66dbf48f280fc0d813a16d3"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "a4b193c0c04327ba6006c83b9c7d1b79"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "3098bd5ac1aa7fcd27241e6c72aaecc2"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "fccef87bcf00e5b8cb405dcd4ff0269f"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "c9e97f57ac8fa6f090c1fd5d266e8001"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 32450560,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 8437760
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9281536
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 9289728
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 13825024
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 16086016
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 16094208
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 18625536
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 27063296
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27907072
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27915264
}
],
"md5sum": "6ee20a846c1ab013c924c3cd964ff8f0"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 30113792,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 0
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 2260992
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 2269184
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 27582464
}
],
"md5sum": "707cd6a398406ecbce8d25b8ecc69e50"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "ed6dd61a68ffe170b4cf4e0d9c2a2aa5"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "3f0c5c25ec4b342850a412d8ce46d05a"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "aa6faa03725f38e4f10f0f96b9b57c18"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "626cac8dfa60cf44e5ccf72957825529"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "9f9dd7b7779bf19271cc5db9ae44dd0c"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 32450560,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 8437760
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9281536
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 9289728
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 13825024
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 16086016
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 16094208
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 18625536
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 27063296
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27907072
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27915264
}
],
"md5sum": "ce9815749c9b25bd426c46a28ed350f2"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 30113792,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 0
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 2260992
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 2269184
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 27582464
}
],
"md5sum": "87ac07c34bf90b3dba866889430a3d49"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "9e4b93cc276213a32ac8202d8a47b455"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "4d8fe0cace19f51b5a1f4bbbd97348be"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "f9b19cf4bfbfdf144e96e4cd0290d608"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "79c63f45e22636328f858a7eb1c2ad27"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "bb7d0e6a7d61cd974ca51657252ca429"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 32450560,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 8437760
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9281536
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 9289728
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 13825024
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 16086016
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 16094208
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 18625536
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 27063296
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27907072
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27915264
}
],
"md5sum": "91b5488d429c1369ab6d3a422ef1a203"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 30113792,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 0
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 2260992
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 2269184
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 27582464
}
],
"md5sum": "8a6029d0b01f05cb678b0a812b35c410"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "106a680297319126b035a6800296b3c2"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "a9fef6091b79ec19d70b4d7d139c735b"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "8bad3d061476a9e9e09b798272e240e4"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "d1b42943faa91b04c12dad5b5729221c"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "f2db0a8e54fee87a80d729f6e4aecb89"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 32450560,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 8437760
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9281536
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 9289728
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 13825024
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 16086016
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 16094208
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 18625536
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 27063296
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27907072
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27915264
}
],
"md5sum": "9ec1bb71ef7b4df5c3c1fef82c618930"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 30113792,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 0
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 2260992
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 2269184
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 27582464
}
],
"md5sum": "8d67be05359f6c4f110f937a2f7d3dec"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "a2fd9103fbd32a01030ceb3a49949f1d"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "d0e6e23e49c0dfa46a7ac9df23f407b1"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "5fa4f8a27b07319b8cdfe6fb04005f06"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "af7660dc3fa83e0b387965ddfbe3b296"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "3e3de0d62c36bd1c62e02d6932490b7a"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 32450560,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 8437760
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9281536
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 9289728
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 13825024
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 16086016
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 16094208
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 18625536
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 27063296
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27907072
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27915264
}
],
"md5sum": "c875e5605a68eeb8202ec84b474b8b76"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 30113792,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 2260992
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 2269184
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 27582464
}
],
"md5sum": "9e83d14108237d02c1e6e9a651a0320d"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "a87ac6470eec8259de9b63222fa868e9"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "dac0f9fe3b6fd6ab74f8400072aceff9"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "5290e4371521d59549739c6cf585c18c"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "afabb12f1d30bd920dd20de09148d9bd"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "3457c563ce43c660d40d7fe7c800dc97"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 32450560,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 8437760
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9281536
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 9289728
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 13825024
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 16086016
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 16094208
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 18625536
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 27063296
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27907072
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27915264
}
],
"md5sum": "4ff3d7c21e3500017de37a2d39dd8d05"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 30113792,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 0
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 2260992
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 2269184
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 27582464
}
],
"md5sum": "d4e3ea7d14cd24a8d4dd8d7e28757337"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "76716aa722eb9e6b08ddd8e5d36e206f"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "6aac28eff8708efb49de2fad6fe89cfd"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "abe29f9b083dacb5754484b6d678a934"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "23c404a556c2473da372fa5f68f38963"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "964d1a5ec4eb0f413b4900deb9abd033"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 32450560,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 8437760
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9281536
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 9289728
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 13825024
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 16086016
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 16094208
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 18625536
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 27063296
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27907072
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27915264
}
],
"md5sum": "31d54601cfcbb93fb846a1313e916637"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 30113792,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 0
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 2260992
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 2269184
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 27582464
}
],
"md5sum": "d44cf9a33948c64f149e1099ca55d0ae"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "f2aba2742b0641f096672ab90e0b76dd"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "36ff7a8c6fc0c110e8c178e1f91f64ee"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "5630a8f833cebb2bcb9624fabd420b66"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "aa3b1f34acbeedaf13b646b10b2c9839"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "1f6c6a1520018b5266af675aa0d2f5d3"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 32450560,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 8437760
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9281536
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 9289728
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 13825024
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 16086016
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 16094208
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 18625536
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 27063296
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27907072
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27915264
}
],
"md5sum": "701533d620d7e4ad2d51fd0ff204a3c8"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 30113792,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 0
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 2260992
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 2269184
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 27582464
}
],
"md5sum": "1db408bc08b30fc1128107cd0b9318ae"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "f99426c5f5c1cd3fd6373572d86b5185"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "d10d70cddd812838bd2a745ac8cb7dea"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "dbe197173042fdeb2065b759c37bb8f4"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "7558277f67d83639fc9c74be15c770b0"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "43a9026b68611eab9cb879fef9e1867c"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 32450560,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 8437760
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9281536
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 9289728
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 13825024
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 16086016
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 16094208
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 18625536
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 27063296
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27907072
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27915264
}
],
"md5sum": "c7ca1f5a2d73ab6e1625f5eef3343c65"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 30113792,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 0
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 2260992
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 2269184
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 27582464
}
],
"md5sum": "24605f23111849e6e8c4736db8e2e5de"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "dbd7542eb8f91d407f2a2d497a905a26"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "c3b2a8508fe9c98f2951a84075ec209f"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "42c8b9d9a3523d664b9e082f521b3f12"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "aa6d57da91f815617026fdc4c748f565"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "97ce29229ccca74ae0795d66b563454f"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 32450560,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 8437760
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9281536
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 9289728
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 13825024
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 16086016
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 16094208
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 18625536
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 27063296
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27907072
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27915264
}
],
"md5sum": "3653d4594a72e58d421d30fb8d16521f"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 30113792,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 0
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 2260992
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 2269184
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 27582464
}
],
"md5sum": "86a2da0e12fc7e468cf584f9c5fc429f"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "cdef0b5a701930bad74cd983b72e29d1"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "9c32e88d29c77796772eeb389147db30"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 25313280,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 0
}
],
"md5sum": "5b8acff3a08611ca314507c5cba3ffc0"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "6d3dba68c6d157b6b2a7acae8c1b26e8"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "f9a68afaeae71fdfa1c6f7d25dcd17fb"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 32450560,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 8437760
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9281536
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 9289728
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 13825024
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 16086016
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 16094208
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 18625536
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 27063296
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 27907072
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27915264
}
],
"md5sum": "b8589adf09fef20aac0fffd8d26a499d"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 30113792,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 0
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 2260992
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
12288,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25313280,
"byteOffset": 2269184
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 27582464
}
],
"md5sum": "b7e45faffbb62a347d7b1dde5bf9bcfd"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 45352960,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
22016,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45352960,
"byteOffset": 0
}
],
"md5sum": "93e9c8b4f4d113e12eeb0496a4d7e16a"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 22609920,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1380
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22609920,
"byteOffset": 0
}
],
"md5sum": "4b25dd42c0e625baca5a046c22bb4c66"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 16086016,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
515
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8437760,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 8437760
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 9281536
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 9289728
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 13825024
}
],
"md5sum": "1daed501e1ea0213bcc63eb47097a986"
}
]
}