|
{ |
|
"metadata": { |
|
"ParamSize": 325, |
|
"ParamBytes": 4065166592.0, |
|
"BitsPerParam": 4.826164192631324 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 65952960, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_weight", |
|
"shape": [ |
|
32016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65952960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3e9f0d8f4906d14e7ebb1bfd6d76b1df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 65952960, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_weight", |
|
"shape": [ |
|
32016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 65952960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f2788857360e466befebe30e5c7503e3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f46f09330ca90fabc7d9ac8afb05c109" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6a580f3f776072e15f2598ce5f0cb4ec" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cd631a9d2ef60ac84f454b41579da555" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "30737a8c65153e14512a94f6b0610238" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31832512, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_scale", |
|
"shape": [ |
|
32016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6595296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 6595296 |
|
}, |
|
{ |
|
"name": "lm_head.q_scale", |
|
"shape": [ |
|
32016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6595296, |
|
"byteOffset": 6603488 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 13198784 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 13206976 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 15738304 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 24176064 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 25019840 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 25028032 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 29563328 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 31824320 |
|
} |
|
], |
|
"md5sum": "5ca000c3ed8c29be30f888fcc68cb19d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "606750825d756ff72d308731d7999516" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ac04a3deadf9baebf017e8d75fa7ce35" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ead0e5361f59a0866644d3e56d289533" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d19d5560674d55c540cc730e5f935bcb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30446592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 2531328 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 10969088 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 11812864 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 11821056 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 16356352 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 18617344 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 18625536 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 21156864 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 29594624 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 30438400 |
|
} |
|
], |
|
"md5sum": "32efd138d9211984f5fd9ae8dec1ae11" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cdd1724d334b2a249f164ba6a661d640" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31945728, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 4535296 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 27145216 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 29406208 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 29414400 |
|
} |
|
], |
|
"md5sum": "42a816c729a3cd6b1d4fe8f22b9b3181" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "332329a61a6a6a9616ebb3ddf2d8ef24" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "563d28dda288cb5aab62425bcaebaf1b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a39447b95c746ca915301ba628c967de" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c2cd37e6497384a1ce1c2588840cefe7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4cdc8b60338e88a8b07d92123afc283f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32450560, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 8437760 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 13825024 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 16086016 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 16094208 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 18625536 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 27063296 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27907072 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 27915264 |
|
} |
|
], |
|
"md5sum": "ed7474fb35769d0ee9a2bb46dc50479f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30113792, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 2260992 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 2269184 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 27582464 |
|
} |
|
], |
|
"md5sum": "d8f0abf86bc7a2a530808e0051ca16c9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a9ce8f115ffbb0644f0e3183ad53914e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c3df7675caf4c3e5410a7fb78ea0057c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "845a0a8137a01afb3d52c73e83d292d2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "511c318725f0b294441e80a687cd5a2f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4bfe587d4632d4a904fb8f6764a23e3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32450560, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 8437760 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 13825024 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 16086016 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 16094208 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 18625536 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 27063296 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27907072 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 27915264 |
|
} |
|
], |
|
"md5sum": "0bed92fdf24bafa29e49f2e690bed37f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30113792, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 2260992 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 2269184 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 27582464 |
|
} |
|
], |
|
"md5sum": "685e87e1c511185f404d135910914b84" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d76b1a3017657f506ac390f0f49c637b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c888eb31a05cb69e2579c02f64d8585d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aaaa271ed5bdbf69204752bfcae23977" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f24031b8a6b6ceb9f76e0e306a3f8fe4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "467eae9a5071ead0928785399b8cbb05" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32450560, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 8437760 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 13825024 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 16086016 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 16094208 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 18625536 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 27063296 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27907072 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 27915264 |
|
} |
|
], |
|
"md5sum": "2cdbba1aab18b69e3650b3c55218ad3b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30113792, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 2260992 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 2269184 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 27582464 |
|
} |
|
], |
|
"md5sum": "c24932f73f3c31c359a1965bfa559fd0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "881916c8a66dbf48f280fc0d813a16d3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a4b193c0c04327ba6006c83b9c7d1b79" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3098bd5ac1aa7fcd27241e6c72aaecc2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fccef87bcf00e5b8cb405dcd4ff0269f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c9e97f57ac8fa6f090c1fd5d266e8001" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32450560, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 8437760 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 13825024 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 16086016 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 16094208 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 18625536 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 27063296 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27907072 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 27915264 |
|
} |
|
], |
|
"md5sum": "6ee20a846c1ab013c924c3cd964ff8f0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30113792, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 2260992 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 2269184 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 27582464 |
|
} |
|
], |
|
"md5sum": "707cd6a398406ecbce8d25b8ecc69e50" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ed6dd61a68ffe170b4cf4e0d9c2a2aa5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3f0c5c25ec4b342850a412d8ce46d05a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aa6faa03725f38e4f10f0f96b9b57c18" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "626cac8dfa60cf44e5ccf72957825529" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9f9dd7b7779bf19271cc5db9ae44dd0c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32450560, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 8437760 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 13825024 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 16086016 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 16094208 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 18625536 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 27063296 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27907072 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 27915264 |
|
} |
|
], |
|
"md5sum": "ce9815749c9b25bd426c46a28ed350f2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30113792, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 2260992 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 2269184 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 27582464 |
|
} |
|
], |
|
"md5sum": "87ac07c34bf90b3dba866889430a3d49" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9e4b93cc276213a32ac8202d8a47b455" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4d8fe0cace19f51b5a1f4bbbd97348be" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f9b19cf4bfbfdf144e96e4cd0290d608" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "79c63f45e22636328f858a7eb1c2ad27" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bb7d0e6a7d61cd974ca51657252ca429" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32450560, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 8437760 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 13825024 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 16086016 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 16094208 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 18625536 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 27063296 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27907072 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 27915264 |
|
} |
|
], |
|
"md5sum": "91b5488d429c1369ab6d3a422ef1a203" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30113792, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 2260992 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 2269184 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 27582464 |
|
} |
|
], |
|
"md5sum": "8a6029d0b01f05cb678b0a812b35c410" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "106a680297319126b035a6800296b3c2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a9fef6091b79ec19d70b4d7d139c735b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8bad3d061476a9e9e09b798272e240e4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d1b42943faa91b04c12dad5b5729221c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f2db0a8e54fee87a80d729f6e4aecb89" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32450560, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 8437760 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 13825024 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 16086016 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 16094208 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 18625536 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 27063296 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27907072 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 27915264 |
|
} |
|
], |
|
"md5sum": "9ec1bb71ef7b4df5c3c1fef82c618930" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30113792, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 2260992 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 2269184 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 27582464 |
|
} |
|
], |
|
"md5sum": "8d67be05359f6c4f110f937a2f7d3dec" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a2fd9103fbd32a01030ceb3a49949f1d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d0e6e23e49c0dfa46a7ac9df23f407b1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5fa4f8a27b07319b8cdfe6fb04005f06" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "af7660dc3fa83e0b387965ddfbe3b296" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3e3de0d62c36bd1c62e02d6932490b7a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32450560, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 8437760 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 13825024 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 16086016 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 16094208 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 18625536 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 27063296 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27907072 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 27915264 |
|
} |
|
], |
|
"md5sum": "c875e5605a68eeb8202ec84b474b8b76" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30113792, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 2260992 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 2269184 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 27582464 |
|
} |
|
], |
|
"md5sum": "9e83d14108237d02c1e6e9a651a0320d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a87ac6470eec8259de9b63222fa868e9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dac0f9fe3b6fd6ab74f8400072aceff9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5290e4371521d59549739c6cf585c18c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "afabb12f1d30bd920dd20de09148d9bd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3457c563ce43c660d40d7fe7c800dc97" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32450560, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 8437760 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 13825024 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 16086016 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 16094208 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 18625536 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 27063296 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27907072 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 27915264 |
|
} |
|
], |
|
"md5sum": "4ff3d7c21e3500017de37a2d39dd8d05" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30113792, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 2260992 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 2269184 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 27582464 |
|
} |
|
], |
|
"md5sum": "d4e3ea7d14cd24a8d4dd8d7e28757337" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "76716aa722eb9e6b08ddd8e5d36e206f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6aac28eff8708efb49de2fad6fe89cfd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "abe29f9b083dacb5754484b6d678a934" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "23c404a556c2473da372fa5f68f38963" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "964d1a5ec4eb0f413b4900deb9abd033" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32450560, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 8437760 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 13825024 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 16086016 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 16094208 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 18625536 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 27063296 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27907072 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 27915264 |
|
} |
|
], |
|
"md5sum": "31d54601cfcbb93fb846a1313e916637" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30113792, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 2260992 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 2269184 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 27582464 |
|
} |
|
], |
|
"md5sum": "d44cf9a33948c64f149e1099ca55d0ae" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f2aba2742b0641f096672ab90e0b76dd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "36ff7a8c6fc0c110e8c178e1f91f64ee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5630a8f833cebb2bcb9624fabd420b66" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aa3b1f34acbeedaf13b646b10b2c9839" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1f6c6a1520018b5266af675aa0d2f5d3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32450560, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 8437760 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 13825024 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 16086016 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 16094208 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 18625536 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 27063296 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27907072 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 27915264 |
|
} |
|
], |
|
"md5sum": "701533d620d7e4ad2d51fd0ff204a3c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30113792, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 2260992 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 2269184 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 27582464 |
|
} |
|
], |
|
"md5sum": "1db408bc08b30fc1128107cd0b9318ae" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f99426c5f5c1cd3fd6373572d86b5185" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d10d70cddd812838bd2a745ac8cb7dea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dbe197173042fdeb2065b759c37bb8f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7558277f67d83639fc9c74be15c770b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "43a9026b68611eab9cb879fef9e1867c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32450560, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 8437760 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 13825024 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 16086016 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 16094208 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 18625536 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 27063296 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27907072 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 27915264 |
|
} |
|
], |
|
"md5sum": "c7ca1f5a2d73ab6e1625f5eef3343c65" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30113792, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 2260992 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 2269184 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 27582464 |
|
} |
|
], |
|
"md5sum": "24605f23111849e6e8c4736db8e2e5de" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dbd7542eb8f91d407f2a2d497a905a26" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c3b2a8508fe9c98f2951a84075ec209f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "42c8b9d9a3523d664b9e082f521b3f12" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aa6d57da91f815617026fdc4c748f565" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "97ce29229ccca74ae0795d66b563454f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32450560, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 8437760 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 13825024 |
|
}, |
|
{ |
|
"name": "model.layers.28.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 16086016 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 16094208 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 18625536 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 27063296 |
|
}, |
|
{ |
|
"name": "model.layers.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27907072 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 27915264 |
|
} |
|
], |
|
"md5sum": "3653d4594a72e58d421d30fb8d16521f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30113792, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 2260992 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 2269184 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 27582464 |
|
} |
|
], |
|
"md5sum": "86a2da0e12fc7e468cf584f9c5fc429f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cdef0b5a701930bad74cd983b72e29d1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9c32e88d29c77796772eeb389147db30" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25313280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5b8acff3a08611ca314507c5cba3ffc0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6d3dba68c6d157b6b2a7acae8c1b26e8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f9a68afaeae71fdfa1c6f7d25dcd17fb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32450560, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 8437760 |
|
}, |
|
{ |
|
"name": "model.layers.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 13825024 |
|
}, |
|
{ |
|
"name": "model.layers.30.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 16086016 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 16094208 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 18625536 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 27063296 |
|
}, |
|
{ |
|
"name": "model.layers.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 27907072 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 27915264 |
|
} |
|
], |
|
"md5sum": "b8589adf09fef20aac0fffd8d26a499d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30113792, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.input_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 2260992 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
12288, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25313280, |
|
"byteOffset": 2269184 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
12288, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2531328, |
|
"byteOffset": 27582464 |
|
} |
|
], |
|
"md5sum": "b7e45faffbb62a347d7b1dde5bf9bcfd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 45352960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
22016, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 45352960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "93e9c8b4f4d113e12eeb0496a4d7e16a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22609920, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
1380 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22609920, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4b25dd42c0e625baca5a046c22bb4c66" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16086016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
515 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8437760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 843776, |
|
"byteOffset": 8437760 |
|
}, |
|
{ |
|
"name": "model.layers.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 8192, |
|
"byteOffset": 9281536 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
22016, |
|
103 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 4535296, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
276 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2260992, |
|
"byteOffset": 13825024 |
|
} |
|
], |
|
"md5sum": "1daed501e1ea0213bcc63eb47097a986" |
|
} |
|
] |
|
} |