Qwen3-4B-q4f16_1-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
8b83b01 verified
{
"metadata": {
"ParamSize": 435,
"ParamBytes": 2262920192.0,
"BitsPerParam": 4.500560626944995
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 194478080,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
151936,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 194478080,
"byteOffset": 0
}
],
"md5sum": "82277c94f44a3a7153639891038ecfb0"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 24314880,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
151936,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 24309760,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 24309760
}
],
"md5sum": "62997375dbe823e54515758ffc385c30"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "4a7e127455416e86f350fa1f0c2d8e72"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.0.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.0.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "29fd592dcfaceab58a4fc963499dad82"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "99f64762f8e041a8e35eccb933ab7768"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.1.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.1.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "e074e8a91ae6c13bf5b18a029e1ca0ef"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "7f1d26463cdfa648afcedb11110aea1c"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.10.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.10.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "6a3a20cc49bfd82295918a187e0b7081"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "c7f1aa652746c1e5b43fcd791c4229db"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.11.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.11.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "71115c1046d23c131a6841e2e72028f2"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "edf74eef64f29760519e7513a8388e7c"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.12.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.12.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "750afa2776206d3fa19f69078c4442cb"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "49fe9f1430285032069c36c30797c4da"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.13.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.13.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "9b726570ca8626afcf0a13e266231b0e"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "efdd6eda73a5841267ee33595e1d42c3"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "20f827e04f653d2b2b4f223c5b99c227"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 31872512,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.14.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.14.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
}
],
"md5sum": "da7392a57116d95474a40f44920b600d"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "f76c373fb865ef58db8d0a6fb03bb047"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 31872512,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 3112960
},
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3113216
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 10977536
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11960576
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 17203456
},
{
"name": "model.layers.15.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17858816
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17859072
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17864192
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 30316032
}
],
"md5sum": "0644d63be14f7953d9b1aa1bb02ea5f4"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "6079939593d71d90af338c0bc65f9081"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3112960
},
{
"name": "model.layers.2.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 3118080
},
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3118336
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 10982656
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11965696
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 17208576
},
{
"name": "model.layers.2.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17863936
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17864192
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17869312
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 30321152
}
],
"md5sum": "02933f7e4ca337cccb50f95f27d3d6dd"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "444704eb41944633c388985da1054e21"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3112960
},
{
"name": "model.layers.3.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 3118080
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3118336
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 10982656
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11965696
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 17208576
},
{
"name": "model.layers.3.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17863936
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17864192
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17869312
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 30321152
}
],
"md5sum": "e9881e43186b91fa8204ca1d7fb90cc4"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "a347e6c9c633653b28dde5c2ff5677e9"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3112960
},
{
"name": "model.layers.4.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 3118080
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3118336
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 10982656
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11965696
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 17208576
},
{
"name": "model.layers.4.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17863936
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17864192
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17869312
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 30321152
}
],
"md5sum": "c174a0abbe53d9bb7d879ce7989edf30"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "c82410e1decfe08ee014bccb4283b1eb"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3112960
},
{
"name": "model.layers.5.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 3118080
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3118336
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 10982656
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11965696
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 17208576
},
{
"name": "model.layers.5.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17863936
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17864192
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17869312
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 30321152
}
],
"md5sum": "b5a6b342123de80518c0da0cb9b80b22"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "1ded7b18d6c5ab237cbbacea92ca1532"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3112960
},
{
"name": "model.layers.6.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 3118080
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3118336
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 10982656
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11965696
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 17208576
},
{
"name": "model.layers.6.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17863936
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17864192
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17869312
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 30321152
}
],
"md5sum": "fa317d2de5ad997f2d791bbdfe7f6aae"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "2a10028adfc046ff918b68eca2eebcd6"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3112960
},
{
"name": "model.layers.7.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 3118080
},
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3118336
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 10982656
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11965696
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 17208576
},
{
"name": "model.layers.7.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17863936
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17864192
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17869312
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 30321152
}
],
"md5sum": "3759fbbfa618b45435436983c3921c2f"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "c9dbfda92afd0cd7c350572901579d88"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3112960
},
{
"name": "model.layers.8.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 3118080
},
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3118336
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 10982656
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11965696
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 17208576
},
{
"name": "model.layers.8.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17863936
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17864192
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17869312
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 30321152
}
],
"md5sum": "0c48958743c23477210ab4702ac072b1"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 31887872,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3112960
},
{
"name": "model.layers.9.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 3118080
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3118336
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 10982656
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11965696
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 17208576
},
{
"name": "model.layers.9.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17863936
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17864192
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17869312
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 30321152
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31877632
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31882752
}
],
"md5sum": "f3911b943c940947e616943dceb2d40d"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "1cbf527260733263d976a15d77bae615"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.16.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.16.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "9367c3e08c76cb15cc7d2b127bbf3157"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "3dba22d3bba03d66d0c20bcbb9719275"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.17.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.17.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "b7782a78746e8de2b57d5598b69a0a17"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "0cc0fd5454bfa4ece259c5c7c5b0c8c5"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.18.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.18.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "d43d719d59eb2928950d7dc034fcec80"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "ea4f07a62c5d0ececb7819ab2e07b35d"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.19.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.19.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "ee0874b665e23f40aabef09f83c6ab3b"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "cd29c01841e42daf299feb5646ae8e3e"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.20.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.20.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "33d31884dc210b4a77f0668218bfc182"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "ebafb35e880da1dd1a86bb28d0eaa634"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.21.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.21.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "6ca3c366e2e2cd4ebce50f43b0132395"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "d7b4e73816f305d3f9db3a01d235ab72"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.22.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.22.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "8ec4b34b6f9900941df28faa46276561"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "91ec6f1ec8233d7fb4215f398d7a4211"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.23.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.23.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "c1428b47f25e8ad0c9e6142702edc150"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "c0a5b2d46655cd1909514e144b02207c"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.24.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.24.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "7e9b2059b060cc16248e3c146ca74ecc"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "4d87636ad72aadb367b2e36191f4e4d7"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.25.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.25.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "6ac7dd6abfe8070032bad3b797b42974"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "afdb9619569950a49097607fb00f745c"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.26.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.26.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "563c951484371d06654d54adfd3ee9bf"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "63713ebf0922eaf8a6d11b06ec1468f7"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.27.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.27.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "a4ade44e169a6c8f1823e93d3dd60484"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "7c21e07ce47ab803cddeb3814b3a5f83"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.28.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.28.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.28.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.28.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "31b9d40a2343791cc159809937cd4d06"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "ca1bca7d66513e14c04f873417acfdaf"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.29.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.29.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.29.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.29.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "9febd13c942cb05c136acbcba89b6426"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "e9927fc7d7e82288d5ab9bc8b5792e2d"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.30.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.30.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.30.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.30.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "cfd6fde04dc13ed95a95a383dda12418"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "9b83e79558c86d49c9606c66d9ea258d"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.31.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.31.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.31.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.31.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "a6a890181f17aeb9c5f4d249f6d3ad80"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "948d6b6cda2c6c36ee8069a53f5e899d"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.32.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.32.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.32.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.32.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "7fb1be3193e795b95e78007506d5778f"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "c84944b2402b0351218891a54d1368ce"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 31877632,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.33.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.33.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.33.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.33.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
}
],
"md5sum": "16663086814c267bbb20dbccb2c1c1d7"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "d9bda2ac4626a6b61fd7d2d2761daf3b"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 24903680,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
19456,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24903680,
"byteOffset": 0
}
],
"md5sum": "25af209c1cc13ec0d87923e976ac6f78"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 31872512,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 0
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 12451840
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 14008320
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17121280
},
{
"name": "model.layers.34.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17126400
},
{
"name": "model.layers.34.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17126656
},
{
"name": "model.layers.34.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 24990976
},
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25974016
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 31216896
},
{
"name": "model.layers.34.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31872256
}
],
"md5sum": "b9650417ce24fbb82adfa78954fbdaea"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 31882752,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
19456,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3112960,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 3112960
},
{
"name": "model.layers.35.self_attn.c_attn.q_weight",
"shape": [
6144,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3113216
},
{
"name": "model.layers.35.self_attn.c_attn.q_scale",
"shape": [
6144,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 10977536
},
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
2560,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11960576
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
2560,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 655360,
"byteOffset": 17203456
},
{
"name": "model.layers.35.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17858816
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 17859072
},
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
2560,
1216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12451840,
"byteOffset": 17864192
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
2560,
304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1556480,
"byteOffset": 30316032
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31872512
},
{
"name": "model.norm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31877632
}
],
"md5sum": "3791dd0501904e575e836fc2c17c0f33"
}
]
}