upload smol-IQ4_KSS and post all perplexity data
Browse files
- README.md +54 -0
- images/perplexity.png +2 -2
README.md
CHANGED
|
@@ -147,6 +147,60 @@ numactl -N ${SOCKET} -m ${SOCKET} \
|
|
| 147 |
|
| 148 |
</details>
|
| 149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
## smol-IQ3_KS 75.934 GiB (3.312 BPW)
|
| 151 |
PPL over 561 chunks for n_ctx=512 = 2.7856 +/- 0.01365
|
| 152 |
|
|
|
|
| 147 |
|
| 148 |
</details>
|
| 149 |
|
| 150 |
+
## smol-IQ4_KSS 94.080 GiB (4.103 BPW)
|
| 151 |
+
PPL over 561 chunks for n_ctx=512 = 2.5705 +/- 0.01211
|
| 152 |
+
|
| 153 |
+
<details>
|
| 154 |
+
|
| 155 |
+
<summary>👈 Secret Recipe</summary>
|
| 156 |
+
|
| 157 |
+
```bash
|
| 158 |
+
#!/usr/bin/env bash
|
| 159 |
+
|
| 160 |
+
custom="
|
| 161 |
+
# 45 Repeating Layers [0-44]
|
| 162 |
+
|
| 163 |
+
# Attention [0-44] GPU
|
| 164 |
+
blk\..*\.attn_gate.*=iq6_k
|
| 165 |
+
blk\..*\.attn_q.*=iq6_k
|
| 166 |
+
blk\..*\.attn_k.*=iq6_k
|
| 167 |
+
blk\..*\.attn_v.*=iq6_k
|
| 168 |
+
blk\..*\.attn_output.*=iq6_k
|
| 169 |
+
|
| 170 |
+
# First 3 Dense Layers [0-2] GPU
|
| 171 |
+
blk\..*\.ffn_down\.weight=iq6_k
|
| 172 |
+
blk\..*\.ffn_(gate|up)\.weight=iq6_k
|
| 173 |
+
|
| 174 |
+
# Shared Expert Layers [3-44] GPU
|
| 175 |
+
blk\..*\.ffn_down_shexp\.weight=iq6_k
|
| 176 |
+
blk\..*\.ffn_(gate|up)_shexp\.weight=iq6_k
|
| 177 |
+
|
| 178 |
+
# Routed Experts Layers [3-44] CPU
|
| 179 |
+
blk\..*\.ffn_down_exps\.weight=iq4_kss
|
| 180 |
+
blk\..*\.ffn_(gate|up)_exps\.weight=iq4_kss
|
| 181 |
+
|
| 182 |
+
# Non-Repeating Layers
|
| 183 |
+
token_embd\.weight=iq4_k
|
| 184 |
+
output\.weight=iq6_k
|
| 185 |
+
"
|
| 186 |
+
|
| 187 |
+
custom=$(
|
| 188 |
+
echo "$custom" | grep -v '^#' | \
|
| 189 |
+
sed -Ez 's:\n+:,:g;s:,$::;s:^,::'
|
| 190 |
+
)
|
| 191 |
+
|
| 192 |
+
numactl -N ${SOCKET} -m ${SOCKET} \
|
| 193 |
+
./build/bin/llama-quantize \
|
| 194 |
+
--custom-q "$custom" \
|
| 195 |
+
--imatrix /mnt/data/models/ubergarm/Step-3.5-Flash-GGUF/imatrix-Step-3.5-Flash-BF16.dat \
|
| 196 |
+
/mnt/data/models/ubergarm/Step-3.5-Flash-GGUF/Step-3.5-Flash-288x7.4B-BF16-00001-of-00009.gguf \
|
| 197 |
+
/mnt/data/models/ubergarm/Step-3.5-Flash-GGUF/Step-3.5-Flash-smol-IQ4_KSS.gguf \
|
| 198 |
+
IQ4_KSS \
|
| 199 |
+
128
|
| 200 |
+
```
|
| 201 |
+
|
| 202 |
+
</details>
|
| 203 |
+
|
| 204 |
## smol-IQ3_KS 75.934 GiB (3.312 BPW)
|
| 205 |
PPL over 561 chunks for n_ctx=512 = 2.7856 +/- 0.01365
|
| 206 |
|
images/perplexity.png
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|