jeffzyu committed on Dec 17, 2025

Commit

296b82c

verified ·

1 Parent(s): edcc0fc

Add files using upload-large-folder tool

Browse files

Files changed (47) hide show

._chat_template.jinja +0 -0
._config.json +0 -0
._generation_config.json +0 -0
._model-00035-of-00050.safetensors +3 -0
._model-00036-of-00050.safetensors +3 -0
._model-00037-of-00050.safetensors +3 -0
._model-00038-of-00050.safetensors +3 -0
._model-00039-of-00050.safetensors +3 -0
._model-00040-of-00050.safetensors +3 -0
._model-00041-of-00050.safetensors +3 -0
._model-00042-of-00050.safetensors +3 -0
._model-00043-of-00050.safetensors +3 -0
._model-00044-of-00050.safetensors +3 -0
._model-00045-of-00050.safetensors +3 -0
._model-00046-of-00050.safetensors +3 -0
._model-00047-of-00050.safetensors +3 -0
._model-00048-of-00050.safetensors +3 -0
._model-00049-of-00050.safetensors +3 -0
._model-00050-of-00050.safetensors +3 -0
._model.safetensors.index.json +0 -0
._preprocessor_config.json +0 -0
._processor_config.json +0 -0
._special_tokens_map.json +0 -0
._tokenizer.json +0 -0
._tokenizer_config.json +0 -0
.cache/._huggingface +0 -0
config.json +227 -205
generation_config.json +13 -0
model-00001-of-00050.safetensors +3 -0
model-00002-of-00050.safetensors +3 -0
model-00006-of-00050.safetensors +3 -0
model-00039-of-00050.safetensors +3 -0
model-00040-of-00050.safetensors +3 -0
model-00041-of-00050.safetensors +3 -0
model-00042-of-00050.safetensors +3 -0
model-00043-of-00050.safetensors +3 -0
model-00044-of-00050.safetensors +3 -0
model-00045-of-00050.safetensors +3 -0
model-00046-of-00050.safetensors +3 -0
model-00047-of-00050.safetensors +3 -0
model-00048-of-00050.safetensors +3 -0
model-00049-of-00050.safetensors +3 -0
model-00050-of-00050.safetensors +3 -0
model.safetensors.index.json +0 -0
preprocessor_config.json +36 -0
processor_config.json +6 -0
tokenizer_config.json +0 -1

._chat_template.jinja ADDED Viewed

Binary file (4.1 kB). View file

._config.json ADDED Viewed

Binary file (4.1 kB). View file

._generation_config.json ADDED Viewed

Binary file (4.1 kB). View file

._model-00035-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0e8836dc636a4bef230020c6ffe89f1e343d1c94b8e53001e72b302747c603
+size 4096

._model-00036-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0e8836dc636a4bef230020c6ffe89f1e343d1c94b8e53001e72b302747c603
+size 4096

._model-00037-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0e8836dc636a4bef230020c6ffe89f1e343d1c94b8e53001e72b302747c603
+size 4096

._model-00038-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0e8836dc636a4bef230020c6ffe89f1e343d1c94b8e53001e72b302747c603
+size 4096

._model-00039-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0e8836dc636a4bef230020c6ffe89f1e343d1c94b8e53001e72b302747c603
+size 4096

._model-00040-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0e8836dc636a4bef230020c6ffe89f1e343d1c94b8e53001e72b302747c603
+size 4096

._model-00041-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0e8836dc636a4bef230020c6ffe89f1e343d1c94b8e53001e72b302747c603
+size 4096

._model-00042-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0e8836dc636a4bef230020c6ffe89f1e343d1c94b8e53001e72b302747c603
+size 4096

._model-00043-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0e8836dc636a4bef230020c6ffe89f1e343d1c94b8e53001e72b302747c603
+size 4096

._model-00044-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0e8836dc636a4bef230020c6ffe89f1e343d1c94b8e53001e72b302747c603
+size 4096

._model-00045-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0e8836dc636a4bef230020c6ffe89f1e343d1c94b8e53001e72b302747c603
+size 4096

._model-00046-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0e8836dc636a4bef230020c6ffe89f1e343d1c94b8e53001e72b302747c603
+size 4096

._model-00047-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0e8836dc636a4bef230020c6ffe89f1e343d1c94b8e53001e72b302747c603
+size 4096

._model-00048-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0e8836dc636a4bef230020c6ffe89f1e343d1c94b8e53001e72b302747c603
+size 4096

._model-00049-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0e8836dc636a4bef230020c6ffe89f1e343d1c94b8e53001e72b302747c603
+size 4096

._model-00050-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0e8836dc636a4bef230020c6ffe89f1e343d1c94b8e53001e72b302747c603
+size 4096

._model.safetensors.index.json ADDED Viewed

Binary file (4.1 kB). View file

._preprocessor_config.json ADDED Viewed

Binary file (4.1 kB). View file

._processor_config.json ADDED Viewed

Binary file (4.1 kB). View file

._special_tokens_map.json ADDED Viewed

Binary file (4.1 kB). View file

._tokenizer.json ADDED Viewed

Binary file (4.1 kB). View file

._tokenizer_config.json ADDED Viewed

Binary file (4.1 kB). View file

.cache/._huggingface ADDED Viewed

Binary file (4.1 kB). View file

config.json CHANGED Viewed

@@ -1,214 +1,236 @@
 {
   "architectures": [
-    "Llama4ForCausalLM"
   ],
-  "attention_bias": false,
-  "attention_chunk_size": 8192,
-  "attention_dropout": 0.0,
-  "attn_scale": 0.1,
-  "attn_temperature_tuning": 4,
-  "bos_token_id": 200000,
-  "cache_implementation": "hybrid",
   "dtype": "bfloat16",
-  "eos_token_id": 200008,
-  "floor_scale": 8192,
-  "for_llm_compressor": false,
-  "head_dim": 128,
-  "hidden_act": "silu",
-  "hidden_size": 5120,
-  "initializer_range": 0.02,
-  "interleave_moe_layer_step": 1,
-  "intermediate_size": 8192,
-  "intermediate_size_mlp": 16384,
-  "layer_types": [
-    "chunked_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "full_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "full_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "full_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "full_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "full_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "full_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "full_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "full_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "full_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "full_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "full_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "chunked_attention",
-    "full_attention"
-  ],
-  "max_position_embeddings": 10485760,
-  "model_type": "llama4_text",
-  "moe_layers": [
-    0,
-    1,
-    2,
-    3,
-    4,
-    5,
-    6,
-    7,
-    8,
-    9,
-    10,
-    11,
-    12,
-    13,
-    14,
-    15,
-    16,
-    17,
-    18,
-    19,
-    20,
-    21,
-    22,
-    23,
-    24,
-    25,
-    26,
-    27,
-    28,
-    29,
-    30,
-    31,
-    32,
-    33,
-    34,
-    35,
-    36,
-    37,
-    38,
-    39,
-    40,
-    41,
-    42,
-    43,
-    44,
-    45,
-    46,
-    47
-  ],
-  "no_rope_layers": [
-    1,
-    1,
-    1,
-    0,
-    1,
-    1,
-    1,
-    0,
-    1,
-    1,
-    1,
-    0,
-    1,
-    1,
-    1,
-    0,
-    1,
-    1,
-    1,
-    0,
-    1,
-    1,
-    1,
-    0,
-    1,
-    1,
-    1,
-    0,
-    1,
-    1,
-    1,
-    0,
-    1,
-    1,
-    1,
-    0,
-    1,
-    1,
-    1,
-    0,
-    1,
-    1,
-    1,
-    0,
-    1,
-    1,
-    1,
-    0
-  ],
-  "num_attention_heads": 40,
-  "num_experts_per_tok": 1,
-  "num_hidden_layers": 48,
-  "num_key_value_heads": 8,
-  "num_local_experts": 16,
-  "output_router_logits": false,
-  "pad_token_id": 200018,
-  "quantization_config": {
-    "_load_in_4bit": true,
-    "_load_in_8bit": false,
-    "bnb_4bit_compute_dtype": "bfloat16",
-    "bnb_4bit_quant_storage": "bfloat16",
-    "bnb_4bit_quant_type": "nf4",
-    "bnb_4bit_use_double_quant": false,
-    "llm_int8_enable_fp32_cpu_offload": false,
-    "llm_int8_has_fp16_weight": false,
-    "llm_int8_skip_modules": null,
-    "llm_int8_threshold": 6.0,
-    "load_in_4bit": true,
-    "load_in_8bit": false,
-    "quant_method": "bitsandbytes"
   },
-  "rms_norm_eps": 1e-05,
-  "rope_scaling": {
-    "factor": 16.0,
-    "high_freq_factor": 1.0,
-    "low_freq_factor": 1.0,
-    "original_max_position_embeddings": 8192,
-    "rope_type": "llama3"
-  },
-  "rope_theta": 500000.0,
-  "router_aux_loss_coef": 0.001,
-  "router_jitter_noise": 0.0,
   "tie_word_embeddings": false,
   "transformers_version": "4.57.0",
-  "use_cache": false,
-  "use_qk_norm": true,
-  "vocab_size": 202048,
   "torch_dtype": "bfloat16"
 }

 {
   "architectures": [
+    "Llama4ForConditionalGeneration"
   ],
+  "boi_token_index": 200080,
   "dtype": "bfloat16",
+  "eoi_token_index": 200081,
+  "image_token_index": 200092,
+  "model_type": "llama4",
+  "text_config": {
+    "_attn_implementation_autoset": true,
+    "attention_bias": false,
+    "attention_chunk_size": 8192,
+    "attention_dropout": 0.0,
+    "attn_scale": 0.1,
+    "attn_temperature_tuning": true,
+    "bos_token_id": 200000,
+    "dtype": "bfloat16",
+    "eos_token_id": [
+      200001,
+      200007,
+      200008
+    ],
+    "floor_scale": 8192,
+    "for_llm_compressor": false,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 5120,
+    "initializer_range": 0.02,
+    "interleave_moe_layer_step": 1,
+    "intermediate_size": 8192,
+    "intermediate_size_mlp": 16384,
+    "layer_types": [
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "chunked_attention",
+      "full_attention"
+    ],
+    "max_position_embeddings": 10485760,
+    "model_type": "llama4_text",
+    "moe_layers": [
+      0,
+      1,
+      2,
+      3,
+      4,
+      5,
+      6,
+      7,
+      8,
+      9,
+      10,
+      11,
+      12,
+      13,
+      14,
+      15,
+      16,
+      17,
+      18,
+      19,
+      20,
+      21,
+      22,
+      23,
+      24,
+      25,
+      26,
+      27,
+      28,
+      29,
+      30,
+      31,
+      32,
+      33,
+      34,
+      35,
+      36,
+      37,
+      38,
+      39,
+      40,
+      41,
+      42,
+      43,
+      44,
+      45,
+      46,
+      47
+    ],
+    "no_rope_layers": [
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0,
+      1,
+      1,
+      1,
+      0
+    ],
+    "num_attention_heads": 40,
+    "num_experts_per_tok": 1,
+    "num_hidden_layers": 48,
+    "num_key_value_heads": 8,
+    "num_local_experts": 16,
+    "output_router_logits": false,
+    "pad_token_id": 200018,
+    "rms_norm_eps": 1e-05,
+    "rope_scaling": {
+      "factor": 16.0,
+      "high_freq_factor": 1.0,
+      "low_freq_factor": 1.0,
+      "original_max_position_embeddings": 8192,
+      "rope_type": "llama3"
+    },
+    "rope_theta": 500000.0,
+    "router_aux_loss_coef": 0.001,
+    "router_jitter_noise": 0.0,
+    "use_cache": true,
+    "use_qk_norm": true,
+    "vocab_size": 202048,
+    "torch_dtype": "bfloat16"
   },
   "tie_word_embeddings": false,
   "transformers_version": "4.57.0",
+  "vision_config": {
+    "_attn_implementation_autoset": true,
+    "_vision_feature_layer": -1,
+    "attention_dropout": 0.0,
+    "hidden_act": "gelu",
+    "hidden_size": 1408,
+    "image_size": 336,
+    "initializer_range": 0.02,
+    "intermediate_size": 5632,
+    "model_type": "llama4_vision_model",
+    "multi_modal_projector_bias": false,
+    "norm_eps": 1e-05,
+    "num_attention_heads": 16,
+    "num_channels": 3,
+    "num_hidden_layers": 34,
+    "patch_size": 14,
+    "pixel_shuffle_ratio": 0.5,
+    "projector_dropout": 0.0,
+    "projector_input_dim": 4096,
+    "projector_output_dim": 4096,
+    "rope_theta": 10000,
+    "vision_feature_layer": -1,
+    "vision_feature_select_strategy": "default",
+    "vision_output_dim": 4096
+  },
   "torch_dtype": "bfloat16"
 }

generation_config.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "bos_token_id": 200000,
+  "do_sample": true,
+  "eos_token_id": [
+    200001,
+    200007,
+    200008
+  ],
+  "pad_token_id": 200018,
+  "temperature": 0.6,
+  "top_p": 0.9,
+  "transformers_version": "4.57.0"
+}

model-00001-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:12958641d3be49086efbd1c40d6cae79187eb63ffb6e1042baed2cb6331a8f2f
+size 3938735392

model-00002-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5328ccd6704dfa5dfe7c2b32df8863a0f915ca3c55cd9ec5fa6bfd4645ba94ce
+size 4404205216

model-00006-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7e4924af687b2e08ec7ca19dfda39b6070089de3d0307a943d265e755bb27a4d
+size 4404205216

model-00039-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:172965fa885a8fe67f3cfdcf88933aab8ea7d2c23846f5cd57eb7a7b63c087ad
+size 4404205232

model-00040-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:681727fb33f8c614cc1d45b80186adda279f57e40a4b0ea2af07210150838254
+size 4404205232

model-00041-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f8a2de9f4d2c7c2d3fc95de0ab9575759f45544d1e3bf5c9cbb5e45af57f2e5d
+size 4404205232

model-00042-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a9fac37c0a86827f8b0b9fe741e530fe06a072eba3a517369c1dcbc409131255
+size 4404205232

model-00043-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2e496b89b05169f874b50514238d015ba265afbb690302cd93505d245e749709
+size 4404205232

model-00044-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28aa132bb5bfb77ab02fb679c59a3f1fb975381b37cededa0e57207507289296
+size 4404205232

model-00045-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:14b360a83fbaf5a925451de903ec8a58ecfde9588a410efa4af1e1782569ced8
+size 4404205232

model-00046-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:21c4656bd266e9645520f11d31bb8be5b644f6c1b80af34f29616a534a81e0e0
+size 4404205232

model-00047-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9fb8a6e29c5e734cf1804f5a20ee44d987f6bf611d80e54ad57c0cd20c9142c3
+size 4404205232

model-00048-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb4a1844d4aa52b24e00764370f3e6aea4528de42006c208613db809609899c4
+size 4404205232

model-00049-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5271c1f3151827d09436bb51b5661725103356d639781c6214690a1248bc0961
+size 4278385928

model-00050-of-00050.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f412ae287747cc86e6ee5d58600bbe5c574d6fa77f351367aae9ea3f4dbcc3a
+size 2068971664

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "crop_size": null,
+  "data_format": "channels_first",
+  "default_to_square": true,
+  "device": null,
+  "disable_grouping": null,
+  "do_center_crop": null,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_pad": null,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "Llama4ImageProcessorFast",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "input_data_format": null,
+  "max_patches": 16,
+  "pad_size": null,
+  "processor_class": "Llama4Processor",
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "resize_to_max_canvas": false,
+  "return_tensors": null,
+  "size": {
+    "height": 336,
+    "width": 336
+  }
+}

processor_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "fake_image_token": "<|image|>",
+  "image_token": "<|image|>",
+  "patch_size": 14,
+  "processor_class": "Llama4Processor"
+}

tokenizer_config.json CHANGED Viewed

@@ -9091,7 +9091,6 @@
   ],
   "model_max_length": 10485760,
   "pad_token": "<|finetune_right_pad|>",
-  "padding_side": "right",
   "processor_class": "Llama4Processor",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }

   ],
   "model_max_length": 10485760,
   "pad_token": "<|finetune_right_pad|>",
   "processor_class": "Llama4Processor",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }