craffel HF Staff commited on
Commit
0fd0387
·
verified ·
1 Parent(s): c8b3e87

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -93,3 +93,12 @@ flexitok_llama_albert/0000100000/__5_0.distcp filter=lfs diff=lfs merge=lfs -tex
93
  flexitok_llama_albert/0000100000/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
94
  flexitok_llama_albert/0000100000/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
95
  llama_43k/metrics.jsonl filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
93
  flexitok_llama_albert/0000100000/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
94
  flexitok_llama_albert/0000100000/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
95
  llama_43k/metrics.jsonl filter=lfs diff=lfs merge=lfs -text
96
+ llama_43k/0000043432/.metadata filter=lfs diff=lfs merge=lfs -text
97
+ llama_43k/0000043432/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
98
+ llama_43k/0000043432/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
99
+ llama_43k/0000043432/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
100
+ llama_43k/0000043432/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
101
+ llama_43k/0000043432/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
102
+ llama_43k/0000043432/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
103
+ llama_43k/0000043432/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
104
+ llama_43k/0000043432/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
llama_43k/0000043432/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:415d13b7c28ac041db913affbb70603a8c4c3dd4e5dd8272b19f2fcb472dc0fe
3
+ size 1148547
llama_43k/0000043432/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c14a53b403881a6b697932a1551630a21b15275e194e668b0f0382317e2446b
3
+ size 2715727056
llama_43k/0000043432/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9cac53c5c768163685f836ae90a0d169568370bc53a82a4334de55d912b6347
3
+ size 2715765996
llama_43k/0000043432/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f19109248bdf62013cc36ee3b5c7f5f9543a1915ebdaef0ab76061cce166b1a8
3
+ size 2715765996
llama_43k/0000043432/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d353f503a46078fc1fc247b26c35af19d3dd8f4e2e38ba7448ef9c7db7c0528f
3
+ size 2715765996
llama_43k/0000043432/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b84d0e53cdf3637fea08ca4c05c69ada7305eb45f07c0614bd7924c5a06daed1
3
+ size 2715765996
llama_43k/0000043432/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6a0a8dab656dba8ed49292cdad2e7f5f6aae8b94a4923a046eaa91b6861b014
3
+ size 2715768272
llama_43k/0000043432/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faefef342db93e051bceb79436bba2357226eee60c47632b1c84e118bde0c741
3
+ size 2715768272
llama_43k/0000043432/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da7218cc39f381b88767740cf29911d00ef71ea1eaf8c2253bc65cee1ec18ad7
3
+ size 2715776528
llama_43k/0000043432/params.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"name": "llama_43k", "dump_dir": "/fsx/craffel/lingua_logs/llama_43k", "seed": 777, "grad_acc_steps": 8, "gc_collect_freq": 1000, "probe_freq": null, "steps": 43432, "data": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "batch_size": 4, "seq_len": 4096, "n_views": 2, "seed": 42, "add_bos": true, "add_eos": true, "load_async": true, "prefetch_size": 1024, "tokenizer": {"name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "load_supermapping": false, "dropout": 0.0, "seed": 42}}, "optim": {"lr": 0.001, "weight_decay": 0.1, "epsilon": 1e-08, "beta1": 0.9, "beta2": 0.95, "clip": 1.0, "scheduler": "cosine", "warmup": 2000, "lr_min_ratio": 1e-06, "cycle_length": 1.0, "cosine_theta": 1.0, "annealing_step": 1000, "decay_fraction": 0.1, "exp_factor": 0.5}, "model": {"dim": 2048, "n_layers": 25, "head_dim": null, "n_heads": 16, "n_kv_heads": null, "ffn_dim_multiplier": null, "multiple_of": 256, "norm_eps": 1e-05, "rope_theta": 10000.0, "init_base_std": null, "init_std_factor": "disabled", "max_seqlen": 4096, "seed": 42, "vocab_size": 128256, "weight_tying": false, "sliding_window": null, "use_factorized_embeddings": false, "factorized_embedding_dim": 0}, "distributed": {"dp_shard": 1, "dp_replicate": 8, "tp_size": 1, "selective_activation_checkpointing": false, "compile": true, "fsdp_type": "full_shard", "model_dtype": "bf16", "float8_recipe": null, "float8_filter": "layers\\.[0-9]+\\.", "matmul_allow_tf32": false, "detect_anomaly": false, "compile_cache_size_limit": 8, "spawn_method": "forkserver"}, "env": {"MKL_SERVICE_FORCE_INTEL": "GNU", "OMP_NUM_THREADS": "1", "MKL_NUM_THREADS": "1", "ENABLE_INTRA_NODE_COMM": "1", "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", "NCCL_IB_TIMEOUT": "22", "NCCL_DEBUG": "INFO", "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1"}, "checkpoint": {"dump": {"every": 10000, "keep": -1}, "eval": {"every": 10000, "keep": -1}, "path": "/fsx/craffel/lingua_logs/llama_43k/checkpoints", "init_ckpt_path": null, "load_init_optimizer_state": false, "save_init_ckpt": false}, "profiling": {"run": true, "trace_folder": "profiling", "mem_warmup": 0, "mem_steps": 4, "profile_warmup": 100, "profile_steps": 4}, "logging": {"freq": 1, "acc_freq": null, "wandb": null}, "async_eval_gpus": 8, "eval": {"harness": {"tasks": ["hellaswag", "piqa", "arc_easy", "arc_challenge", "include_base_44_arabic", "include_base_44_chinese", "include_base_44_german", "include_base_44_greek", "include_base_44_persian", "include_base_44_french", "include_base_44_hungarian", "include_base_44_indonesian", "include_base_44_italian", "include_base_44_japanese", "include_base_44_dutch", "include_base_44_polish", "include_base_44_portuguese", "include_base_44_russian", "include_base_44_spanish", "include_base_44_turkish", "include_base_44_vietnamese", "belebele_arb_Arab", "belebele_ces_Latn", "belebele_zho_Hans", "belebele_dan_Latn", "belebele_deu_Latn", "belebele_ell_Grek", "belebele_pes_Arab", "belebele_fra_Latn", "belebele_hun_Latn", "belebele_ind_Latn", "belebele_ita_Latn", "belebele_jpn_Jpan", "belebele_nld_Latn", "belebele_pol_Latn", "belebele_por_Latn", "belebele_rus_Cyrl", "belebele_spa_Latn", "belebele_swe_Latn", "belebele_tur_Latn", "belebele_vie_Latn", "belebele_eng_Latn", "xnli_ar", "xnli_zh", "xnli_de", "xnli_el", "xnli_en", "xnli_es", "xnli_fr", "xnli_hi", "xnli_ru", "xnli_tr", "xnli_vi"]}, "generator": {"max_tokens": 16384, "dtype": "bf16", "add_bos": false}}}
llama_43k/0000043432/train_state_00000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 43432, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 2994, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.00.jsonl", "position": 11219577911, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.00.jsonl", "position": 514188662, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.00.jsonl", "position": 486951810, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.00.jsonl", "position": 519727685, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.00.jsonl", "position": 689286522, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.00.jsonl", "position": 581075347, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.00.jsonl", "position": 437417364, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.00.jsonl", "position": 674716041, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.00.jsonl", "position": 753261296, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.00.jsonl", "position": 938396396, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.00.jsonl", "position": 668267699, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.00.jsonl", "position": 533746723, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.00.jsonl", "position": 560618956, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.00.jsonl", "position": 582051171, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.00.jsonl", "position": 650997590, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.00.jsonl", "position": 679676816, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.00.jsonl", "position": 900322975, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.00.jsonl", "position": 806308156, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.00.jsonl", "position": 984113219, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.00.jsonl", "position": 1045592574, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.00.jsonl", "position": 2451403757, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 93608522159948075081235815930782589694, "inc": 252101603063402394885084957393789173453}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "dropout": 0.0, "rng_state": null, "seed": 42}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 320, "rng_state": {"bit_generator": "PCG64", "state": {"state": 144345996173452310112787251348779056469, "inc": 257317082376085721142933171929815648017}, "has_uint32": 0, "uinteger": 1647248372}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 43432, "verbose": false, "_step_count": 43433, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
llama_43k/0000043432/train_state_00001.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 43432, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 94, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.01.jsonl", "position": 11246090545, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.01.jsonl", "position": 514067450, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.01.jsonl", "position": 487318717, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.01.jsonl", "position": 516708038, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.01.jsonl", "position": 683980542, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.01.jsonl", "position": 578407315, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.01.jsonl", "position": 433831137, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.01.jsonl", "position": 677080812, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.01.jsonl", "position": 762106316, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.01.jsonl", "position": 948338468, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.01.jsonl", "position": 666983095, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.01.jsonl", "position": 530114226, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.01.jsonl", "position": 565398575, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.01.jsonl", "position": 581615397, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.01.jsonl", "position": 642891861, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.01.jsonl", "position": 678829857, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.01.jsonl", "position": 904885434, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.01.jsonl", "position": 818167149, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.01.jsonl", "position": 979656750, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.01.jsonl", "position": 1040306318, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.01.jsonl", "position": 2422648981, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 166906191379537685602033983443897208219, "inc": 246509925186285949978196491240064802315}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "dropout": 0.0, "rng_state": null, "seed": 42}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 320, "rng_state": {"bit_generator": "PCG64", "state": {"state": 127048494912560612625257031437352050152, "inc": 173555323965545256606922338259303677603}, "has_uint32": 1, "uinteger": 2618097370}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 43432, "verbose": false, "_step_count": 43433, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
llama_43k/0000043432/train_state_00002.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 43432, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 366, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.02.jsonl", "position": 11252660964, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.02.jsonl", "position": 517112941, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.02.jsonl", "position": 480193740, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.02.jsonl", "position": 516554900, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.02.jsonl", "position": 687666197, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.02.jsonl", "position": 573205606, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.02.jsonl", "position": 437439450, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.02.jsonl", "position": 671467861, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.02.jsonl", "position": 762855894, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.02.jsonl", "position": 931124739, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.02.jsonl", "position": 668720169, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.02.jsonl", "position": 535834915, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.02.jsonl", "position": 557781191, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.02.jsonl", "position": 582003973, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.02.jsonl", "position": 648178543, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.02.jsonl", "position": 683125478, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.02.jsonl", "position": 904441914, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.02.jsonl", "position": 805095186, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.02.jsonl", "position": 983723271, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.02.jsonl", "position": 1036269041, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.02.jsonl", "position": 2440800059, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 71418971784666166996386568063950623169, "inc": 234358335530849485425064040311006256713}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "dropout": 0.0, "rng_state": null, "seed": 42}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 320, "rng_state": {"bit_generator": "PCG64", "state": {"state": 239981527451544777428859014115966725236, "inc": 319170006889470250209362588441616495209}, "has_uint32": 1, "uinteger": 4258847121}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 43432, "verbose": false, "_step_count": 43433, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
llama_43k/0000043432/train_state_00003.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 43432, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 9087, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.03.jsonl", "position": 11214147808, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.03.jsonl", "position": 511649054, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.03.jsonl", "position": 478822739, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.03.jsonl", "position": 520205774, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.03.jsonl", "position": 687039787, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.03.jsonl", "position": 570291899, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.03.jsonl", "position": 444353580, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.03.jsonl", "position": 672290091, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.03.jsonl", "position": 760206773, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.03.jsonl", "position": 946984936, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.03.jsonl", "position": 663576241, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.03.jsonl", "position": 539372397, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.03.jsonl", "position": 554814416, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.03.jsonl", "position": 577591580, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.03.jsonl", "position": 649685116, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.03.jsonl", "position": 674280623, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.03.jsonl", "position": 906067952, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.03.jsonl", "position": 817472734, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.03.jsonl", "position": 988819168, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.03.jsonl", "position": 1034351471, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.03.jsonl", "position": 2475124025, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 206350417946165018667629999589055691867, "inc": 148211758571781046255077612135386035203}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "dropout": 0.0, "rng_state": null, "seed": 42}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 320, "rng_state": {"bit_generator": "PCG64", "state": {"state": 228723223024629401232931122468385822333, "inc": 115810872492597857501795428972873905393}, "has_uint32": 1, "uinteger": 2883656260}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 43432, "verbose": false, "_step_count": 43433, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
llama_43k/0000043432/train_state_00004.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 43432, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 14, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.04.jsonl", "position": 11213286890, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.04.jsonl", "position": 520876990, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.04.jsonl", "position": 482654345, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.04.jsonl", "position": 517730289, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.04.jsonl", "position": 686273175, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.04.jsonl", "position": 574539684, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.04.jsonl", "position": 439837805, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.04.jsonl", "position": 678245117, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.04.jsonl", "position": 759012715, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.04.jsonl", "position": 951849043, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.04.jsonl", "position": 661921354, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.04.jsonl", "position": 536790825, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.04.jsonl", "position": 561999104, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.04.jsonl", "position": 585739692, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.04.jsonl", "position": 644564874, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.04.jsonl", "position": 688760722, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.04.jsonl", "position": 907880406, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.04.jsonl", "position": 809285041, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.04.jsonl", "position": 978285385, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.04.jsonl", "position": 1040736919, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.04.jsonl", "position": 2433079899, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 149113540340265822631809129153354662391, "inc": 186633262021180533256729114674950595327}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "dropout": 0.0, "rng_state": null, "seed": 42}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 320, "rng_state": {"bit_generator": "PCG64", "state": {"state": 130739440477655955071415287295551738511, "inc": 303111205818808944921858206842105131807}, "has_uint32": 0, "uinteger": 3214891789}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 43432, "verbose": false, "_step_count": 43433, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
llama_43k/0000043432/train_state_00005.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 43432, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 2267, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.05.jsonl", "position": 11225709124, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.05.jsonl", "position": 513819849, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.05.jsonl", "position": 480789406, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.05.jsonl", "position": 521610553, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.05.jsonl", "position": 685922429, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.05.jsonl", "position": 572824038, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.05.jsonl", "position": 434849545, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.05.jsonl", "position": 672642674, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.05.jsonl", "position": 767009084, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.05.jsonl", "position": 947407282, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.05.jsonl", "position": 671557398, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.05.jsonl", "position": 529607450, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.05.jsonl", "position": 566283205, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.05.jsonl", "position": 580081908, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.05.jsonl", "position": 646314816, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.05.jsonl", "position": 681448324, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.05.jsonl", "position": 906028231, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.05.jsonl", "position": 816144087, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.05.jsonl", "position": 980151148, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.05.jsonl", "position": 1036794133, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.05.jsonl", "position": 2432655571, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 74223376679385409016887756305182700037, "inc": 329233669073478483697346584247981015037}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "dropout": 0.0, "rng_state": null, "seed": 42}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 320, "rng_state": {"bit_generator": "PCG64", "state": {"state": 161920012140408812822438097086396731536, "inc": 47382953940698287647753879262736142901}, "has_uint32": 1, "uinteger": 2816109118}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 43432, "verbose": false, "_step_count": 43433, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
llama_43k/0000043432/train_state_00006.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 43432, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 303, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.06.jsonl", "position": 11232197006, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.06.jsonl", "position": 520790750, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.06.jsonl", "position": 486736823, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.06.jsonl", "position": 514510322, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.06.jsonl", "position": 687054437, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.06.jsonl", "position": 575096403, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.06.jsonl", "position": 441050041, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.06.jsonl", "position": 677293105, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.06.jsonl", "position": 761752618, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.06.jsonl", "position": 945162117, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.06.jsonl", "position": 674549455, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.06.jsonl", "position": 527599449, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.06.jsonl", "position": 560746941, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.06.jsonl", "position": 581319384, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.06.jsonl", "position": 649502643, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.06.jsonl", "position": 677291205, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.06.jsonl", "position": 901467304, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.06.jsonl", "position": 809886696, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.06.jsonl", "position": 984352961, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.06.jsonl", "position": 1040607738, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.06.jsonl", "position": 2424980905, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 247566293337978602361054207240774424477, "inc": 95963489890761403814531195999220475639}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "dropout": 0.0, "rng_state": null, "seed": 42}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 320, "rng_state": {"bit_generator": "PCG64", "state": {"state": 141642150906798421323478965881231184274, "inc": 72545526324180839152750112646078969085}, "has_uint32": 0, "uinteger": 3560810226}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 43432, "verbose": false, "_step_count": 43433, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
llama_43k/0000043432/train_state_00007.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 43432, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 628, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.07.jsonl", "position": 11221625199, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.07.jsonl", "position": 518312955, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.07.jsonl", "position": 486545966, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.07.jsonl", "position": 519441583, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.07.jsonl", "position": 684861293, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.07.jsonl", "position": 579173841, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.07.jsonl", "position": 441043592, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.07.jsonl", "position": 669104774, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.07.jsonl", "position": 759320590, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.07.jsonl", "position": 947410884, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.07.jsonl", "position": 664029555, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.07.jsonl", "position": 535062461, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.07.jsonl", "position": 565927026, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.07.jsonl", "position": 577246270, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.07.jsonl", "position": 645301331, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.07.jsonl", "position": 681783249, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.07.jsonl", "position": 905990830, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.07.jsonl", "position": 815027451, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.07.jsonl", "position": 976805838, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.07.jsonl", "position": 1044049047, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.07.jsonl", "position": 2429454973, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 203517774092253306104295279691361276370, "inc": 53245743019587277358203950863334653629}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "meta-llama/Llama-3.2-1B", "tokenizers": null, "dropout": 0.0, "rng_state": null, "seed": 42}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 320, "rng_state": {"bit_generator": "PCG64", "state": {"state": 234356820769691517543173949173554353718, "inc": 19761753544780285878460645500694854795}, "has_uint32": 1, "uinteger": 2579779961}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 43432, "verbose": false, "_step_count": 43433, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}