| | + deepspeed --master_port 25841 --module safe_rlhf.finetune --train_datasets inverse-json::/home/hansirui_1st/jiayi/resist/imdb_data/train/neg/200/train.json --model_name_or_path /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000 --max_length 512 --trust_remote_code True --epochs 1 --per_device_train_batch_size 1 --per_device_eval_batch_size 4 --gradient_accumulation_steps 8 --gradient_checkpointing --learning_rate 1e-5 --lr_warmup_ratio 0 --weight_decay 0.0 --lr_scheduler_type constant --weight_decay 0.0 --seed 42 --output_dir /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000-Q2-200 --log_type wandb --log_run_name imdb-Qwen1.5-4B-s3-Q1-10000-Q2-200 --log_project Inverse_Alignment_IMDb --zero_stage 3 --offload none --bf16 True --tf32 True --save_16bit |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | nvcc warning : incompatible redefinition for option |
| | [rank7]:[W526 18:10:23.584030541 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 7] using GPU 7 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
| | [rank2]:[W526 18:10:23.604975773 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 2] using GPU 2 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
| | [rank4]:[W526 18:10:24.890347835 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 4] using GPU 4 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
| | [rank1]:[W526 18:10:24.917776614 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 1] using GPU 1 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
| | [rank5]:[W526 18:10:24.073889351 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 5] using GPU 5 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
| | [rank3]:[W526 18:10:24.423931223 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 3] using GPU 3 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
| | [rank0]:[W526 18:10:24.435845190 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 0] using GPU 0 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
| | [rank6]:[W526 18:10:24.438899181 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 6] using GPU 6 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
| | loading configuration file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000/config.json |
| | loading configuration file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000/config.json |
| | loading configuration file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000/config.json |
| | loading configuration file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000/config.json |
| | loading configuration file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000/config.json |
| | loading configuration file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000/config.json |
| | loading configuration file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000/config.json |
| | loading configuration file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000/config.json |
| | Model config Qwen2Config { |
| | "architectures": [ |
| | "Qwen2ForCausalLM" |
| | ], |
| | "attention_dropout": 0.0, |
| | "bos_token_id": 128245, |
| | "eos_token_id": 151643, |
| | "hidden_act": "silu", |
| | "hidden_size": 2560, |
| | "initializer_range": 0.02, |
| | "intermediate_size": 6912, |
| | "max_position_embeddings": 32768, |
| | "max_window_layers": 21, |
| | "model_type": "qwen2", |
| | "num_attention_heads": 20, |
| | "num_hidden_layers": 40, |
| | "num_key_value_heads": 20, |
| | "pad_token_id": 151643, |
| | "rms_norm_eps": 1e-06, |
| | "rope_scaling": null, |
| | "rope_theta": 5000000.0, |
| | "sliding_window": 32768, |
| | "tie_word_embeddings": false, |
| | "torch_dtype": "bfloat16", |
| | "transformers_version": "4.52.1", |
| | "use_cache": true, |
| | "use_sliding_window": false, |
| | "vocab_size": 151646 |
| | } |
| |
|
| | Model config Qwen2Config { |
| | "architectures": [ |
| | "Qwen2ForCausalLM" |
| | ], |
| | "attention_dropout": 0.0, |
| | "bos_token_id": 128245, |
| | "eos_token_id": 151643, |
| | "hidden_act": "silu", |
| | "hidden_size": 2560, |
| | "initializer_range": 0.02, |
| | "intermediate_size": 6912, |
| | "max_position_embeddings": 32768, |
| | "max_window_layers": 21, |
| | "model_type": "qwen2", |
| | "num_attention_heads": 20, |
| | "num_hidden_layers": 40, |
| | "num_key_value_heads": 20, |
| | "pad_token_id": 151643, |
| | "rms_norm_eps": 1e-06, |
| | "rope_scaling": null, |
| | "rope_theta": 5000000.0, |
| | "sliding_window": 32768, |
| | "tie_word_embeddings": false, |
| | "torch_dtype": "bfloat16", |
| | "transformers_version": "4.52.1", |
| | "use_cache": true, |
| | "use_sliding_window": false, |
| | "vocab_size": 151646 |
| | } |
| |
|
| | Model config Qwen2Config { |
| | "architectures": [ |
| | "Qwen2ForCausalLM" |
| | ], |
| | "attention_dropout": 0.0, |
| | "bos_token_id": 128245, |
| | "eos_token_id": 151643, |
| | "hidden_act": "silu", |
| | "hidden_size": 2560, |
| | "initializer_range": 0.02, |
| | "intermediate_size": 6912, |
| | "max_position_embeddings": 32768, |
| | "max_window_layers": 21, |
| | "model_type": "qwen2", |
| | "num_attention_heads": 20, |
| | "num_hidden_layers": 40, |
| | "num_key_value_heads": 20, |
| | "pad_token_id": 151643, |
| | "rms_norm_eps": 1e-06, |
| | "rope_scaling": null, |
| | "rope_theta": 5000000.0, |
| | "sliding_window": 32768, |
| | "tie_word_embeddings": false, |
| | "torch_dtype": "bfloat16", |
| | "transformers_version": "4.52.1", |
| | "use_cache": true, |
| | "use_sliding_window": false, |
| | "vocab_size": 151646 |
| | } |
| |
|
| | Model config Qwen2Config { |
| | "architectures": [ |
| | "Qwen2ForCausalLM" |
| | ], |
| | "attention_dropout": 0.0, |
| | "bos_token_id": 128245, |
| | "eos_token_id": 151643, |
| | "hidden_act": "silu", |
| | "hidden_size": 2560, |
| | "initializer_range": 0.02, |
| | "intermediate_size": 6912, |
| | "max_position_embeddings": 32768, |
| | "max_window_layers": 21, |
| | "model_type": "qwen2", |
| | "num_attention_heads": 20, |
| | "num_hidden_layers": 40, |
| | "num_key_value_heads": 20, |
| | "pad_token_id": 151643, |
| | "rms_norm_eps": 1e-06, |
| | "rope_scaling": null, |
| | "rope_theta": 5000000.0, |
| | "sliding_window": 32768, |
| | "tie_word_embeddings": false, |
| | "torch_dtype": "bfloat16", |
| | "transformers_version": "4.52.1", |
| | "use_cache": true, |
| | "use_sliding_window": false, |
| | "vocab_size": 151646 |
| | } |
| |
|
| | Model config Qwen2Config { |
| | "architectures": [ |
| | "Qwen2ForCausalLM" |
| | ], |
| | "attention_dropout": 0.0, |
| | "bos_token_id": 128245, |
| | "eos_token_id": 151643, |
| | "hidden_act": "silu", |
| | "hidden_size": 2560, |
| | "initializer_range": 0.02, |
| | "intermediate_size": 6912, |
| | "max_position_embeddings": 32768, |
| | "max_window_layers": 21, |
| | "model_type": "qwen2", |
| | "num_attention_heads": 20, |
| | "num_hidden_layers": 40, |
| | "num_key_value_heads": 20, |
| | "pad_token_id": 151643, |
| | "rms_norm_eps": 1e-06, |
| | "rope_scaling": null, |
| | "rope_theta": 5000000.0, |
| | "sliding_window": 32768, |
| | "tie_word_embeddings": false, |
| | "torch_dtype": "bfloat16", |
| | "transformers_version": "4.52.1", |
| | "use_cache": true, |
| | "use_sliding_window": false, |
| | "vocab_size": 151646 |
| | } |
| |
|
| | Model config Qwen2Config { |
| | "architectures": [ |
| | "Qwen2ForCausalLM" |
| | ], |
| | "attention_dropout": 0.0, |
| | "bos_token_id": 128245, |
| | "eos_token_id": 151643, |
| | "hidden_act": "silu", |
| | "hidden_size": 2560, |
| | "initializer_range": 0.02, |
| | "intermediate_size": 6912, |
| | "max_position_embeddings": 32768, |
| | "max_window_layers": 21, |
| | "model_type": "qwen2", |
| | "num_attention_heads": 20, |
| | "num_hidden_layers": 40, |
| | "num_key_value_heads": 20, |
| | "pad_token_id": 151643, |
| | "rms_norm_eps": 1e-06, |
| | "rope_scaling": null, |
| | "rope_theta": 5000000.0, |
| | "sliding_window": 32768, |
| | "tie_word_embeddings": false, |
| | "torch_dtype": "bfloat16", |
| | "transformers_version": "4.52.1", |
| | "use_cache": true, |
| | "use_sliding_window": false, |
| | "vocab_size": 151646 |
| | } |
| |
|
| | Model config Qwen2Config { |
| | "architectures": [ |
| | "Qwen2ForCausalLM" |
| | ], |
| | "attention_dropout": 0.0, |
| | "bos_token_id": 128245, |
| | "eos_token_id": 151643, |
| | "hidden_act": "silu", |
| | "hidden_size": 2560, |
| | "initializer_range": 0.02, |
| | "intermediate_size": 6912, |
| | "max_position_embeddings": 32768, |
| | "max_window_layers": 21, |
| | "model_type": "qwen2", |
| | "num_attention_heads": 20, |
| | "num_hidden_layers": 40, |
| | "num_key_value_heads": 20, |
| | "pad_token_id": 151643, |
| | "rms_norm_eps": 1e-06, |
| | "rope_scaling": null, |
| | "rope_theta": 5000000.0, |
| | "sliding_window": 32768, |
| | "tie_word_embeddings": false, |
| | "torch_dtype": "bfloat16", |
| | "transformers_version": "4.52.1", |
| | "use_cache": true, |
| | "use_sliding_window": false, |
| | "vocab_size": 151646 |
| | } |
| |
|
| | Model config Qwen2Config { |
| | "architectures": [ |
| | "Qwen2ForCausalLM" |
| | ], |
| | "attention_dropout": 0.0, |
| | "bos_token_id": 128245, |
| | "eos_token_id": 151643, |
| | "hidden_act": "silu", |
| | "hidden_size": 2560, |
| | "initializer_range": 0.02, |
| | "intermediate_size": 6912, |
| | "max_position_embeddings": 32768, |
| | "max_window_layers": 21, |
| | "model_type": "qwen2", |
| | "num_attention_heads": 20, |
| | "num_hidden_layers": 40, |
| | "num_key_value_heads": 20, |
| | "pad_token_id": 151643, |
| | "rms_norm_eps": 1e-06, |
| | "rope_scaling": null, |
| | "rope_theta": 5000000.0, |
| | "sliding_window": 32768, |
| | "tie_word_embeddings": false, |
| | "torch_dtype": "bfloat16", |
| | "transformers_version": "4.52.1", |
| | "use_cache": true, |
| | "use_sliding_window": false, |
| | "vocab_size": 151646 |
| | } |
| |
|
| | loading weights file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000/pytorch_model.bin |
| | Will use torch_dtype=torch.bfloat16 as defined in model |
| | Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
| | Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
| | loading weights file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000/pytorch_model.bin |
| | Will use torch_dtype=torch.bfloat16 as defined in model |
| | Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
| | Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
| | loading weights file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000/pytorch_model.bin |
| | Will use torch_dtype=torch.bfloat16 as defined in model |
| | loading weights file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000/pytorch_model.bin |
| | Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
| | Will use torch_dtype=torch.bfloat16 as defined in model |
| | Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
| | Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
| | loading weights file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000/pytorch_model.bin |
| | Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
| | Will use torch_dtype=torch.bfloat16 as defined in model |
| | Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
| | loading weights file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000/pytorch_model.bin |
| | Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
| | Will use torch_dtype=torch.bfloat16 as defined in model |
| | Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
| | Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
| | loading weights file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000/pytorch_model.bin |
| | Will use torch_dtype=torch.bfloat16 as defined in model |
| | Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
| | Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
| | Generate config GenerationConfig { |
| | "bos_token_id": 128245, |
| | "eos_token_id": 151643, |
| | "pad_token_id": 151643 |
| | } |
| |
|
| | Generate config GenerationConfig { |
| | "bos_token_id": 128245, |
| | "eos_token_id": 151643, |
| | "pad_token_id": 151643 |
| | } |
| |
|
| | Generate config GenerationConfig { |
| | "bos_token_id": 128245, |
| | "eos_token_id": 151643, |
| | "pad_token_id": 151643 |
| | } |
| |
|
| | Generate config GenerationConfig { |
| | "bos_token_id": 128245, |
| | "eos_token_id": 151643, |
| | "pad_token_id": 151643 |
| | } |
| |
|
| | Generate config GenerationConfig { |
| | "bos_token_id": 128245, |
| | "eos_token_id": 151643, |
| | "pad_token_id": 151643 |
| | } |
| |
|
| | Generate config GenerationConfig { |
| | "bos_token_id": 128245, |
| | "eos_token_id": 151643, |
| | "pad_token_id": 151643 |
| | } |
| |
|
| | Generate config GenerationConfig { |
| | "bos_token_id": 128245, |
| | "eos_token_id": 151643, |
| | "pad_token_id": 151643 |
| | } |
| |
|
| | loading weights file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000/pytorch_model.bin |
| | Will use torch_dtype=torch.bfloat16 as defined in model |
| | Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
| | Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
| | Generate config GenerationConfig { |
| | "bos_token_id": 128245, |
| | "eos_token_id": 151643, |
| | "pad_token_id": 151643 |
| | } |
| |
|
| | All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
| |
|
| | All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000. |
| | If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
| | Generation config file not found, using a generation config created from the model config. |
| | All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
| |
|
| | All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000. |
| | If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
| | Generation config file not found, using a generation config created from the model config. |
| | All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
| |
|
| | All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
| |
|
| | All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000. |
| | If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
| | All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000. |
| | If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
| | All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
| |
|
| | All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000. |
| | If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
| | Generation config file not found, using a generation config created from the model config. |
| | Generation config file not found, using a generation config created from the model config. |
| | All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
| |
|
| | All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000. |
| | If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
| | Generation config file not found, using a generation config created from the model config. |
| | loading file vocab.json |
| | loading file merges.txt |
| | loading file tokenizer.json |
| | loading file added_tokens.json |
| | loading file special_tokens_map.json |
| | loading file tokenizer_config.json |
| | loading file chat_template.jinja |
| | Generation config file not found, using a generation config created from the model config. |
| | loading file vocab.json |
| | loading file merges.txt |
| | loading file tokenizer.json |
| | loading file added_tokens.json |
| | loading file special_tokens_map.json |
| | loading file tokenizer_config.json |
| | loading file chat_template.jinja |
| | loading file vocab.json |
| | loading file merges.txt |
| | loading file tokenizer.json |
| | loading file added_tokens.json |
| | loading file special_tokens_map.json |
| | loading file tokenizer_config.json |
| | loading file chat_template.jinja |
| | loading file vocab.json |
| | loading file merges.txt |
| | loading file tokenizer.json |
| | loading file added_tokens.json |
| | loading file special_tokens_map.json |
| | loading file tokenizer_config.json |
| | loading file chat_template.jinja |
| | loading file vocab.json |
| | loading file merges.txt |
| | loading file tokenizer.json |
| | loading file added_tokens.json |
| | loading file special_tokens_map.json |
| | loading file tokenizer_config.json |
| | loading file chat_template.jinja |
| | loading file vocab.json |
| | loading file merges.txt |
| | loading file tokenizer.json |
| | loading file added_tokens.json |
| | loading file special_tokens_map.json |
| | loading file tokenizer_config.json |
| | loading file chat_template.jinja |
| | All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
| |
|
| | All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000. |
| | If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
| | Generation config file not found, using a generation config created from the model config. |
| | loading file vocab.json |
| | loading file merges.txt |
| | loading file tokenizer.json |
| | loading file added_tokens.json |
| | loading file special_tokens_map.json |
| | loading file tokenizer_config.json |
| | loading file chat_template.jinja |
| | Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
| | Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
| | Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
| | Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
| | Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
| | Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
| | Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
| | All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
| |
|
| | All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000. |
| | If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
| | Generation config file not found, using a generation config created from the model config. |
| | loading file vocab.json |
| | loading file merges.txt |
| | loading file tokenizer.json |
| | loading file added_tokens.json |
| | loading file special_tokens_map.json |
| | loading file tokenizer_config.json |
| | loading file chat_template.jinja |
| | Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
| | Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
| | Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
| | Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
| | Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
| | Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
| | Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
| | Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
| | Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root... |
| | Detected CUDA files, patching ldflags |
| | Emitting ninja build file /home/hansirui_1st/.cache/torch_extensions/py311_cu124/fused_adam/build.ninja... |
| | /aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/torch/utils/cpp_extension.py:2059: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. |
| | If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST']. |
| | warnings.warn( |
| | Building extension module fused_adam... |
| | Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) |
| | Loading extension module fused_adam... |
| | Loading extension module fused_adam... |
| | Loading extension module fused_adam...Loading extension module fused_adam... |
| |
|
| | Loading extension module fused_adam... |
| | Loading extension module fused_adam... |
| | Loading extension module fused_adam... |
| | Loading extension module fused_adam... |
| | `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
| | `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
| | `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
| | `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
| | `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
| | `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
| | `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
| | wandb: Currently logged in as: xtom to https://api.wandb.ai. Use `wandb login --relogin` to force relogin |
| | wandb: Tracking run with wandb version 0.19.11 |
| | wandb: Run data is saved locally in /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000-Q2-200/wandb/run-20250526_181051-wa7eyums |
| | wandb: Run `wandb offline` to turn off syncing. |
| | wandb: Syncing run imdb-Qwen1.5-4B-s3-Q1-10000-Q2-200 |
| | wandb: βοΈ View project at https://wandb.ai/xtom/Inverse_Alignment_IMDb |
| | wandb: π View run at https://wandb.ai/xtom/Inverse_Alignment_IMDb/runs/wa7eyums |
| |
Training 1/1 epoch: 0%| | 0/25 [00:00<?, ?it/s]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. |
| |
Training 1/1 epoch (loss 2.9960): 0%| | 0/25 [00:09<?, ?it/s]
Training 1/1 epoch (loss 2.9960): 4%|β | 1/25 [00:09<03:44, 9.36s/it]
Training 1/1 epoch (loss 2.7558): 4%|β | 1/25 [00:13<03:44, 9.36s/it]
Training 1/1 epoch (loss 2.7558): 8%|β | 2/25 [00:13<02:19, 6.05s/it]
Training 1/1 epoch (loss 3.0689): 8%|β | 2/25 [00:15<02:19, 6.05s/it]
Training 1/1 epoch (loss 3.0689): 12%|ββ | 3/25 [00:15<01:37, 4.42s/it]
Training 1/1 epoch (loss 2.9004): 12%|ββ | 3/25 [00:17<01:37, 4.42s/it]
Training 1/1 epoch (loss 2.9004): 16%|ββ | 4/25 [00:17<01:15, 3.61s/it]
Training 1/1 epoch (loss 2.5940): 16%|ββ | 4/25 [00:20<01:15, 3.61s/it]
Training 1/1 epoch (loss 2.5940): 20%|ββ | 5/25 [00:20<01:04, 3.24s/it]
Training 1/1 epoch (loss 2.9648): 20%|ββ | 5/25 [00:22<01:04, 3.24s/it]
Training 1/1 epoch (loss 2.9648): 24%|βββ | 6/25 [00:22<00:55, 2.93s/it]
Training 1/1 epoch (loss 2.9910): 24%|βββ | 6/25 [00:26<00:55, 2.93s/it]
Training 1/1 epoch (loss 2.9910): 28%|βββ | 7/25 [00:26<00:55, 3.06s/it]
Training 1/1 epoch (loss 3.0454): 28%|βββ | 7/25 [00:29<00:55, 3.06s/it]
Training 1/1 epoch (loss 3.0454): 32%|ββββ | 8/25 [00:29<00:54, 3.20s/it]
Training 1/1 epoch (loss 2.8994): 32%|ββββ | 8/25 [00:31<00:54, 3.20s/it]
Training 1/1 epoch (loss 2.8994): 36%|ββββ | 9/25 [00:31<00:46, 2.89s/it]
Training 1/1 epoch (loss 2.8202): 36%|ββββ | 9/25 [00:33<00:46, 2.89s/it]
Training 1/1 epoch (loss 2.8202): 40%|ββββ | 10/25 [00:33<00:39, 2.63s/it]
Training 1/1 epoch (loss 2.7364): 40%|ββββ | 10/25 [00:35<00:39, 2.63s/it]
Training 1/1 epoch (loss 2.7364): 44%|βββββ | 11/25 [00:35<00:31, 2.23s/it]
Training 1/1 epoch (loss 2.8304): 44%|βββββ | 11/25 [00:37<00:31, 2.23s/it]
Training 1/1 epoch (loss 2.8304): 48%|βββββ | 12/25 [00:37<00:29, 2.26s/it]
Training 1/1 epoch (loss 3.0367): 48%|βββββ | 12/25 [00:40<00:29, 2.26s/it]
Training 1/1 epoch (loss 3.0367): 52%|ββββββ | 13/25 [00:40<00:31, 2.58s/it]
Training 1/1 epoch (loss 3.0155): 52%|ββββββ | 13/25 [00:45<00:31, 2.58s/it]
Training 1/1 epoch (loss 3.0155): 56%|ββββββ | 14/25 [00:45<00:34, 3.10s/it]
Training 1/1 epoch (loss 2.9897): 56%|ββββββ | 14/25 [00:47<00:34, 3.10s/it]
Training 1/1 epoch (loss 2.9897): 60%|ββββββ | 15/25 [00:47<00:27, 2.74s/it]
Training 1/1 epoch (loss 2.9517): 60%|ββββββ | 15/25 [00:50<00:27, 2.74s/it]
Training 1/1 epoch (loss 2.9517): 64%|βββββββ | 16/25 [00:50<00:26, 2.95s/it]
Training 1/1 epoch (loss 2.6390): 64%|βββββββ | 16/25 [00:52<00:26, 2.95s/it]
Training 1/1 epoch (loss 2.6390): 68%|βββββββ | 17/25 [00:52<00:22, 2.76s/it]
Training 1/1 epoch (loss 2.8979): 68%|βββββββ | 17/25 [00:55<00:22, 2.76s/it]
Training 1/1 epoch (loss 2.8979): 72%|ββββββββ | 18/25 [00:55<00:19, 2.77s/it]
Training 1/1 epoch (loss 2.7418): 72%|ββββββββ | 18/25 [00:57<00:19, 2.77s/it]
Training 1/1 epoch (loss 2.7418): 76%|ββββββββ | 19/25 [00:57<00:14, 2.42s/it]
Training 1/1 epoch (loss 2.6736): 76%|ββββββββ | 19/25 [00:58<00:14, 2.42s/it]
Training 1/1 epoch (loss 2.6736): 80%|ββββββββ | 20/25 [00:58<00:10, 2.02s/it]
Training 1/1 epoch (loss 2.8481): 80%|ββββββββ | 20/25 [01:00<00:10, 2.02s/it]
Training 1/1 epoch (loss 2.8481): 84%|βββββββββ | 21/25 [01:00<00:08, 2.20s/it]
Training 1/1 epoch (loss 2.6856): 84%|βββββββββ | 21/25 [01:03<00:08, 2.20s/it]
Training 1/1 epoch (loss 2.6856): 88%|βββββββββ | 22/25 [01:03<00:06, 2.30s/it]
Training 1/1 epoch (loss 2.7375): 88%|βββββββββ | 22/25 [01:05<00:06, 2.30s/it]
Training 1/1 epoch (loss 2.7375): 92%|ββββββββββ| 23/25 [01:05<00:04, 2.09s/it]
Training 1/1 epoch (loss 2.7816): 92%|ββββββββββ| 23/25 [01:08<00:04, 2.09s/it]
Training 1/1 epoch (loss 2.7816): 96%|ββββββββββ| 24/25 [01:08<00:02, 2.35s/it]
Training 1/1 epoch (loss 2.7906): 96%|ββββββββββ| 24/25 [01:09<00:02, 2.35s/it]
Training 1/1 epoch (loss 2.7906): 100%|ββββββββββ| 25/25 [01:09<00:00, 2.21s/it]
Training 1/1 epoch (loss 2.7906): 100%|ββββββββββ| 25/25 [01:09<00:00, 2.80s/it] |
| | chat template saved in /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000-Q2-200/chat_template.jinja |
| | tokenizer config file saved in /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000-Q2-200/tokenizer_config.json |
| | Special tokens file saved in /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-10000-Q2-200/special_tokens_map.json |
| | wandb: ERROR Problem finishing run |
| | Exception ignored in atexit callback: <bound method rank_zero_only.<locals>.wrapper of <safe_rlhf.logger.Logger object at 0x1550e58da650>> |
| | Traceback (most recent call last): |
| | File "/home/hansirui_1st/jiayi/resist/setting3/safe_rlhf/utils.py", line 212, in wrapper |
| | return func(*args, **kwargs) |
| | ^^^^^^^^^^^^^^^^^^^^^ |
| | File "/home/hansirui_1st/jiayi/resist/setting3/safe_rlhf/logger.py", line 183, in close |
| | self.wandb.finish() |
| | File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 406, in wrapper |
| | return func(self, *args, **kwargs) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 503, in wrapper |
| | return func(self, *args, **kwargs) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 451, in wrapper |
| | return func(self, *args, **kwargs) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2309, in finish |
| | return self._finish(exit_code) |
| | ^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 406, in wrapper |
| | return func(self, *args, **kwargs) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2337, in _finish |
| | self._atexit_cleanup(exit_code=exit_code) |
| | File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2550, in _atexit_cleanup |
| | self._on_finish() |
| | File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2806, in _on_finish |
| | wait_with_progress( |
| | File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/mailbox/wait_with_progress.py", line 24, in wait_with_progress |
| | return wait_all_with_progress( |
| | ^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/mailbox/wait_with_progress.py", line 87, in wait_all_with_progress |
| | return asyncio_compat.run(progress_loop_with_timeout) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/lib/asyncio_compat.py", line 27, in run |
| | future = executor.submit(runner.run, fn) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/concurrent/futures/thread.py", line 169, in submit |
| | raise RuntimeError( |
| | RuntimeError: cannot schedule new futures after interpreter shutdown |
| |
|