---
# Settings for a single DPO training run (see `exp_name` / `local_run_dir`).
#
# NOTE(review): this document was reconstructed from a dump in which all
# newlines and indentation had been lost. Keys and values are unchanged, but
# two nesting decisions could not be derived from the text alone and should be
# confirmed against the code that reads this file:
#   * `output_dir` is placed at top level (it could belong under `wandb`).
#   * `transform` is placed at top level (it could belong under `loss`);
#     `token_level` is kept under `loss`.

# --- Run-wide settings ------------------------------------------------------
seed: 0
batch_size: 32
eval_batch_size: 32
debug: false
fsdp_port: 41969

# --- Data -------------------------------------------------------------------
datasets:
  - pku-safety
base_data_dir: datasets/
reverse_dataset: true

# --- Logging / output -------------------------------------------------------
wandb:
  enabled: true
  entity: null
  project: tis-dpo
output_dir: output

# --- Evaluation -------------------------------------------------------------
sample_during_eval: false
n_eval_model_samples: 16
do_first_eval: true

# --- Experiment identity ----------------------------------------------------
# NOTE(review): the directory name embeds what looks like a date/time stamp,
# so this is presumably a per-run value rather than a reusable default.
local_run_dir: output/dpo_Qwen2.5-3B-sft_pku-safety_reverse_08-05_19-10
exp_name: dpo_Qwen2.5-3B-sft_pku-safety_reverse

# --- Optimisation -----------------------------------------------------------
lr: 5.0e-07
gradient_accumulation_steps: 2
max_grad_norm: 10.0
max_length: 512
max_prompt_length: 256
n_epochs: 1
n_examples: null
n_eval_examples: 256
trainer: FSDPTrainer
optimizer: RMSprop
warmup_steps: 150
activation_checkpointing: false
eval_every: 100000
minimum_log_interval_secs: 1.0

# --- Model ------------------------------------------------------------------
model:
  # Absolute, machine-specific path; adjust when running elsewhere.
  name_or_path: /mnt/cephfs_nj/aiweiliu/TIS-DPO/models/Qwen2.5-3B-sft
  tokenizer_name_or_path: null
  archive: null
  block_name: Qwen2DecoderLayer
  policy_dtype: float32
  fsdp_policy_mp: null
  reference_dtype: float16

# --- Loss -------------------------------------------------------------------
loss:
  name: dpo
  beta: 0.1
  label_smoothing: 0
  reference_free: false
  token_level: false

# --- Transform --------------------------------------------------------------
# `method` names one of the sibling sub-mappings below (`binary` here); the
# remaining sub-mappings presumably hold parameters for the other methods.
transform:
  method: binary
  binary:
    top_percent: 100
  threshold:
    upper_threshold: 1.0
    lower_threshold: -1.0
  threshold_and_scale:
    min_val: -1.5
    max_val: 1.5
    min_scale: 0.7
    max_scale: 1.3
  random:
    min_val: 0.7
    max_val: 1.3
  rank_based:
    min_scale: 0.7
    max_scale: 1.3