# General run settings. Descriptions are inferred from key names only;
# NOTE(review): confirm each against the training code that reads this file.
# Presumably the random seed for the run.
seed: 0
# Training and evaluation batch sizes are both 32 here.
# NOTE(review): unclear from this file whether these are global or per-device.
batch_size: 32
eval_batch_size: 32
debug: false
# A fixed port number is recorded here.
# NOTE(review): presumably used for FSDP process-group setup; reusing this file
# for a second concurrent run on the same host may collide - confirm.
fsdp_port: 41969
# A single dataset is listed; presumably looked up under base_data_dir - TODO confirm.
datasets:
- pku-safety
base_data_dir: datasets/
# Enabled here; exp_name and local_run_dir in this file carry a matching
# "reverse" tag. NOTE(review): the exact effect on the data is not visible here.
reverse_dataset: true
# Settings grouped under `wandb` (presumably Weights & Biases logging - confirm).
wandb:
  enabled: true
  # No entity is set. NOTE(review): presumably falls back to the default
  # entity of the logged-in account - confirm.
  entity: null
  project: tis-dpo
# Output, evaluation and optimisation settings. Semantics are inferred from
# key names; NOTE(review): confirm against the trainer implementation.
output_dir: output
# Sampling during evaluation is off, so n_eval_model_samples is presumably
# unused for this run - TODO confirm.
sample_during_eval: false
n_eval_model_samples: 16
do_first_eval: true
# Already-resolved run directory: output_dir + "/" + exp_name + "_08-05_19-10".
# NOTE(review): the suffix looks like a date/time stamp, which suggests this
# file is a saved snapshot of one run rather than a reusable template - confirm.
local_run_dir: output/dpo_Qwen2.5-3B-sft_pku-safety_reverse_08-05_19-10
# The name combines the loss name, the model directory name, the dataset name
# and the "reverse" tag found elsewhere in this file.
exp_name: dpo_Qwen2.5-3B-sft_pku-safety_reverse
# Written with a decimal point and a signed exponent, a form that YAML 1.1
# loaders also read as a float rather than a string.
lr: 5.0e-07
gradient_accumulation_steps: 2
max_grad_norm: 10.0
# max_prompt_length is half of max_length.
# NOTE(review): presumably both are measured in tokens - confirm.
max_length: 512
max_prompt_length: 256
# n_examples is null while n_epochs is 1.
# NOTE(review): presumably training length is then set by n_epochs - confirm.
n_epochs: 1
n_examples: null
n_eval_examples: 256
trainer: FSDPTrainer
optimizer: RMSprop
warmup_steps: 150
activation_checkpointing: false
# NOTE(review): the unit (examples vs. steps) is not visible here; with a value
# this large, periodic evaluation may never trigger on a small dataset - confirm.
eval_every: 100000
minimum_log_interval_secs: 1.0
# Model settings. Semantics are inferred from key names;
# NOTE(review): confirm against the model-loading code.
model:
  # Absolute path on a specific filesystem mount; it must be changed before
  # this config can be used on another machine.
  name_or_path: /mnt/cephfs_nj/aiweiliu/TIS-DPO/models/Qwen2.5-3B-sft
  # NOTE(review): null presumably means the tokenizer is loaded from
  # name_or_path - confirm.
  tokenizer_name_or_path: null
  # NOTE(review): null presumably means no saved checkpoint is loaded on top
  # of the model at name_or_path - confirm.
  archive: null
  # NOTE(review): presumably the layer class used as the FSDP wrapping unit;
  # it would need to match the model architecture - confirm.
  block_name: Qwen2DecoderLayer
  # The policy and reference dtypes differ: float32 vs. float16.
  policy_dtype: float32
  fsdp_policy_mp: null
  reference_dtype: float16
# Loss settings. `name: dpo` matches the "dpo" prefix of exp_name in this file.
# NOTE(review): the meanings below are inferred from key names - confirm
# against the loss implementation.
loss:
  name: dpo
  # NOTE(review): presumably the DPO temperature/strength coefficient - confirm.
  beta: 0.1
  # Written as the integer 0, not 0.0. NOTE(review): confirm the consumer
  # accepts an int here.
  label_smoothing: 0
  reference_free: false
  token_level: false
# Transform settings. `method` names one of the sibling sub-mappings below
# (binary, threshold, threshold_and_scale, random, rank_based).
# NOTE(review): presumably only the sub-mapping named by `method` is read and
# the others are ignored for this run - confirm.
transform:
  method: binary
  binary:
    # NOTE(review): unit presumably percent (0-100) - confirm.
    top_percent: 100
  threshold:
    # The two thresholds are symmetric around zero.
    upper_threshold: 1.0
    lower_threshold: -1.0
  threshold_and_scale:
    # The value bounds are symmetric around zero; the scale bounds use the same
    # 0.7-1.3 range that appears under `random` and `rank_based`.
    min_val: -1.5
    max_val: 1.5
    min_scale: 0.7
    max_scale: 1.3
  random:
    min_val: 0.7
    max_val: 1.3
  rank_based:
    min_scale: 0.7
    max_scale: 1.3