{ "project_name": "azma-hermes-pro-llama-3-8b-030524", "new_model_id": "azma-hermes-pro-llama-3-8b-030524", "base_model_id": "NousResearch/Hermes-2-Pro-Llama-3-8B", "adapter_id": "azma-hermes-pro-llama-3-8b-030524-adapter", "chat_template": "chatml", "lora_config": { "lora_rank": 16, "lora_alpha": 32, "lora_dropout": 0.1, "target_modules": [ "k_proj", "v_proj", "o_proj", "q_proj", "up_proj", "gate_proj", "down_proj" ], "modules_to_save": [ "lm_head" ], "task_type": "CAUSAL_LM" }, "load_config": { "use_flash_attention": true, "load_in_4bit": false, "cache_dir": "./", "torch_dtype": "auto", "device_map": "auto", "pad_to_multiples": false }, "dataset_config": { "dataset_id": [ "Azma-AI/azma-mermaid-dataset-single-turn-chatml", "Azma-AI/azma-dataset-v2-mermaid-without-thoughts-final-chatml-8192-seq-len" ], "max_seq_length": 8192, "shuffle": true, "data_collator": "default", "template_version": 1.0, "dataset_text_field": "text", "add_custom_tokens": { "bos_token": "<|begin_of_text|>", "eos_token": "<|im_end|>", "pad_token": "<|end_of_text|>" } }, "dpo_args": { "beta": 0.6, "loss_type": "sigmoid" }, "training_args": { "seed": 42, "max_steps": -1, "weight_decay": 0.01, "num_train_epochs": 1, "learning_rate": 1e-05, "output_dir": "./results", "optim": "paged_adamw_32bit", "gradient_accumulation_steps": 2, "per_device_train_batch_size": 8, "per_device_eval_batch_size": 2, "group_by_length": false, "resume_from_checkpoint": true, "gradient_checkpointing": true, "gradient_checkpointing_kwargs": null, "do_eval": false, "eval_steps": 100, "evaluation_strategy": "steps", "save_steps": 100, "save_total_limit": 2, "save_strategy": "steps", "logging_steps": 5, "log_level": "info", "logging_strategy": "steps", "push_to_hub": true, "hub_model_id": "Azma-AI/azma-hermes-pro-llama-3-8b-030524-adapter", "hub_private_repo": true, "hub_strategy": "checkpoint", "report_to": "wandb", "warmup_steps": 5, "neftune_noise_alpha": 5, "lr_scheduler_type": "cosine", "auto_find_batch_size": true, 
"load_best_model_at_end": true, "deepspeed": null, "bf16": true, "fp16": false }, "merge_final_model": true, "push_to_organization": true }