{ "best_global_step": 275, "best_metric": 0.9421520233154297, "best_model_checkpoint": "./dspy-8bit-lora-deepcoder-safe/checkpoint-275", "epoch": 2.9982300884955753, "eval_steps": 25, "global_step": 425, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17699115044247787, "grad_norm": 0.6848811507225037, "learning_rate": 4.8e-05, "loss": 1.9452, "mean_token_accuracy": 0.5935992772877217, "num_tokens": 53941.0, "step": 25 }, { "epoch": 0.17699115044247787, "eval_loss": 1.9623712301254272, "eval_mean_token_accuracy": 0.6255391545593738, "eval_num_tokens": 53941.0, "eval_runtime": 96.8734, "eval_samples_per_second": 2.065, "eval_steps_per_second": 2.065, "step": 25 }, { "epoch": 0.35398230088495575, "grad_norm": 5.549977779388428, "learning_rate": 9.8e-05, "loss": 1.4238, "mean_token_accuracy": 0.6662441535294056, "num_tokens": 110474.0, "step": 50 }, { "epoch": 0.35398230088495575, "eval_loss": 1.394215703010559, "eval_mean_token_accuracy": 0.6895178978145122, "eval_num_tokens": 110474.0, "eval_runtime": 96.6623, "eval_samples_per_second": 2.069, "eval_steps_per_second": 2.069, "step": 50 }, { "epoch": 0.5309734513274337, "grad_norm": 0.6720989942550659, "learning_rate": 0.000148, "loss": 1.198, "mean_token_accuracy": 0.7074533948302268, "num_tokens": 162407.0, "step": 75 }, { "epoch": 0.5309734513274337, "eval_loss": 1.203566074371338, "eval_mean_token_accuracy": 0.7197688557207584, "eval_num_tokens": 162407.0, "eval_runtime": 97.3608, "eval_samples_per_second": 2.054, "eval_steps_per_second": 2.054, "step": 75 }, { "epoch": 0.7079646017699115, "grad_norm": 0.5576093792915344, "learning_rate": 0.00019800000000000002, "loss": 1.0969, "mean_token_accuracy": 0.7274327605962754, "num_tokens": 225140.0, "step": 100 }, { "epoch": 0.7079646017699115, "eval_loss": 1.1339093446731567, "eval_mean_token_accuracy": 0.7314411370456219, "eval_num_tokens": 225140.0, "eval_runtime": 98.1154, "eval_samples_per_second": 2.038, "eval_steps_per_second": 2.038, "step": 100 }, { "epoch": 0.8849557522123894, "grad_norm": 0.5583345293998718, "learning_rate": 0.0001852760736196319, "loss": 1.0369, "mean_token_accuracy": 0.7487157288193703, "num_tokens": 285818.0, "step": 125 }, { "epoch": 0.8849557522123894, "eval_loss": 1.0866804122924805, "eval_mean_token_accuracy": 0.739626374989748, "eval_num_tokens": 285818.0, "eval_runtime": 97.5506, "eval_samples_per_second": 2.05, "eval_steps_per_second": 2.05, "step": 125 }, { "epoch": 1.056637168141593, "grad_norm": 1.0887213945388794, "learning_rate": 0.00016993865030674846, "loss": 0.9094, "mean_token_accuracy": 0.767275901492109, "num_tokens": 338145.0, "step": 150 }, { "epoch": 1.056637168141593, "eval_loss": 1.091628074645996, "eval_mean_token_accuracy": 0.7472012284398079, "eval_num_tokens": 338145.0, "eval_runtime": 97.9463, "eval_samples_per_second": 2.042, "eval_steps_per_second": 2.042, "step": 150 }, { "epoch": 1.2336283185840708, "grad_norm": 1.2973228693008423, "learning_rate": 0.00015460122699386504, "loss": 0.7983, "mean_token_accuracy": 0.786946276128292, "num_tokens": 395324.0, "step": 175 }, { "epoch": 1.2336283185840708, "eval_loss": 1.0309680700302124, "eval_mean_token_accuracy": 0.7567924955487251, "eval_num_tokens": 395324.0, "eval_runtime": 97.3557, "eval_samples_per_second": 2.054, "eval_steps_per_second": 2.054, "step": 175 }, { "epoch": 1.4106194690265488, "grad_norm": 0.8981689810752869, "learning_rate": 0.00013926380368098159, "loss": 0.8004, "mean_token_accuracy": 0.7892016369104385, "num_tokens": 453392.0, "step": 200 }, { "epoch": 1.4106194690265488, "eval_loss": 1.0047160387039185, "eval_mean_token_accuracy": 0.7607714273035526, "eval_num_tokens": 453392.0, "eval_runtime": 97.3422, "eval_samples_per_second": 2.055, "eval_steps_per_second": 2.055, "step": 200 }, { "epoch": 1.5876106194690265, "grad_norm": 1.3156253099441528, "learning_rate": 0.00012392638036809816, "loss": 0.7496, "mean_token_accuracy": 0.800215690433979, "num_tokens": 513773.0, "step": 225 }, { "epoch": 1.5876106194690265, "eval_loss": 0.9816662073135376, "eval_mean_token_accuracy": 0.7670722763240337, "eval_num_tokens": 513773.0, "eval_runtime": 98.5747, "eval_samples_per_second": 2.029, "eval_steps_per_second": 2.029, "step": 225 }, { "epoch": 1.7646017699115044, "grad_norm": 0.7007508277893066, "learning_rate": 0.00010858895705521473, "loss": 0.7315, "mean_token_accuracy": 0.805658475458622, "num_tokens": 571382.0, "step": 250 }, { "epoch": 1.7646017699115044, "eval_loss": 0.95888352394104, "eval_mean_token_accuracy": 0.7721423482894898, "eval_num_tokens": 571382.0, "eval_runtime": 97.9779, "eval_samples_per_second": 2.041, "eval_steps_per_second": 2.041, "step": 250 }, { "epoch": 1.9415929203539823, "grad_norm": 0.8127393126487732, "learning_rate": 9.325153374233129e-05, "loss": 0.7173, "mean_token_accuracy": 0.8105942443013191, "num_tokens": 628449.0, "step": 275 }, { "epoch": 1.9415929203539823, "eval_loss": 0.9421520233154297, "eval_mean_token_accuracy": 0.7754264496266842, "eval_num_tokens": 628449.0, "eval_runtime": 97.1509, "eval_samples_per_second": 2.059, "eval_steps_per_second": 2.059, "step": 275 }, { "epoch": 2.113274336283186, "grad_norm": 1.1352859735488892, "learning_rate": 7.791411042944787e-05, "loss": 0.5105, "mean_token_accuracy": 0.8593325974400511, "num_tokens": 680687.0, "step": 300 }, { "epoch": 2.113274336283186, "eval_loss": 0.9843020439147949, "eval_mean_token_accuracy": 0.7742766647040844, "eval_num_tokens": 680687.0, "eval_runtime": 98.2979, "eval_samples_per_second": 2.035, "eval_steps_per_second": 2.035, "step": 300 }, { "epoch": 2.2902654867256635, "grad_norm": 0.8646391034126282, "learning_rate": 6.257668711656443e-05, "loss": 0.4312, "mean_token_accuracy": 0.878243299126625, "num_tokens": 733979.0, "step": 325 }, { "epoch": 2.2902654867256635, "eval_loss": 0.993569552898407, "eval_mean_token_accuracy": 0.775199833214283, "eval_num_tokens": 733979.0, "eval_runtime": 96.9581, "eval_samples_per_second": 2.063, "eval_steps_per_second": 2.063, "step": 325 }, { "epoch": 2.4672566371681417, "grad_norm": 1.0304445028305054, "learning_rate": 4.723926380368098e-05, "loss": 0.3952, "mean_token_accuracy": 0.8835302656888961, "num_tokens": 789649.0, "step": 350 }, { "epoch": 2.4672566371681417, "eval_loss": 0.9702684283256531, "eval_mean_token_accuracy": 0.77936582878232, "eval_num_tokens": 789649.0, "eval_runtime": 98.4219, "eval_samples_per_second": 2.032, "eval_steps_per_second": 2.032, "step": 350 }, { "epoch": 2.6442477876106194, "grad_norm": 0.8441785573959351, "learning_rate": 3.1901840490797544e-05, "loss": 0.3966, "mean_token_accuracy": 0.8844721549749375, "num_tokens": 845295.0, "step": 375 }, { "epoch": 2.6442477876106194, "eval_loss": 0.9620086550712585, "eval_mean_token_accuracy": 0.7812778241932392, "eval_num_tokens": 845295.0, "eval_runtime": 97.2727, "eval_samples_per_second": 2.056, "eval_steps_per_second": 2.056, "step": 375 }, { "epoch": 2.8212389380530976, "grad_norm": 0.8250017762184143, "learning_rate": 1.656441717791411e-05, "loss": 0.3743, "mean_token_accuracy": 0.8831119546294213, "num_tokens": 908151.0, "step": 400 }, { "epoch": 2.8212389380530976, "eval_loss": 0.9527075290679932, "eval_mean_token_accuracy": 0.7827157293260097, "eval_num_tokens": 908151.0, "eval_runtime": 98.1465, "eval_samples_per_second": 2.038, "eval_steps_per_second": 2.038, "step": 400 }, { "epoch": 2.9982300884955753, "grad_norm": 0.8464552164077759, "learning_rate": 1.226993865030675e-06, "loss": 0.4002, "mean_token_accuracy": 0.883520859181881, "num_tokens": 967441.0, "step": 425 }, { "epoch": 2.9982300884955753, "eval_loss": 0.9509771466255188, "eval_mean_token_accuracy": 0.7838174617290496, "eval_num_tokens": 967441.0, "eval_runtime": 98.6966, "eval_samples_per_second": 2.026, "eval_steps_per_second": 2.026, "step": 425 } ], "logging_steps": 25, "max_steps": 426, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.204357166595891e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }