{ "best_metric": 0.4415583312511444, "best_model_checkpoint": "output_pipe/tf0/origin/checkpoint-200", "epoch": 4.0, "eval_steps": 200, "global_step": 2024, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1976284584980237, "grad_norm": 1.5848366022109985, "learning_rate": 2.9240121580547114e-05, "loss": 0.5377, "step": 100 }, { "epoch": 0.3952569169960474, "grad_norm": 1.709352731704712, "learning_rate": 2.7720364741641338e-05, "loss": 0.4727, "step": 200 }, { "epoch": 0.3952569169960474, "eval_accuracy": 0.796, "eval_f1": 0.7955288985823337, "eval_loss": 0.4415583312511444, "eval_matthews_correlation": 0.591370962853321, "eval_precision": 0.7960841474628472, "eval_recall": 0.7952873521791191, "eval_runtime": 0.2556, "eval_samples_per_second": 3912.889, "eval_steps_per_second": 62.606, "step": 200 }, { "epoch": 0.5928853754940712, "grad_norm": 1.3577654361724854, "learning_rate": 2.6200607902735562e-05, "loss": 0.4454, "step": 300 }, { "epoch": 0.7905138339920948, "grad_norm": 2.5865519046783447, "learning_rate": 2.4680851063829786e-05, "loss": 0.4305, "step": 400 }, { "epoch": 0.7905138339920948, "eval_accuracy": 0.796, "eval_f1": 0.7928143128747623, "eval_loss": 0.4495840072631836, "eval_matthews_correlation": 0.5989374522199814, "eval_precision": 0.8063936013682609, "eval_recall": 0.7927003616180304, "eval_runtime": 0.2543, "eval_samples_per_second": 3932.409, "eval_steps_per_second": 62.919, "step": 400 }, { "epoch": 0.9881422924901185, "grad_norm": 0.9050748944282532, "learning_rate": 2.3161094224924013e-05, "loss": 0.4465, "step": 500 }, { "epoch": 1.1857707509881423, "grad_norm": 1.9691131114959717, "learning_rate": 2.1641337386018237e-05, "loss": 0.3415, "step": 600 }, { "epoch": 1.1857707509881423, "eval_accuracy": 0.796, "eval_f1": 0.7948767450559262, "eval_loss": 0.45666906237602234, "eval_matthews_correlation": 0.5923257505144678, "eval_precision": 0.7979339496091201, "eval_recall": 0.7944023290924309, "eval_runtime": 0.2545, "eval_samples_per_second": 3928.505, "eval_steps_per_second": 62.856, "step": 600 }, { "epoch": 1.383399209486166, "grad_norm": 1.9345086812973022, "learning_rate": 2.012158054711246e-05, "loss": 0.3257, "step": 700 }, { "epoch": 1.5810276679841897, "grad_norm": 4.1219024658203125, "learning_rate": 1.8601823708206688e-05, "loss": 0.3225, "step": 800 }, { "epoch": 1.5810276679841897, "eval_accuracy": 0.803, "eval_f1": 0.8015663166377074, "eval_loss": 0.44956839084625244, "eval_matthews_correlation": 0.6073670430930368, "eval_precision": 0.8063543296817944, "eval_recall": 0.8010359976132408, "eval_runtime": 0.2534, "eval_samples_per_second": 3946.161, "eval_steps_per_second": 63.139, "step": 800 }, { "epoch": 1.7786561264822134, "grad_norm": 2.96213436126709, "learning_rate": 1.7082066869300912e-05, "loss": 0.3249, "step": 900 }, { "epoch": 1.9762845849802373, "grad_norm": 2.0221433639526367, "learning_rate": 1.5577507598784196e-05, "loss": 0.3245, "step": 1000 }, { "epoch": 1.9762845849802373, "eval_accuracy": 0.799, "eval_f1": 0.7988308167168588, "eval_loss": 0.4458507001399994, "eval_matthews_correlation": 0.5977016347708501, "eval_precision": 0.7987640881147541, "eval_recall": 0.7989375718330389, "eval_runtime": 0.254, "eval_samples_per_second": 3937.334, "eval_steps_per_second": 62.997, "step": 1000 }, { "epoch": 2.1739130434782608, "grad_norm": 6.168671131134033, "learning_rate": 1.405775075987842e-05, "loss": 0.1443, "step": 1100 }, { "epoch": 2.3715415019762847, "grad_norm": 4.112609386444092, "learning_rate": 1.2537993920972645e-05, "loss": 0.1103, "step": 1200 }, { "epoch": 2.3715415019762847, "eval_accuracy": 0.78, "eval_f1": 0.778510705987057, "eval_loss": 0.6606701612472534, "eval_matthews_correlation": 0.5606591610125751, "eval_precision": 0.7825653544908961, "eval_recall": 0.7781114968904053, "eval_runtime": 0.2532, "eval_samples_per_second": 3949.431, "eval_steps_per_second": 63.191, "step": 1200 }, { "epoch": 2.5691699604743086, "grad_norm": 2.68495512008667, "learning_rate": 1.101823708206687e-05, "loss": 0.1111, "step": 1300 }, { "epoch": 2.766798418972332, "grad_norm": 7.105761528015137, "learning_rate": 9.498480243161095e-06, "loss": 0.1078, "step": 1400 }, { "epoch": 2.766798418972332, "eval_accuracy": 0.799, "eval_f1": 0.7989660252582687, "eval_loss": 0.6632059812545776, "eval_matthews_correlation": 0.6014707475917296, "eval_precision": 0.801103974307507, "eval_recall": 0.8003672245115353, "eval_runtime": 0.2539, "eval_samples_per_second": 3938.144, "eval_steps_per_second": 63.01, "step": 1400 }, { "epoch": 2.9644268774703555, "grad_norm": 5.618806838989258, "learning_rate": 7.978723404255319e-06, "loss": 0.1095, "step": 1500 }, { "epoch": 3.1620553359683794, "grad_norm": 0.052807554602622986, "learning_rate": 6.458966565349544e-06, "loss": 0.0384, "step": 1600 }, { "epoch": 3.1620553359683794, "eval_accuracy": 0.795, "eval_f1": 0.7947485669945684, "eval_loss": 0.958301842212677, "eval_matthews_correlation": 0.589498726885206, "eval_precision": 0.7947700219484451, "eval_recall": 0.7947287063845806, "eval_runtime": 0.253, "eval_samples_per_second": 3953.143, "eval_steps_per_second": 63.25, "step": 1600 }, { "epoch": 3.3596837944664033, "grad_norm": 0.5183179974555969, "learning_rate": 4.939209726443769e-06, "loss": 0.0242, "step": 1700 }, { "epoch": 3.5573122529644268, "grad_norm": 0.16107724606990814, "learning_rate": 3.419452887537994e-06, "loss": 0.0172, "step": 1800 }, { "epoch": 3.5573122529644268, "eval_accuracy": 0.794, "eval_f1": 0.7937326775501049, "eval_loss": 1.0012874603271484, "eval_matthews_correlation": 0.5874717193339695, "eval_precision": 0.7937782157435336, "eval_recall": 0.79369350969721, "eval_runtime": 0.2538, "eval_samples_per_second": 3940.36, "eval_steps_per_second": 63.046, "step": 1800 }, { "epoch": 3.7549407114624507, "grad_norm": 0.045861050486564636, "learning_rate": 1.899696048632219e-06, "loss": 0.0141, "step": 1900 }, { "epoch": 3.9525691699604746, "grad_norm": 0.3307662308216095, "learning_rate": 3.7993920972644377e-07, "loss": 0.0123, "step": 2000 }, { "epoch": 3.9525691699604746, "eval_accuracy": 0.789, "eval_f1": 0.7885718580124752, "eval_loss": 1.0535693168640137, "eval_matthews_correlation": 0.5773352449673653, "eval_precision": 0.7889541602465331, "eval_recall": 0.788381368862405, "eval_runtime": 0.2532, "eval_samples_per_second": 3948.915, "eval_steps_per_second": 63.183, "step": 2000 }, { "epoch": 4.0, "step": 2024, "total_flos": 8718835559772720.0, "train_loss": 0.2305071633088259, "train_runtime": 175.4879, "train_samples_per_second": 738.011, "train_steps_per_second": 11.534 } ], "logging_steps": 100, "max_steps": 2024, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8718835559772720.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }