{ "best_metric": null, "best_model_checkpoint": null, "epoch": 59.57446808510638, "eval_steps": 200, "global_step": 12600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.95, "learning_rate": 3.9200000000000004e-05, "loss": 17.3127, "step": 200 }, { "epoch": 0.95, "eval_loss": 5.568685531616211, "eval_runtime": 103.0983, "eval_samples_per_second": 28.584, "eval_steps_per_second": 3.579, "eval_wer": 1.0, "step": 200 }, { "epoch": 1.89, "learning_rate": 7.920000000000001e-05, "loss": 3.8015, "step": 400 }, { "epoch": 1.89, "eval_loss": 3.4598944187164307, "eval_runtime": 97.324, "eval_samples_per_second": 30.28, "eval_steps_per_second": 3.791, "eval_wer": 1.0, "step": 400 }, { "epoch": 2.84, "learning_rate": 9.921052631578947e-05, "loss": 3.4087, "step": 600 }, { "epoch": 2.84, "eval_loss": 3.372584581375122, "eval_runtime": 97.3453, "eval_samples_per_second": 30.274, "eval_steps_per_second": 3.791, "eval_wer": 1.0, "step": 600 }, { "epoch": 3.78, "learning_rate": 9.756578947368421e-05, "loss": 3.0965, "step": 800 }, { "epoch": 3.78, "eval_loss": 1.8918038606643677, "eval_runtime": 97.5597, "eval_samples_per_second": 30.207, "eval_steps_per_second": 3.782, "eval_wer": 1.0220335473182853, "step": 800 }, { "epoch": 4.73, "learning_rate": 9.592105263157895e-05, "loss": 1.268, "step": 1000 }, { "epoch": 4.73, "eval_loss": 0.871788501739502, "eval_runtime": 97.371, "eval_samples_per_second": 30.266, "eval_steps_per_second": 3.79, "eval_wer": 0.7296748809277283, "step": 1000 }, { "epoch": 5.67, "learning_rate": 9.427631578947369e-05, "loss": 0.7909, "step": 1200 }, { "epoch": 5.67, "eval_loss": 0.7251957654953003, "eval_runtime": 97.8397, "eval_samples_per_second": 30.121, "eval_steps_per_second": 3.771, "eval_wer": 0.6389728722302754, "step": 1200 }, { "epoch": 6.62, "learning_rate": 9.263157894736843e-05, "loss": 0.6465, "step": 1400 }, { "epoch": 6.62, "eval_loss": 0.6565062999725342, "eval_runtime": 98.4682, "eval_samples_per_second": 29.928, "eval_steps_per_second": 3.747, "eval_wer": 0.5930834541312902, "step": 1400 }, { "epoch": 7.57, "learning_rate": 9.098684210526317e-05, "loss": 0.5483, "step": 1600 }, { "epoch": 7.57, "eval_loss": 0.6099365949630737, "eval_runtime": 97.9877, "eval_samples_per_second": 30.075, "eval_steps_per_second": 3.766, "eval_wer": 0.5626009525781736, "step": 1600 }, { "epoch": 8.51, "learning_rate": 8.934210526315789e-05, "loss": 0.4824, "step": 1800 }, { "epoch": 8.51, "eval_loss": 0.5920658111572266, "eval_runtime": 98.6224, "eval_samples_per_second": 29.882, "eval_steps_per_second": 3.742, "eval_wer": 0.5455373783392007, "step": 1800 }, { "epoch": 9.46, "learning_rate": 8.769736842105264e-05, "loss": 0.4449, "step": 2000 }, { "epoch": 9.46, "eval_loss": 0.5740370750427246, "eval_runtime": 98.609, "eval_samples_per_second": 29.886, "eval_steps_per_second": 3.742, "eval_wer": 0.5382480844895423, "step": 2000 }, { "epoch": 10.4, "learning_rate": 8.605263157894738e-05, "loss": 0.4106, "step": 2200 }, { "epoch": 10.4, "eval_loss": 0.5488039255142212, "eval_runtime": 99.1998, "eval_samples_per_second": 29.708, "eval_steps_per_second": 3.72, "eval_wer": 0.52238558707807, "step": 2200 }, { "epoch": 11.35, "learning_rate": 8.44078947368421e-05, "loss": 0.3835, "step": 2400 }, { "epoch": 11.35, "eval_loss": 0.5332208275794983, "eval_runtime": 98.9207, "eval_samples_per_second": 29.792, "eval_steps_per_second": 3.73, "eval_wer": 0.5073514185131497, "step": 2400 }, { "epoch": 12.29, "learning_rate": 8.276315789473684e-05, "loss": 0.355, "step": 2600 }, { "epoch": 12.29, "eval_loss": 0.5386897325515747, "eval_runtime": 99.0067, "eval_samples_per_second": 29.766, "eval_steps_per_second": 3.727, "eval_wer": 0.5013046179333195, "step": 2600 }, { "epoch": 13.24, "learning_rate": 8.111842105263158e-05, "loss": 0.3364, "step": 2800 }, { "epoch": 13.24, "eval_loss": 0.553865373134613, "eval_runtime": 98.276, "eval_samples_per_second": 29.987, "eval_steps_per_second": 3.755, "eval_wer": 0.49662455995030025, "step": 2800 }, { "epoch": 14.18, "learning_rate": 7.947368421052632e-05, "loss": 0.3267, "step": 3000 }, { "epoch": 14.18, "eval_loss": 0.5239475965499878, "eval_runtime": 98.3443, "eval_samples_per_second": 29.966, "eval_steps_per_second": 3.752, "eval_wer": 0.49148892110167736, "step": 3000 }, { "epoch": 15.13, "learning_rate": 7.782894736842106e-05, "loss": 0.312, "step": 3200 }, { "epoch": 15.13, "eval_loss": 0.5197941064834595, "eval_runtime": 99.0276, "eval_samples_per_second": 29.759, "eval_steps_per_second": 3.726, "eval_wer": 0.4830813833091737, "step": 3200 }, { "epoch": 16.08, "learning_rate": 7.61842105263158e-05, "loss": 0.2835, "step": 3400 }, { "epoch": 16.08, "eval_loss": 0.5393648147583008, "eval_runtime": 99.0059, "eval_samples_per_second": 29.766, "eval_steps_per_second": 3.727, "eval_wer": 0.4880099399461586, "step": 3400 }, { "epoch": 17.02, "learning_rate": 7.453947368421052e-05, "loss": 0.283, "step": 3600 }, { "epoch": 17.02, "eval_loss": 0.5069576501846313, "eval_runtime": 98.5638, "eval_samples_per_second": 29.899, "eval_steps_per_second": 3.744, "eval_wer": 0.4773244978256368, "step": 3600 }, { "epoch": 17.97, "learning_rate": 7.289473684210527e-05, "loss": 0.2667, "step": 3800 }, { "epoch": 17.97, "eval_loss": 0.5272166728973389, "eval_runtime": 98.5512, "eval_samples_per_second": 29.903, "eval_steps_per_second": 3.744, "eval_wer": 0.47583350590184303, "step": 3800 }, { "epoch": 18.91, "learning_rate": 7.125000000000001e-05, "loss": 0.2581, "step": 4000 }, { "epoch": 18.91, "eval_loss": 0.5214602947235107, "eval_runtime": 98.4833, "eval_samples_per_second": 29.924, "eval_steps_per_second": 3.747, "eval_wer": 0.4733485193621868, "step": 4000 }, { "epoch": 19.86, "learning_rate": 6.960526315789474e-05, "loss": 0.249, "step": 4200 }, { "epoch": 19.86, "eval_loss": 0.5193012952804565, "eval_runtime": 98.3698, "eval_samples_per_second": 29.958, "eval_steps_per_second": 3.751, "eval_wer": 0.4731828535928764, "step": 4200 }, { "epoch": 20.8, "learning_rate": 6.796052631578947e-05, "loss": 0.2423, "step": 4400 }, { "epoch": 20.8, "eval_loss": 0.5255588889122009, "eval_runtime": 98.8258, "eval_samples_per_second": 29.82, "eval_steps_per_second": 3.734, "eval_wer": 0.47293435493891073, "step": 4400 }, { "epoch": 21.75, "learning_rate": 6.631578947368421e-05, "loss": 0.2337, "step": 4600 }, { "epoch": 21.75, "eval_loss": 0.5264955163002014, "eval_runtime": 98.5778, "eval_samples_per_second": 29.895, "eval_steps_per_second": 3.743, "eval_wer": 0.4662248912818389, "step": 4600 }, { "epoch": 22.7, "learning_rate": 6.467105263157895e-05, "loss": 0.2176, "step": 4800 }, { "epoch": 22.7, "eval_loss": 0.5588347315788269, "eval_runtime": 99.5679, "eval_samples_per_second": 29.598, "eval_steps_per_second": 3.706, "eval_wer": 0.4658521433008905, "step": 4800 }, { "epoch": 23.64, "learning_rate": 6.302631578947369e-05, "loss": 0.2184, "step": 5000 }, { "epoch": 23.64, "eval_loss": 0.5281316041946411, "eval_runtime": 98.9467, "eval_samples_per_second": 29.784, "eval_steps_per_second": 3.729, "eval_wer": 0.4603023400289915, "step": 5000 }, { "epoch": 24.59, "learning_rate": 6.138157894736843e-05, "loss": 0.2093, "step": 5200 }, { "epoch": 24.59, "eval_loss": 0.5339261293411255, "eval_runtime": 99.0129, "eval_samples_per_second": 29.764, "eval_steps_per_second": 3.727, "eval_wer": 0.45889418098985296, "step": 5200 }, { "epoch": 25.53, "learning_rate": 5.973684210526316e-05, "loss": 0.2022, "step": 5400 }, { "epoch": 25.53, "eval_loss": 0.5497910976409912, "eval_runtime": 98.8772, "eval_samples_per_second": 29.805, "eval_steps_per_second": 3.732, "eval_wer": 0.46071650445226753, "step": 5400 }, { "epoch": 26.48, "learning_rate": 5.809210526315789e-05, "loss": 0.1961, "step": 5600 }, { "epoch": 26.48, "eval_loss": 0.5337634682655334, "eval_runtime": 99.299, "eval_samples_per_second": 29.678, "eval_steps_per_second": 3.716, "eval_wer": 0.45553944916131706, "step": 5600 }, { "epoch": 27.42, "learning_rate": 5.6447368421052634e-05, "loss": 0.1919, "step": 5800 }, { "epoch": 27.42, "eval_loss": 0.5612215399742126, "eval_runtime": 100.2927, "eval_samples_per_second": 29.384, "eval_steps_per_second": 3.679, "eval_wer": 0.4570718575274384, "step": 5800 }, { "epoch": 28.37, "learning_rate": 5.480263157894737e-05, "loss": 0.183, "step": 6000 }, { "epoch": 28.37, "eval_loss": 0.5516127347946167, "eval_runtime": 98.4866, "eval_samples_per_second": 29.923, "eval_steps_per_second": 3.747, "eval_wer": 0.4569890246427832, "step": 6000 }, { "epoch": 29.31, "learning_rate": 5.3157894736842104e-05, "loss": 0.1799, "step": 6200 }, { "epoch": 29.31, "eval_loss": 0.5653464198112488, "eval_runtime": 98.9685, "eval_samples_per_second": 29.777, "eval_steps_per_second": 3.728, "eval_wer": 0.4539656243528681, "step": 6200 }, { "epoch": 30.26, "learning_rate": 5.151315789473684e-05, "loss": 0.1724, "step": 6400 }, { "epoch": 30.26, "eval_loss": 0.5827488303184509, "eval_runtime": 99.0205, "eval_samples_per_second": 29.762, "eval_steps_per_second": 3.727, "eval_wer": 0.4514392213708842, "step": 6400 }, { "epoch": 31.21, "learning_rate": 4.986842105263158e-05, "loss": 0.1736, "step": 6600 }, { "epoch": 31.21, "eval_loss": 0.5708160400390625, "eval_runtime": 98.3383, "eval_samples_per_second": 29.968, "eval_steps_per_second": 3.752, "eval_wer": 0.4497825636777801, "step": 6600 }, { "epoch": 32.15, "learning_rate": 4.822368421052631e-05, "loss": 0.1662, "step": 6800 }, { "epoch": 32.15, "eval_loss": 0.5754191279411316, "eval_runtime": 98.1735, "eval_samples_per_second": 30.018, "eval_steps_per_second": 3.759, "eval_wer": 0.4442327604058811, "step": 6800 }, { "epoch": 33.1, "learning_rate": 4.657894736842106e-05, "loss": 0.1666, "step": 7000 }, { "epoch": 33.1, "eval_loss": 0.5671477913856506, "eval_runtime": 98.673, "eval_samples_per_second": 29.866, "eval_steps_per_second": 3.74, "eval_wer": 0.44460550838682955, "step": 7000 }, { "epoch": 34.04, "learning_rate": 4.493421052631579e-05, "loss": 0.1586, "step": 7200 }, { "epoch": 34.04, "eval_loss": 0.5737903714179993, "eval_runtime": 98.3986, "eval_samples_per_second": 29.95, "eval_steps_per_second": 3.75, "eval_wer": 0.44642783184924417, "step": 7200 }, { "epoch": 34.99, "learning_rate": 4.328947368421053e-05, "loss": 0.154, "step": 7400 }, { "epoch": 34.99, "eval_loss": 0.5759162902832031, "eval_runtime": 99.171, "eval_samples_per_second": 29.716, "eval_steps_per_second": 3.721, "eval_wer": 0.4414992752122593, "step": 7400 }, { "epoch": 35.93, "learning_rate": 4.1644736842105265e-05, "loss": 0.1515, "step": 7600 }, { "epoch": 35.93, "eval_loss": 0.5758457779884338, "eval_runtime": 98.2053, "eval_samples_per_second": 30.009, "eval_steps_per_second": 3.757, "eval_wer": 0.445185338579416, "step": 7600 }, { "epoch": 36.88, "learning_rate": 4e-05, "loss": 0.1552, "step": 7800 }, { "epoch": 36.88, "eval_loss": 0.5782187581062317, "eval_runtime": 98.8215, "eval_samples_per_second": 29.821, "eval_steps_per_second": 3.734, "eval_wer": 0.44249326982812176, "step": 7800 }, { "epoch": 37.83, "learning_rate": 3.835526315789474e-05, "loss": 0.1471, "step": 8000 }, { "epoch": 37.83, "eval_loss": 0.5815852880477905, "eval_runtime": 98.7527, "eval_samples_per_second": 29.842, "eval_steps_per_second": 3.737, "eval_wer": 0.44187202319320773, "step": 8000 }, { "epoch": 38.77, "learning_rate": 3.671052631578947e-05, "loss": 0.1413, "step": 8200 }, { "epoch": 38.77, "eval_loss": 0.5855460166931152, "eval_runtime": 98.45, "eval_samples_per_second": 29.934, "eval_steps_per_second": 3.748, "eval_wer": 0.4429074342513978, "step": 8200 }, { "epoch": 39.72, "learning_rate": 3.506578947368421e-05, "loss": 0.1443, "step": 8400 }, { "epoch": 39.72, "eval_loss": 0.5851101279258728, "eval_runtime": 98.4835, "eval_samples_per_second": 29.924, "eval_steps_per_second": 3.747, "eval_wer": 0.43731621453717123, "step": 8400 }, { "epoch": 40.66, "learning_rate": 3.342927631578948e-05, "loss": 0.1391, "step": 8600 }, { "epoch": 40.66, "eval_loss": 0.5925698280334473, "eval_runtime": 98.7707, "eval_samples_per_second": 29.837, "eval_steps_per_second": 3.736, "eval_wer": 0.4420376889625181, "step": 8600 }, { "epoch": 41.61, "learning_rate": 3.178453947368421e-05, "loss": 0.1388, "step": 8800 }, { "epoch": 41.61, "eval_loss": 0.5927318334579468, "eval_runtime": 98.7868, "eval_samples_per_second": 29.832, "eval_steps_per_second": 3.735, "eval_wer": 0.43636363636363634, "step": 8800 }, { "epoch": 42.55, "learning_rate": 3.013980263157895e-05, "loss": 0.1419, "step": 9000 }, { "epoch": 42.55, "eval_loss": 0.5933807492256165, "eval_runtime": 98.9246, "eval_samples_per_second": 29.79, "eval_steps_per_second": 3.73, "eval_wer": 0.4367363843445848, "step": 9000 }, { "epoch": 43.5, "learning_rate": 2.8495065789473686e-05, "loss": 0.1349, "step": 9200 }, { "epoch": 43.5, "eval_loss": 0.5802015066146851, "eval_runtime": 98.625, "eval_samples_per_second": 29.881, "eval_steps_per_second": 3.741, "eval_wer": 0.4337958169393249, "step": 9200 }, { "epoch": 44.44, "learning_rate": 2.6850328947368424e-05, "loss": 0.1281, "step": 9400 }, { "epoch": 44.44, "eval_loss": 0.6156647205352783, "eval_runtime": 98.9856, "eval_samples_per_second": 29.772, "eval_steps_per_second": 3.728, "eval_wer": 0.43706771588320564, "step": 9400 }, { "epoch": 45.39, "learning_rate": 2.520559210526316e-05, "loss": 0.1301, "step": 9600 }, { "epoch": 45.39, "eval_loss": 0.6005872488021851, "eval_runtime": 98.6022, "eval_samples_per_second": 29.888, "eval_steps_per_second": 3.742, "eval_wer": 0.43122799751501345, "step": 9600 }, { "epoch": 46.34, "learning_rate": 2.3560855263157897e-05, "loss": 0.127, "step": 9800 }, { "epoch": 46.34, "eval_loss": 0.5937264561653137, "eval_runtime": 98.389, "eval_samples_per_second": 29.953, "eval_steps_per_second": 3.75, "eval_wer": 0.432429074342514, "step": 9800 }, { "epoch": 47.28, "learning_rate": 2.1916118421052635e-05, "loss": 0.134, "step": 10000 }, { "epoch": 47.28, "eval_loss": 0.5867904424667358, "eval_runtime": 98.4962, "eval_samples_per_second": 29.92, "eval_steps_per_second": 3.746, "eval_wer": 0.43416856492027334, "step": 10000 }, { "epoch": 48.23, "learning_rate": 2.0271381578947367e-05, "loss": 0.1277, "step": 10200 }, { "epoch": 48.23, "eval_loss": 0.5865517258644104, "eval_runtime": 98.5691, "eval_samples_per_second": 29.898, "eval_steps_per_second": 3.744, "eval_wer": 0.4337958169393249, "step": 10200 }, { "epoch": 49.17, "learning_rate": 1.8626644736842105e-05, "loss": 0.1229, "step": 10400 }, { "epoch": 49.17, "eval_loss": 0.6024904251098633, "eval_runtime": 99.0412, "eval_samples_per_second": 29.755, "eval_steps_per_second": 3.726, "eval_wer": 0.4347483951128598, "step": 10400 }, { "epoch": 50.12, "learning_rate": 1.6981907894736843e-05, "loss": 0.127, "step": 10600 }, { "epoch": 50.12, "eval_loss": 0.6073756217956543, "eval_runtime": 98.6531, "eval_samples_per_second": 29.872, "eval_steps_per_second": 3.74, "eval_wer": 0.4332574031890661, "step": 10600 }, { "epoch": 51.06, "learning_rate": 1.5337171052631578e-05, "loss": 0.1208, "step": 10800 }, { "epoch": 51.06, "eval_loss": 0.5911235809326172, "eval_runtime": 98.9125, "eval_samples_per_second": 29.794, "eval_steps_per_second": 3.731, "eval_wer": 0.43081383309173743, "step": 10800 }, { "epoch": 52.01, "learning_rate": 1.3692434210526316e-05, "loss": 0.1198, "step": 11000 }, { "epoch": 52.01, "eval_loss": 0.5970575213432312, "eval_runtime": 98.4395, "eval_samples_per_second": 29.937, "eval_steps_per_second": 3.748, "eval_wer": 0.42700352039759787, "step": 11000 }, { "epoch": 52.96, "learning_rate": 1.2047697368421053e-05, "loss": 0.1164, "step": 11200 }, { "epoch": 52.96, "eval_loss": 0.5997304916381836, "eval_runtime": 98.2241, "eval_samples_per_second": 30.003, "eval_steps_per_second": 3.757, "eval_wer": 0.4294885069372541, "step": 11200 }, { "epoch": 53.9, "learning_rate": 1.040296052631579e-05, "loss": 0.1169, "step": 11400 }, { "epoch": 53.9, "eval_loss": 0.5975005626678467, "eval_runtime": 99.0735, "eval_samples_per_second": 29.746, "eval_steps_per_second": 3.725, "eval_wer": 0.4315179126113067, "step": 11400 }, { "epoch": 54.85, "learning_rate": 8.758223684210528e-06, "loss": 0.1153, "step": 11600 }, { "epoch": 54.85, "eval_loss": 0.5941161513328552, "eval_runtime": 98.5335, "eval_samples_per_second": 29.909, "eval_steps_per_second": 3.745, "eval_wer": 0.42870159453302964, "step": 11600 }, { "epoch": 55.79, "learning_rate": 7.113486842105263e-06, "loss": 0.1114, "step": 11800 }, { "epoch": 55.79, "eval_loss": 0.6075084209442139, "eval_runtime": 98.2774, "eval_samples_per_second": 29.987, "eval_steps_per_second": 3.755, "eval_wer": 0.4307310002070822, "step": 11800 }, { "epoch": 56.74, "learning_rate": 5.46875e-06, "loss": 0.122, "step": 12000 }, { "epoch": 56.74, "eval_loss": 0.6036771535873413, "eval_runtime": 98.3003, "eval_samples_per_second": 29.98, "eval_steps_per_second": 3.754, "eval_wer": 0.428453095879064, "step": 12000 }, { "epoch": 57.68, "learning_rate": 3.8240131578947365e-06, "loss": 0.1099, "step": 12200 }, { "epoch": 57.68, "eval_loss": 0.6065577864646912, "eval_runtime": 99.008, "eval_samples_per_second": 29.765, "eval_steps_per_second": 3.727, "eval_wer": 0.42820459722509835, "step": 12200 }, { "epoch": 58.63, "learning_rate": 2.1792763157894734e-06, "loss": 0.1151, "step": 12400 }, { "epoch": 58.63, "eval_loss": 0.6106641292572021, "eval_runtime": 98.7395, "eval_samples_per_second": 29.846, "eval_steps_per_second": 3.737, "eval_wer": 0.429074342513978, "step": 12400 }, { "epoch": 59.57, "learning_rate": 5.345394736842105e-07, "loss": 0.1091, "step": 12600 }, { "epoch": 59.57, "eval_loss": 0.6096903085708618, "eval_runtime": 99.3597, "eval_samples_per_second": 29.66, "eval_steps_per_second": 3.714, "eval_wer": 0.42799751501346034, "step": 12600 } ], "logging_steps": 200, "max_steps": 12660, "num_train_epochs": 60, "save_steps": 200, "total_flos": 5.409308117842062e+19, "trial_name": null, "trial_params": null }