{ "best_global_step": 27000, "best_metric": 0.3375175870559268, "best_model_checkpoint": "./wav2vec2-urdu-finetuned\\checkpoint-27000", "epoch": 30.0, "eval_steps": 1000, "global_step": 27480, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.027307482250136537, "grad_norm": 195.3361358642578, "learning_rate": 7.2e-07, "loss": 20.8334, "step": 25 }, { "epoch": 0.054614964500273075, "grad_norm": 250.09060668945312, "learning_rate": 1.4700000000000001e-06, "loss": 21.6915, "step": 50 }, { "epoch": 0.0819224467504096, "grad_norm": null, "learning_rate": 2.22e-06, "loss": 13.1546, "step": 75 }, { "epoch": 0.10922992900054615, "grad_norm": 48.306678771972656, "learning_rate": 2.9700000000000004e-06, "loss": 5.846, "step": 100 }, { "epoch": 0.1365374112506827, "grad_norm": 10.920256614685059, "learning_rate": 3.72e-06, "loss": 4.0239, "step": 125 }, { "epoch": 0.1638448935008192, "grad_norm": 22.723594665527344, "learning_rate": 4.4699999999999996e-06, "loss": 3.581, "step": 150 }, { "epoch": 0.19115237575095576, "grad_norm": 5.680387020111084, "learning_rate": 5.22e-06, "loss": 3.3823, "step": 175 }, { "epoch": 0.2184598580010923, "grad_norm": 8.895586967468262, "learning_rate": 5.9700000000000004e-06, "loss": 3.3967, "step": 200 }, { "epoch": 0.24576734025122884, "grad_norm": 6.286562442779541, "learning_rate": 6.72e-06, "loss": 3.2792, "step": 225 }, { "epoch": 0.2730748225013654, "grad_norm": 5.8914690017700195, "learning_rate": 7.4700000000000005e-06, "loss": 3.3544, "step": 250 }, { "epoch": 0.3003823047515019, "grad_norm": 10.516593933105469, "learning_rate": 8.220000000000001e-06, "loss": 3.2069, "step": 275 }, { "epoch": 0.3276897870016384, "grad_norm": 178.02813720703125, "learning_rate": 8.97e-06, "loss": 3.311, "step": 300 }, { "epoch": 0.354997269251775, "grad_norm": 4.254091739654541, "learning_rate": 9.72e-06, "loss": 3.1842, "step": 325 }, { "epoch": 0.3823047515019115, "grad_norm": 
8.68007755279541, "learning_rate": 1.047e-05, "loss": 3.2566, "step": 350 }, { "epoch": 0.4096122337520481, "grad_norm": 13.134012222290039, "learning_rate": 1.1220000000000001e-05, "loss": 3.1206, "step": 375 }, { "epoch": 0.4369197160021846, "grad_norm": 6.160197734832764, "learning_rate": 1.197e-05, "loss": 3.219, "step": 400 }, { "epoch": 0.4642271982523211, "grad_norm": 10.527297019958496, "learning_rate": 1.272e-05, "loss": 3.1357, "step": 425 }, { "epoch": 0.4915346805024577, "grad_norm": 11.040935516357422, "learning_rate": 1.3470000000000001e-05, "loss": 3.0775, "step": 450 }, { "epoch": 0.5188421627525942, "grad_norm": 7.630657196044922, "learning_rate": 1.422e-05, "loss": 2.9818, "step": 475 }, { "epoch": 0.5461496450027308, "grad_norm": 8.904446601867676, "learning_rate": 1.497e-05, "loss": 2.9723, "step": 500 }, { "epoch": 0.5734571272528672, "grad_norm": 8.574958801269531, "learning_rate": 1.5720000000000002e-05, "loss": 2.5024, "step": 525 }, { "epoch": 0.6007646095030038, "grad_norm": 16.767169952392578, "learning_rate": 1.6470000000000003e-05, "loss": 2.2885, "step": 550 }, { "epoch": 0.6280720917531404, "grad_norm": 13.703070640563965, "learning_rate": 1.7219999999999998e-05, "loss": 2.1101, "step": 575 }, { "epoch": 0.6553795740032768, "grad_norm": 14.442805290222168, "learning_rate": 1.797e-05, "loss": 1.8642, "step": 600 }, { "epoch": 0.6826870562534134, "grad_norm": 8.755877494812012, "learning_rate": 1.872e-05, "loss": 1.6035, "step": 625 }, { "epoch": 0.70999453850355, "grad_norm": 14.113118171691895, "learning_rate": 1.947e-05, "loss": 1.7019, "step": 650 }, { "epoch": 0.7373020207536866, "grad_norm": 7.293804168701172, "learning_rate": 2.0220000000000003e-05, "loss": 1.4813, "step": 675 }, { "epoch": 0.764609503003823, "grad_norm": 14.101927757263184, "learning_rate": 2.097e-05, "loss": 1.5864, "step": 700 }, { "epoch": 0.7919169852539596, "grad_norm": 6.8870062828063965, "learning_rate": 2.172e-05, "loss": 1.438, "step": 725 }, { "epoch": 
0.8192244675040962, "grad_norm": 36.525360107421875, "learning_rate": 2.247e-05, "loss": 1.6142, "step": 750 }, { "epoch": 0.8465319497542326, "grad_norm": 8.31971263885498, "learning_rate": 2.322e-05, "loss": 1.2854, "step": 775 }, { "epoch": 0.8738394320043692, "grad_norm": 14.801036834716797, "learning_rate": 2.3970000000000003e-05, "loss": 1.4956, "step": 800 }, { "epoch": 0.9011469142545058, "grad_norm": 8.631264686584473, "learning_rate": 2.472e-05, "loss": 1.229, "step": 825 }, { "epoch": 0.9284543965046422, "grad_norm": 15.072855949401855, "learning_rate": 2.547e-05, "loss": 1.351, "step": 850 }, { "epoch": 0.9557618787547788, "grad_norm": 9.911062240600586, "learning_rate": 2.622e-05, "loss": 1.3008, "step": 875 }, { "epoch": 0.9830693610049154, "grad_norm": 22.423351287841797, "learning_rate": 2.697e-05, "loss": 1.3854, "step": 900 }, { "epoch": 1.0098306936100492, "grad_norm": 9.4551362991333, "learning_rate": 2.7720000000000002e-05, "loss": 1.2056, "step": 925 }, { "epoch": 1.0371381758601856, "grad_norm": 12.991569519042969, "learning_rate": 2.847e-05, "loss": 1.2196, "step": 950 }, { "epoch": 1.0644456581103223, "grad_norm": 7.331196308135986, "learning_rate": 2.922e-05, "loss": 1.1822, "step": 975 }, { "epoch": 1.0917531403604588, "grad_norm": 13.038766860961914, "learning_rate": 2.997e-05, "loss": 1.1852, "step": 1000 }, { "epoch": 1.0917531403604588, "eval_cer": 0.23647798742138365, "eval_loss": 1.0215742588043213, "eval_model_preparation_time": 0.005, "eval_runtime": 125.3821, "eval_samples_per_second": 40.157, "eval_steps_per_second": 5.025, "eval_wer": 0.6106885332395356, "step": 1000 }, { "epoch": 1.1190606226105952, "grad_norm": 5.318665981292725, "learning_rate": 2.9972809667673717e-05, "loss": 1.2162, "step": 1025 }, { "epoch": 1.146368104860732, "grad_norm": 6.376751899719238, "learning_rate": 2.9944486404833837e-05, "loss": 1.0727, "step": 1050 }, { "epoch": 1.1736755871108684, "grad_norm": 5.344696998596191, "learning_rate": 
2.9916163141993956e-05, "loss": 1.2704, "step": 1075 }, { "epoch": 1.2009830693610049, "grad_norm": 8.982000350952148, "learning_rate": 2.9887839879154082e-05, "loss": 0.9739, "step": 1100 }, { "epoch": 1.2282905516111415, "grad_norm": 4.730240345001221, "learning_rate": 2.98595166163142e-05, "loss": 1.1791, "step": 1125 }, { "epoch": 1.255598033861278, "grad_norm": 7.3901047706604, "learning_rate": 2.983119335347432e-05, "loss": 1.1041, "step": 1150 }, { "epoch": 1.2829055161114145, "grad_norm": 6.69800329208374, "learning_rate": 2.980287009063444e-05, "loss": 1.267, "step": 1175 }, { "epoch": 1.3102129983615511, "grad_norm": 7.281691074371338, "learning_rate": 2.9774546827794566e-05, "loss": 1.2228, "step": 1200 }, { "epoch": 1.3375204806116876, "grad_norm": 13.380387306213379, "learning_rate": 2.9746223564954685e-05, "loss": 1.0971, "step": 1225 }, { "epoch": 1.364827962861824, "grad_norm": 15.066123962402344, "learning_rate": 2.9717900302114804e-05, "loss": 1.0664, "step": 1250 }, { "epoch": 1.3921354451119607, "grad_norm": 12.714815139770508, "learning_rate": 2.9689577039274924e-05, "loss": 1.2063, "step": 1275 }, { "epoch": 1.4194429273620972, "grad_norm": 8.586477279663086, "learning_rate": 2.9661253776435043e-05, "loss": 1.0229, "step": 1300 }, { "epoch": 1.4467504096122337, "grad_norm": 6.954416751861572, "learning_rate": 2.963293051359517e-05, "loss": 1.1267, "step": 1325 }, { "epoch": 1.4740578918623704, "grad_norm": 5.479881763458252, "learning_rate": 2.960460725075529e-05, "loss": 0.9725, "step": 1350 }, { "epoch": 1.5013653741125068, "grad_norm": 17.944801330566406, "learning_rate": 2.9576283987915408e-05, "loss": 1.1133, "step": 1375 }, { "epoch": 1.5286728563626433, "grad_norm": 13.495190620422363, "learning_rate": 2.9547960725075527e-05, "loss": 0.8591, "step": 1400 }, { "epoch": 1.55598033861278, "grad_norm": 12.373242378234863, "learning_rate": 2.951963746223565e-05, "loss": 1.0943, "step": 1425 }, { "epoch": 1.5832878208629164, "grad_norm": 
10.040867805480957, "learning_rate": 2.9491314199395772e-05, "loss": 0.8578, "step": 1450 }, { "epoch": 1.6105953031130529, "grad_norm": 4.8207173347473145, "learning_rate": 2.946299093655589e-05, "loss": 0.9932, "step": 1475 }, { "epoch": 1.6379027853631896, "grad_norm": 14.453290939331055, "learning_rate": 2.943466767371601e-05, "loss": 1.0443, "step": 1500 }, { "epoch": 1.665210267613326, "grad_norm": 6.432037830352783, "learning_rate": 2.9406344410876134e-05, "loss": 0.9666, "step": 1525 }, { "epoch": 1.6925177498634625, "grad_norm": 10.589091300964355, "learning_rate": 2.9378021148036256e-05, "loss": 0.8905, "step": 1550 }, { "epoch": 1.7198252321135992, "grad_norm": 5.03717565536499, "learning_rate": 2.9349697885196376e-05, "loss": 0.9318, "step": 1575 }, { "epoch": 1.7471327143637356, "grad_norm": 8.919429779052734, "learning_rate": 2.9321374622356495e-05, "loss": 0.8341, "step": 1600 }, { "epoch": 1.774440196613872, "grad_norm": 7.142226696014404, "learning_rate": 2.9293051359516617e-05, "loss": 0.9204, "step": 1625 }, { "epoch": 1.8017476788640088, "grad_norm": 8.039872169494629, "learning_rate": 2.9264728096676737e-05, "loss": 0.8055, "step": 1650 }, { "epoch": 1.8290551611141452, "grad_norm": 5.7004313468933105, "learning_rate": 2.923640483383686e-05, "loss": 0.9553, "step": 1675 }, { "epoch": 1.8563626433642817, "grad_norm": 6.501955986022949, "learning_rate": 2.920808157099698e-05, "loss": 0.9852, "step": 1700 }, { "epoch": 1.8836701256144184, "grad_norm": 7.070892810821533, "learning_rate": 2.91797583081571e-05, "loss": 0.9444, "step": 1725 }, { "epoch": 1.9109776078645548, "grad_norm": 8.434183120727539, "learning_rate": 2.915143504531722e-05, "loss": 0.8924, "step": 1750 }, { "epoch": 1.9382850901146913, "grad_norm": 5.203063011169434, "learning_rate": 2.9123111782477343e-05, "loss": 0.9569, "step": 1775 }, { "epoch": 1.965592572364828, "grad_norm": 10.05788803100586, "learning_rate": 2.9094788519637463e-05, "loss": 0.8783, "step": 1800 }, { 
"epoch": 1.9929000546149647, "grad_norm": 11.219220161437988, "learning_rate": 2.9066465256797585e-05, "loss": 0.8274, "step": 1825 }, { "epoch": 2.0196613872200984, "grad_norm": 6.037083148956299, "learning_rate": 2.9038141993957705e-05, "loss": 0.7961, "step": 1850 }, { "epoch": 2.046968869470235, "grad_norm": 10.075223922729492, "learning_rate": 2.9009818731117824e-05, "loss": 0.7978, "step": 1875 }, { "epoch": 2.0742763517203713, "grad_norm": 6.325448513031006, "learning_rate": 2.8981495468277947e-05, "loss": 0.754, "step": 1900 }, { "epoch": 2.101583833970508, "grad_norm": 10.192734718322754, "learning_rate": 2.895317220543807e-05, "loss": 0.7656, "step": 1925 }, { "epoch": 2.1288913162206446, "grad_norm": 7.831711769104004, "learning_rate": 2.892484894259819e-05, "loss": 0.8169, "step": 1950 }, { "epoch": 2.156198798470781, "grad_norm": 22.231117248535156, "learning_rate": 2.8896525679758308e-05, "loss": 0.724, "step": 1975 }, { "epoch": 2.1835062807209176, "grad_norm": 6.402003288269043, "learning_rate": 2.886820241691843e-05, "loss": 0.7813, "step": 2000 }, { "epoch": 2.1835062807209176, "eval_cer": 0.18491564340620945, "eval_loss": 0.8069040179252625, "eval_model_preparation_time": 0.005, "eval_runtime": 132.1232, "eval_samples_per_second": 38.108, "eval_steps_per_second": 4.768, "eval_wer": 0.4750923320436159, "step": 2000 }, { "epoch": 2.2108137629710543, "grad_norm": 9.972426414489746, "learning_rate": 2.883987915407855e-05, "loss": 0.7408, "step": 2025 }, { "epoch": 2.2381212452211905, "grad_norm": 6.963690280914307, "learning_rate": 2.8811555891238672e-05, "loss": 0.7983, "step": 2050 }, { "epoch": 2.265428727471327, "grad_norm": 10.607518196105957, "learning_rate": 2.8783232628398792e-05, "loss": 0.7442, "step": 2075 }, { "epoch": 2.292736209721464, "grad_norm": 7.823490142822266, "learning_rate": 2.875490936555891e-05, "loss": 0.7496, "step": 2100 }, { "epoch": 2.3200436919716, "grad_norm": 6.646759510040283, "learning_rate": 2.8726586102719034e-05, 
"loss": 0.7548, "step": 2125 }, { "epoch": 2.347351174221737, "grad_norm": 7.306237697601318, "learning_rate": 2.8698262839879156e-05, "loss": 0.7124, "step": 2150 }, { "epoch": 2.3746586564718735, "grad_norm": 8.629114151000977, "learning_rate": 2.8669939577039276e-05, "loss": 0.7258, "step": 2175 }, { "epoch": 2.4019661387220097, "grad_norm": 4.617923259735107, "learning_rate": 2.8641616314199395e-05, "loss": 0.789, "step": 2200 }, { "epoch": 2.4292736209721464, "grad_norm": 10.914888381958008, "learning_rate": 2.8613293051359514e-05, "loss": 0.7239, "step": 2225 }, { "epoch": 2.456581103222283, "grad_norm": 8.010298728942871, "learning_rate": 2.858496978851964e-05, "loss": 0.7642, "step": 2250 }, { "epoch": 2.4838885854724193, "grad_norm": 13.949987411499023, "learning_rate": 2.855664652567976e-05, "loss": 0.7234, "step": 2275 }, { "epoch": 2.511196067722556, "grad_norm": 7.933401107788086, "learning_rate": 2.852832326283988e-05, "loss": 0.806, "step": 2300 }, { "epoch": 2.5385035499726927, "grad_norm": 6.27608060836792, "learning_rate": 2.8499999999999998e-05, "loss": 0.6976, "step": 2325 }, { "epoch": 2.565811032222829, "grad_norm": 16.972362518310547, "learning_rate": 2.8471676737160124e-05, "loss": 0.8424, "step": 2350 }, { "epoch": 2.5931185144729656, "grad_norm": 10.433345794677734, "learning_rate": 2.8443353474320244e-05, "loss": 0.8162, "step": 2375 }, { "epoch": 2.6204259967231023, "grad_norm": 7.745542526245117, "learning_rate": 2.8415030211480363e-05, "loss": 0.7755, "step": 2400 }, { "epoch": 2.6477334789732385, "grad_norm": 11.558115005493164, "learning_rate": 2.8386706948640482e-05, "loss": 0.7139, "step": 2425 }, { "epoch": 2.675040961223375, "grad_norm": 5.152831077575684, "learning_rate": 2.8358383685800605e-05, "loss": 0.71, "step": 2450 }, { "epoch": 2.702348443473512, "grad_norm": 10.804621696472168, "learning_rate": 2.8330060422960727e-05, "loss": 0.7124, "step": 2475 }, { "epoch": 2.729655925723648, "grad_norm": 9.169055938720703, 
"learning_rate": 2.8301737160120847e-05, "loss": 0.7052, "step": 2500 }, { "epoch": 2.756963407973785, "grad_norm": 6.01881217956543, "learning_rate": 2.8273413897280966e-05, "loss": 0.7097, "step": 2525 }, { "epoch": 2.7842708902239215, "grad_norm": 7.430484771728516, "learning_rate": 2.824509063444109e-05, "loss": 0.5998, "step": 2550 }, { "epoch": 2.8115783724740577, "grad_norm": 9.369545936584473, "learning_rate": 2.821676737160121e-05, "loss": 0.7124, "step": 2575 }, { "epoch": 2.8388858547241944, "grad_norm": 17.71640396118164, "learning_rate": 2.818844410876133e-05, "loss": 0.6708, "step": 2600 }, { "epoch": 2.866193336974331, "grad_norm": 9.895975112915039, "learning_rate": 2.816012084592145e-05, "loss": 0.6853, "step": 2625 }, { "epoch": 2.8935008192244673, "grad_norm": 10.001352310180664, "learning_rate": 2.8131797583081573e-05, "loss": 0.6881, "step": 2650 }, { "epoch": 2.920808301474604, "grad_norm": 6.336743354797363, "learning_rate": 2.8103474320241692e-05, "loss": 0.7841, "step": 2675 }, { "epoch": 2.9481157837247407, "grad_norm": 6.119333744049072, "learning_rate": 2.8075151057401815e-05, "loss": 0.6596, "step": 2700 }, { "epoch": 2.975423265974877, "grad_norm": 15.343019485473633, "learning_rate": 2.8046827794561934e-05, "loss": 0.7838, "step": 2725 }, { "epoch": 3.002184598580011, "grad_norm": 5.7259135246276855, "learning_rate": 2.8018504531722057e-05, "loss": 0.7352, "step": 2750 }, { "epoch": 3.0294920808301473, "grad_norm": 6.889578819274902, "learning_rate": 2.7990181268882176e-05, "loss": 0.6068, "step": 2775 }, { "epoch": 3.056799563080284, "grad_norm": 29.84564971923828, "learning_rate": 2.7961858006042295e-05, "loss": 0.6599, "step": 2800 }, { "epoch": 3.0841070453304207, "grad_norm": 5.435133934020996, "learning_rate": 2.7933534743202418e-05, "loss": 0.6565, "step": 2825 }, { "epoch": 3.111414527580557, "grad_norm": 5.325722694396973, "learning_rate": 2.7905211480362537e-05, "loss": 0.6715, "step": 2850 }, { "epoch": 3.1387220098306936, 
"grad_norm": 15.133538246154785, "learning_rate": 2.787688821752266e-05, "loss": 0.5606, "step": 2875 }, { "epoch": 3.1660294920808303, "grad_norm": 4.896608352661133, "learning_rate": 2.784856495468278e-05, "loss": 0.7121, "step": 2900 }, { "epoch": 3.1933369743309665, "grad_norm": 4.6177263259887695, "learning_rate": 2.7820241691842902e-05, "loss": 0.5796, "step": 2925 }, { "epoch": 3.220644456581103, "grad_norm": 10.980415344238281, "learning_rate": 2.779191842900302e-05, "loss": 0.7208, "step": 2950 }, { "epoch": 3.24795193883124, "grad_norm": 5.0668559074401855, "learning_rate": 2.7763595166163144e-05, "loss": 0.5911, "step": 2975 }, { "epoch": 3.275259421081376, "grad_norm": 10.469432830810547, "learning_rate": 2.7735271903323263e-05, "loss": 0.7421, "step": 3000 }, { "epoch": 3.275259421081376, "eval_cer": 0.17335030448237995, "eval_loss": 0.7640864253044128, "eval_model_preparation_time": 0.005, "eval_runtime": 117.2577, "eval_samples_per_second": 42.94, "eval_steps_per_second": 5.373, "eval_wer": 0.44407316215265563, "step": 3000 }, { "epoch": 3.302566903331513, "grad_norm": 6.846032619476318, "learning_rate": 2.7706948640483382e-05, "loss": 0.6003, "step": 3025 }, { "epoch": 3.3298743855816495, "grad_norm": 5.88096809387207, "learning_rate": 2.7678625377643505e-05, "loss": 0.6948, "step": 3050 }, { "epoch": 3.3571818678317857, "grad_norm": 9.918482780456543, "learning_rate": 2.7650302114803628e-05, "loss": 0.6155, "step": 3075 }, { "epoch": 3.3844893500819224, "grad_norm": 4.603588104248047, "learning_rate": 2.7621978851963747e-05, "loss": 0.6854, "step": 3100 }, { "epoch": 3.411796832332059, "grad_norm": 5.835148334503174, "learning_rate": 2.7593655589123866e-05, "loss": 0.6098, "step": 3125 }, { "epoch": 3.4391043145821953, "grad_norm": 4.72317361831665, "learning_rate": 2.756533232628399e-05, "loss": 0.677, "step": 3150 }, { "epoch": 3.466411796832332, "grad_norm": 7.706675052642822, "learning_rate": 2.753700906344411e-05, "loss": 0.6076, "step": 3175 
}, { "epoch": 3.4937192790824687, "grad_norm": 4.951596736907959, "learning_rate": 2.750868580060423e-05, "loss": 0.6822, "step": 3200 }, { "epoch": 3.521026761332605, "grad_norm": 9.894356727600098, "learning_rate": 2.748036253776435e-05, "loss": 0.6077, "step": 3225 }, { "epoch": 3.5483342435827416, "grad_norm": 10.15711784362793, "learning_rate": 2.745203927492447e-05, "loss": 0.6312, "step": 3250 }, { "epoch": 3.5756417258328783, "grad_norm": 10.562280654907227, "learning_rate": 2.7423716012084595e-05, "loss": 0.5127, "step": 3275 }, { "epoch": 3.6029492080830146, "grad_norm": 4.857013702392578, "learning_rate": 2.7395392749244715e-05, "loss": 0.6441, "step": 3300 }, { "epoch": 3.6302566903331512, "grad_norm": 11.668046951293945, "learning_rate": 2.7367069486404834e-05, "loss": 0.6193, "step": 3325 }, { "epoch": 3.657564172583288, "grad_norm": 5.295305252075195, "learning_rate": 2.7338746223564953e-05, "loss": 0.6409, "step": 3350 }, { "epoch": 3.684871654833424, "grad_norm": 6.815990924835205, "learning_rate": 2.731042296072508e-05, "loss": 0.5593, "step": 3375 }, { "epoch": 3.712179137083561, "grad_norm": 5.178914546966553, "learning_rate": 2.72820996978852e-05, "loss": 0.6451, "step": 3400 }, { "epoch": 3.7394866193336975, "grad_norm": 9.680222511291504, "learning_rate": 2.7253776435045318e-05, "loss": 0.5853, "step": 3425 }, { "epoch": 3.7667941015838338, "grad_norm": 5.88323974609375, "learning_rate": 2.7225453172205437e-05, "loss": 0.6365, "step": 3450 }, { "epoch": 3.7941015838339704, "grad_norm": 17.620262145996094, "learning_rate": 2.719712990936556e-05, "loss": 0.6164, "step": 3475 }, { "epoch": 3.821409066084107, "grad_norm": 14.48923397064209, "learning_rate": 2.7168806646525683e-05, "loss": 0.7036, "step": 3500 }, { "epoch": 3.8487165483342434, "grad_norm": 5.46053409576416, "learning_rate": 2.7140483383685802e-05, "loss": 0.5951, "step": 3525 }, { "epoch": 3.87602403058438, "grad_norm": 4.493792533874512, "learning_rate": 2.711216012084592e-05, 
"loss": 0.6162, "step": 3550 }, { "epoch": 3.9033315128345167, "grad_norm": 18.07945442199707, "learning_rate": 2.708383685800604e-05, "loss": 0.4893, "step": 3575 }, { "epoch": 3.930638995084653, "grad_norm": 5.754079818725586, "learning_rate": 2.7055513595166163e-05, "loss": 0.6007, "step": 3600 }, { "epoch": 3.9579464773347897, "grad_norm": 6.469464302062988, "learning_rate": 2.7027190332326286e-05, "loss": 0.5512, "step": 3625 }, { "epoch": 3.9852539595849263, "grad_norm": 6.694402694702148, "learning_rate": 2.6998867069486405e-05, "loss": 0.6957, "step": 3650 }, { "epoch": 4.0120152921900605, "grad_norm": 4.612822532653809, "learning_rate": 2.6970543806646524e-05, "loss": 0.5186, "step": 3675 }, { "epoch": 4.039322774440197, "grad_norm": 7.911181449890137, "learning_rate": 2.6942220543806647e-05, "loss": 0.4552, "step": 3700 }, { "epoch": 4.066630256690333, "grad_norm": 6.286138534545898, "learning_rate": 2.691389728096677e-05, "loss": 0.5843, "step": 3725 }, { "epoch": 4.09393773894047, "grad_norm": 19.953794479370117, "learning_rate": 2.688557401812689e-05, "loss": 0.5507, "step": 3750 }, { "epoch": 4.121245221190606, "grad_norm": 5.327853202819824, "learning_rate": 2.685725075528701e-05, "loss": 0.6213, "step": 3775 }, { "epoch": 4.148552703440743, "grad_norm": 6.555978298187256, "learning_rate": 2.682892749244713e-05, "loss": 0.4944, "step": 3800 }, { "epoch": 4.17586018569088, "grad_norm": 4.61016321182251, "learning_rate": 2.680060422960725e-05, "loss": 0.5815, "step": 3825 }, { "epoch": 4.203167667941016, "grad_norm": 4.8306097984313965, "learning_rate": 2.6772280966767373e-05, "loss": 0.5048, "step": 3850 }, { "epoch": 4.230475150191152, "grad_norm": 14.265945434570312, "learning_rate": 2.6743957703927492e-05, "loss": 0.6678, "step": 3875 }, { "epoch": 4.257782632441289, "grad_norm": 9.26665210723877, "learning_rate": 2.6715634441087615e-05, "loss": 0.5022, "step": 3900 }, { "epoch": 4.2850901146914255, "grad_norm": 4.838476181030273, "learning_rate": 
2.6687311178247734e-05, "loss": 0.5946, "step": 3925 }, { "epoch": 4.312397596941562, "grad_norm": 8.117902755737305, "learning_rate": 2.6658987915407857e-05, "loss": 0.5748, "step": 3950 }, { "epoch": 4.339705079191699, "grad_norm": 5.155851364135742, "learning_rate": 2.6630664652567976e-05, "loss": 0.5486, "step": 3975 }, { "epoch": 4.367012561441835, "grad_norm": 8.987699508666992, "learning_rate": 2.66023413897281e-05, "loss": 0.5415, "step": 4000 }, { "epoch": 4.367012561441835, "eval_cer": 0.16170510132774285, "eval_loss": 0.7140392065048218, "eval_model_preparation_time": 0.005, "eval_runtime": 117.1015, "eval_samples_per_second": 42.997, "eval_steps_per_second": 5.38, "eval_wer": 0.4174727400633134, "step": 4000 }, { "epoch": 4.394320043691971, "grad_norm": 5.217986106872559, "learning_rate": 2.6574018126888218e-05, "loss": 0.5585, "step": 4025 }, { "epoch": 4.4216275259421085, "grad_norm": 10.529208183288574, "learning_rate": 2.6545694864048337e-05, "loss": 0.5142, "step": 4050 }, { "epoch": 4.448935008192245, "grad_norm": 12.6882905960083, "learning_rate": 2.651737160120846e-05, "loss": 0.5621, "step": 4075 }, { "epoch": 4.476242490442381, "grad_norm": 11.089171409606934, "learning_rate": 2.6489048338368583e-05, "loss": 0.5668, "step": 4100 }, { "epoch": 4.503549972692518, "grad_norm": 4.595709800720215, "learning_rate": 2.6460725075528702e-05, "loss": 0.5662, "step": 4125 }, { "epoch": 4.530857454942654, "grad_norm": 9.551477432250977, "learning_rate": 2.643240181268882e-05, "loss": 0.5725, "step": 4150 }, { "epoch": 4.558164937192791, "grad_norm": 5.3942389488220215, "learning_rate": 2.640407854984894e-05, "loss": 0.5674, "step": 4175 }, { "epoch": 4.585472419442928, "grad_norm": 5.688918590545654, "learning_rate": 2.6375755287009067e-05, "loss": 0.532, "step": 4200 }, { "epoch": 4.612779901693064, "grad_norm": 8.642277717590332, "learning_rate": 2.6347432024169186e-05, "loss": 0.5251, "step": 4225 }, { "epoch": 4.6400873839432, "grad_norm": 
7.618096828460693, "learning_rate": 2.6319108761329305e-05, "loss": 0.4943, "step": 4250 }, { "epoch": 4.667394866193337, "grad_norm": 8.57895278930664, "learning_rate": 2.6290785498489425e-05, "loss": 0.6082, "step": 4275 }, { "epoch": 4.694702348443474, "grad_norm": 8.059052467346191, "learning_rate": 2.6262462235649547e-05, "loss": 0.5237, "step": 4300 }, { "epoch": 4.72200983069361, "grad_norm": 4.493632793426514, "learning_rate": 2.623413897280967e-05, "loss": 0.5561, "step": 4325 }, { "epoch": 4.749317312943747, "grad_norm": 16.630308151245117, "learning_rate": 2.620581570996979e-05, "loss": 0.5295, "step": 4350 }, { "epoch": 4.776624795193883, "grad_norm": 5.843674182891846, "learning_rate": 2.617749244712991e-05, "loss": 0.6352, "step": 4375 }, { "epoch": 4.803932277444019, "grad_norm": 7.346729278564453, "learning_rate": 2.6149169184290028e-05, "loss": 0.4714, "step": 4400 }, { "epoch": 4.8312397596941565, "grad_norm": 16.71006202697754, "learning_rate": 2.6120845921450154e-05, "loss": 0.5763, "step": 4425 }, { "epoch": 4.858547241944293, "grad_norm": 6.14994478225708, "learning_rate": 2.6092522658610273e-05, "loss": 0.519, "step": 4450 }, { "epoch": 4.885854724194429, "grad_norm": 5.074880599975586, "learning_rate": 2.6064199395770392e-05, "loss": 0.6056, "step": 4475 }, { "epoch": 4.913162206444566, "grad_norm": 6.593540668487549, "learning_rate": 2.6035876132930512e-05, "loss": 0.4965, "step": 4500 }, { "epoch": 4.940469688694702, "grad_norm": 8.506756782531738, "learning_rate": 2.6007552870090638e-05, "loss": 0.526, "step": 4525 }, { "epoch": 4.967777170944839, "grad_norm": 14.214432716369629, "learning_rate": 2.5979229607250757e-05, "loss": 0.5209, "step": 4550 }, { "epoch": 4.995084653194976, "grad_norm": 6.348720073699951, "learning_rate": 2.5950906344410876e-05, "loss": 0.5298, "step": 4575 }, { "epoch": 5.021845985800109, "grad_norm": 9.367478370666504, "learning_rate": 2.5922583081570996e-05, "loss": 0.4989, "step": 4600 }, { "epoch": 
5.049153468050246, "grad_norm": 15.168708801269531, "learning_rate": 2.589425981873112e-05, "loss": 0.4747, "step": 4625 }, { "epoch": 5.076460950300382, "grad_norm": 4.823526859283447, "learning_rate": 2.586593655589124e-05, "loss": 0.4876, "step": 4650 }, { "epoch": 5.103768432550519, "grad_norm": 14.571444511413574, "learning_rate": 2.583761329305136e-05, "loss": 0.5501, "step": 4675 }, { "epoch": 5.131075914800656, "grad_norm": 10.8106050491333, "learning_rate": 2.580929003021148e-05, "loss": 0.5501, "step": 4700 }, { "epoch": 5.158383397050792, "grad_norm": 8.236265182495117, "learning_rate": 2.5780966767371602e-05, "loss": 0.5235, "step": 4725 }, { "epoch": 5.185690879300928, "grad_norm": 9.495753288269043, "learning_rate": 2.5752643504531725e-05, "loss": 0.4946, "step": 4750 }, { "epoch": 5.212998361551065, "grad_norm": 8.416945457458496, "learning_rate": 2.5724320241691844e-05, "loss": 0.4878, "step": 4775 }, { "epoch": 5.240305843801202, "grad_norm": 8.053985595703125, "learning_rate": 2.5695996978851963e-05, "loss": 0.5201, "step": 4800 }, { "epoch": 5.267613326051338, "grad_norm": 10.643223762512207, "learning_rate": 2.5667673716012086e-05, "loss": 0.46, "step": 4825 }, { "epoch": 5.294920808301475, "grad_norm": 6.177102565765381, "learning_rate": 2.5639350453172205e-05, "loss": 0.4957, "step": 4850 }, { "epoch": 5.322228290551611, "grad_norm": 9.753777503967285, "learning_rate": 2.5611027190332328e-05, "loss": 0.455, "step": 4875 }, { "epoch": 5.349535772801747, "grad_norm": 7.701514720916748, "learning_rate": 2.5582703927492447e-05, "loss": 0.5015, "step": 4900 }, { "epoch": 5.3768432550518845, "grad_norm": 13.037718772888184, "learning_rate": 2.555438066465257e-05, "loss": 0.4821, "step": 4925 }, { "epoch": 5.404150737302021, "grad_norm": 24.860395431518555, "learning_rate": 2.552605740181269e-05, "loss": 0.4207, "step": 4950 }, { "epoch": 5.431458219552157, "grad_norm": 9.181102752685547, "learning_rate": 2.549773413897281e-05, "loss": 0.5958, 
"step": 4975 }, { "epoch": 5.458765701802294, "grad_norm": 9.850212097167969, "learning_rate": 2.546941087613293e-05, "loss": 0.5339, "step": 5000 }, { "epoch": 5.458765701802294, "eval_cer": 0.1556104622142358, "eval_loss": 0.7204546928405762, "eval_model_preparation_time": 0.005, "eval_runtime": 119.9515, "eval_samples_per_second": 41.975, "eval_steps_per_second": 5.252, "eval_wer": 0.39975378121702426, "step": 5000 }, { "epoch": 5.48607318405243, "grad_norm": 6.811852931976318, "learning_rate": 2.5441087613293054e-05, "loss": 0.5451, "step": 5025 }, { "epoch": 5.513380666302567, "grad_norm": 10.183515548706055, "learning_rate": 2.5412764350453173e-05, "loss": 0.5135, "step": 5050 }, { "epoch": 5.540688148552704, "grad_norm": 7.917726993560791, "learning_rate": 2.5384441087613293e-05, "loss": 0.4718, "step": 5075 }, { "epoch": 5.56799563080284, "grad_norm": 7.3554205894470215, "learning_rate": 2.5356117824773415e-05, "loss": 0.5352, "step": 5100 }, { "epoch": 5.595303113052976, "grad_norm": 11.756702423095703, "learning_rate": 2.5327794561933535e-05, "loss": 0.5252, "step": 5125 }, { "epoch": 5.622610595303113, "grad_norm": 4.959266185760498, "learning_rate": 2.5299471299093657e-05, "loss": 0.5292, "step": 5150 }, { "epoch": 5.64991807755325, "grad_norm": 9.948278427124023, "learning_rate": 2.5271148036253777e-05, "loss": 0.5601, "step": 5175 }, { "epoch": 5.677225559803386, "grad_norm": 7.664456844329834, "learning_rate": 2.5242824773413896e-05, "loss": 0.5048, "step": 5200 }, { "epoch": 5.704533042053523, "grad_norm": 15.98771858215332, "learning_rate": 2.521450151057402e-05, "loss": 0.5792, "step": 5225 }, { "epoch": 5.731840524303659, "grad_norm": 6.826180458068848, "learning_rate": 2.518617824773414e-05, "loss": 0.5257, "step": 5250 }, { "epoch": 5.759148006553795, "grad_norm": 9.298140525817871, "learning_rate": 2.515785498489426e-05, "loss": 0.4653, "step": 5275 }, { "epoch": 5.786455488803933, "grad_norm": 10.241419792175293, "learning_rate": 
2.512953172205438e-05, "loss": 0.5332, "step": 5300 }, { "epoch": 5.813762971054069, "grad_norm": 5.564615726470947, "learning_rate": 2.5101208459214502e-05, "loss": 0.4904, "step": 5325 }, { "epoch": 5.841070453304205, "grad_norm": 7.850229263305664, "learning_rate": 2.5072885196374625e-05, "loss": 0.4758, "step": 5350 }, { "epoch": 5.868377935554342, "grad_norm": 17.06355094909668, "learning_rate": 2.5044561933534744e-05, "loss": 0.5351, "step": 5375 }, { "epoch": 5.895685417804478, "grad_norm": 10.680476188659668, "learning_rate": 2.5016238670694864e-05, "loss": 0.512, "step": 5400 }, { "epoch": 5.922992900054615, "grad_norm": 11.354973793029785, "learning_rate": 2.4987915407854983e-05, "loss": 0.4939, "step": 5425 }, { "epoch": 5.950300382304752, "grad_norm": 5.025985240936279, "learning_rate": 2.495959214501511e-05, "loss": 0.5071, "step": 5450 }, { "epoch": 5.977607864554888, "grad_norm": 24.26195526123047, "learning_rate": 2.4931268882175228e-05, "loss": 0.556, "step": 5475 }, { "epoch": 6.004369197160022, "grad_norm": 7.184664726257324, "learning_rate": 2.4902945619335348e-05, "loss": 0.4954, "step": 5500 }, { "epoch": 6.031676679410158, "grad_norm": 6.5955095291137695, "learning_rate": 2.4874622356495467e-05, "loss": 0.4219, "step": 5525 }, { "epoch": 6.058984161660295, "grad_norm": 5.264342784881592, "learning_rate": 2.484629909365559e-05, "loss": 0.4766, "step": 5550 }, { "epoch": 6.086291643910432, "grad_norm": 9.842866897583008, "learning_rate": 2.4817975830815712e-05, "loss": 0.4687, "step": 5575 }, { "epoch": 6.113599126160568, "grad_norm": 5.687420845031738, "learning_rate": 2.478965256797583e-05, "loss": 0.5525, "step": 5600 }, { "epoch": 6.140906608410704, "grad_norm": 8.81843090057373, "learning_rate": 2.476132930513595e-05, "loss": 0.4343, "step": 5625 }, { "epoch": 6.168214090660841, "grad_norm": 4.142597675323486, "learning_rate": 2.4733006042296073e-05, "loss": 0.5247, "step": 5650 }, { "epoch": 6.195521572910978, "grad_norm": 
3.7062594890594482, "learning_rate": 2.4704682779456196e-05, "loss": 0.4287, "step": 5675 }, { "epoch": 6.222829055161114, "grad_norm": 5.054318428039551, "learning_rate": 2.4676359516616315e-05, "loss": 0.4439, "step": 5700 }, { "epoch": 6.250136537411251, "grad_norm": 6.846076011657715, "learning_rate": 2.4648036253776435e-05, "loss": 0.4166, "step": 5725 }, { "epoch": 6.277444019661387, "grad_norm": 9.405501365661621, "learning_rate": 2.4619712990936557e-05, "loss": 0.4836, "step": 5750 }, { "epoch": 6.3047515019115234, "grad_norm": 5.6948957443237305, "learning_rate": 2.4591389728096677e-05, "loss": 0.4307, "step": 5775 }, { "epoch": 6.332058984161661, "grad_norm": 15.579938888549805, "learning_rate": 2.45630664652568e-05, "loss": 0.5279, "step": 5800 }, { "epoch": 6.359366466411797, "grad_norm": 7.0226030349731445, "learning_rate": 2.453474320241692e-05, "loss": 0.4468, "step": 5825 }, { "epoch": 6.386673948661933, "grad_norm": 6.572168827056885, "learning_rate": 2.4506419939577038e-05, "loss": 0.5772, "step": 5850 }, { "epoch": 6.41398143091207, "grad_norm": 5.556070804595947, "learning_rate": 2.447809667673716e-05, "loss": 0.4177, "step": 5875 }, { "epoch": 6.441288913162206, "grad_norm": 6.256873607635498, "learning_rate": 2.4449773413897283e-05, "loss": 0.4447, "step": 5900 }, { "epoch": 6.468596395412343, "grad_norm": 6.874795913696289, "learning_rate": 2.4421450151057403e-05, "loss": 0.388, "step": 5925 }, { "epoch": 6.49590387766248, "grad_norm": 3.867424488067627, "learning_rate": 2.4393126888217522e-05, "loss": 0.5132, "step": 5950 }, { "epoch": 6.523211359912616, "grad_norm": 4.833193778991699, "learning_rate": 2.4364803625377645e-05, "loss": 0.3888, "step": 5975 }, { "epoch": 6.550518842162752, "grad_norm": 5.933465957641602, "learning_rate": 2.4336480362537764e-05, "loss": 0.5306, "step": 6000 }, { "epoch": 6.550518842162752, "eval_cer": 0.15487670959369074, "eval_loss": 0.6949384808540344, "eval_model_preparation_time": 0.005, "eval_runtime": 
115.7923, "eval_samples_per_second": 43.483, "eval_steps_per_second": 5.441, "eval_wer": 0.39702778754836443, "step": 6000 }, { "epoch": 6.577826324412889, "grad_norm": 18.11048126220703, "learning_rate": 2.4308157099697886e-05, "loss": 0.449, "step": 6025 }, { "epoch": 6.605133806663026, "grad_norm": 5.751511573791504, "learning_rate": 2.4279833836858006e-05, "loss": 0.4397, "step": 6050 }, { "epoch": 6.632441288913162, "grad_norm": 4.719283103942871, "learning_rate": 2.425151057401813e-05, "loss": 0.4479, "step": 6075 }, { "epoch": 6.659748771163299, "grad_norm": 13.306458473205566, "learning_rate": 2.4223187311178248e-05, "loss": 0.5259, "step": 6100 }, { "epoch": 6.687056253413435, "grad_norm": 10.625594139099121, "learning_rate": 2.419486404833837e-05, "loss": 0.4439, "step": 6125 }, { "epoch": 6.7143637356635715, "grad_norm": 3.877786636352539, "learning_rate": 2.416654078549849e-05, "loss": 0.4746, "step": 6150 }, { "epoch": 6.741671217913709, "grad_norm": 7.8219709396362305, "learning_rate": 2.4138217522658612e-05, "loss": 0.3977, "step": 6175 }, { "epoch": 6.768978700163845, "grad_norm": 7.253026485443115, "learning_rate": 2.410989425981873e-05, "loss": 0.5426, "step": 6200 }, { "epoch": 6.796286182413981, "grad_norm": 5.014729022979736, "learning_rate": 2.408157099697885e-05, "loss": 0.4266, "step": 6225 }, { "epoch": 6.823593664664118, "grad_norm": 4.930147171020508, "learning_rate": 2.4053247734138974e-05, "loss": 0.4647, "step": 6250 }, { "epoch": 6.8509011469142544, "grad_norm": 4.871290683746338, "learning_rate": 2.4024924471299096e-05, "loss": 0.4323, "step": 6275 }, { "epoch": 6.878208629164391, "grad_norm": 13.384846687316895, "learning_rate": 2.3996601208459216e-05, "loss": 0.5079, "step": 6300 }, { "epoch": 6.905516111414528, "grad_norm": 7.975968837738037, "learning_rate": 2.3968277945619335e-05, "loss": 0.4315, "step": 6325 }, { "epoch": 6.932823593664664, "grad_norm": 11.474884033203125, "learning_rate": 2.3939954682779454e-05, "loss": 
0.4607, "step": 6350 }, { "epoch": 6.9601310759148, "grad_norm": 3.6631875038146973, "learning_rate": 2.391163141993958e-05, "loss": 0.4207, "step": 6375 }, { "epoch": 6.987438558164937, "grad_norm": 12.051356315612793, "learning_rate": 2.38833081570997e-05, "loss": 0.4687, "step": 6400 }, { "epoch": 7.014199890770071, "grad_norm": 4.678115367889404, "learning_rate": 2.385498489425982e-05, "loss": 0.4164, "step": 6425 }, { "epoch": 7.041507373020208, "grad_norm": 6.941125392913818, "learning_rate": 2.3826661631419938e-05, "loss": 0.4205, "step": 6450 }, { "epoch": 7.068814855270344, "grad_norm": 7.14270544052124, "learning_rate": 2.3798338368580064e-05, "loss": 0.4106, "step": 6475 }, { "epoch": 7.09612233752048, "grad_norm": 5.145579814910889, "learning_rate": 2.3770015105740183e-05, "loss": 0.4072, "step": 6500 }, { "epoch": 7.123429819770617, "grad_norm": 8.598185539245605, "learning_rate": 2.3741691842900303e-05, "loss": 0.4466, "step": 6525 }, { "epoch": 7.150737302020754, "grad_norm": 7.154458999633789, "learning_rate": 2.3713368580060422e-05, "loss": 0.3793, "step": 6550 }, { "epoch": 7.17804478427089, "grad_norm": 4.877124786376953, "learning_rate": 2.368504531722054e-05, "loss": 0.4658, "step": 6575 }, { "epoch": 7.205352266521027, "grad_norm": 5.1544013023376465, "learning_rate": 2.3656722054380667e-05, "loss": 0.4118, "step": 6600 }, { "epoch": 7.232659748771163, "grad_norm": 9.574334144592285, "learning_rate": 2.3628398791540787e-05, "loss": 0.4458, "step": 6625 }, { "epoch": 7.2599672310212995, "grad_norm": 5.446975231170654, "learning_rate": 2.3600075528700906e-05, "loss": 0.4237, "step": 6650 }, { "epoch": 7.287274713271437, "grad_norm": 9.190759658813477, "learning_rate": 2.3571752265861025e-05, "loss": 0.4058, "step": 6675 }, { "epoch": 7.314582195521573, "grad_norm": 8.488012313842773, "learning_rate": 2.354342900302115e-05, "loss": 0.4108, "step": 6700 }, { "epoch": 7.341889677771709, "grad_norm": 13.022942543029785, "learning_rate": 
2.351510574018127e-05, "loss": 0.4472, "step": 6725 }, { "epoch": 7.369197160021846, "grad_norm": 9.548989295959473, "learning_rate": 2.348678247734139e-05, "loss": 0.381, "step": 6750 }, { "epoch": 7.3965046422719825, "grad_norm": 5.614551067352295, "learning_rate": 2.345845921450151e-05, "loss": 0.4434, "step": 6775 }, { "epoch": 7.423812124522119, "grad_norm": 17.168210983276367, "learning_rate": 2.3430135951661632e-05, "loss": 0.3832, "step": 6800 }, { "epoch": 7.451119606772256, "grad_norm": 4.933121681213379, "learning_rate": 2.3401812688821755e-05, "loss": 0.4904, "step": 6825 }, { "epoch": 7.478427089022392, "grad_norm": 5.47880220413208, "learning_rate": 2.3373489425981874e-05, "loss": 0.4292, "step": 6850 }, { "epoch": 7.505734571272528, "grad_norm": 7.074849605560303, "learning_rate": 2.3345166163141993e-05, "loss": 0.474, "step": 6875 }, { "epoch": 7.533042053522665, "grad_norm": 11.039775848388672, "learning_rate": 2.3316842900302116e-05, "loss": 0.4002, "step": 6900 }, { "epoch": 7.560349535772802, "grad_norm": 11.666525840759277, "learning_rate": 2.3288519637462235e-05, "loss": 0.428, "step": 6925 }, { "epoch": 7.587657018022938, "grad_norm": 14.233920097351074, "learning_rate": 2.3260196374622358e-05, "loss": 0.4261, "step": 6950 }, { "epoch": 7.614964500273075, "grad_norm": 22.354055404663086, "learning_rate": 2.3231873111782477e-05, "loss": 0.4428, "step": 6975 }, { "epoch": 7.642271982523211, "grad_norm": 9.623329162597656, "learning_rate": 2.32035498489426e-05, "loss": 0.4609, "step": 7000 }, { "epoch": 7.642271982523211, "eval_cer": 0.149570729759409, "eval_loss": 0.701633870601654, "eval_model_preparation_time": 0.005, "eval_runtime": 119.4863, "eval_samples_per_second": 42.139, "eval_steps_per_second": 5.273, "eval_wer": 0.3782975729862821, "step": 7000 }, { "epoch": 7.6695794647733475, "grad_norm": 5.884428024291992, "learning_rate": 2.317522658610272e-05, "loss": 0.4892, "step": 7025 }, { "epoch": 7.696886947023485, "grad_norm": 
10.167741775512695, "learning_rate": 2.314690332326284e-05, "loss": 0.4235, "step": 7050 }, { "epoch": 7.724194429273621, "grad_norm": 7.017126083374023, "learning_rate": 2.311858006042296e-05, "loss": 0.4243, "step": 7075 }, { "epoch": 7.751501911523757, "grad_norm": 6.440131664276123, "learning_rate": 2.3090256797583084e-05, "loss": 0.4033, "step": 7100 }, { "epoch": 7.778809393773894, "grad_norm": 26.1104679107666, "learning_rate": 2.3061933534743203e-05, "loss": 0.4217, "step": 7125 }, { "epoch": 7.8061168760240305, "grad_norm": 5.83247709274292, "learning_rate": 2.3033610271903322e-05, "loss": 0.3388, "step": 7150 }, { "epoch": 7.833424358274167, "grad_norm": 8.168228149414062, "learning_rate": 2.3005287009063445e-05, "loss": 0.4037, "step": 7175 }, { "epoch": 7.860731840524304, "grad_norm": 10.858320236206055, "learning_rate": 2.2976963746223568e-05, "loss": 0.407, "step": 7200 }, { "epoch": 7.88803932277444, "grad_norm": 7.752684116363525, "learning_rate": 2.2948640483383687e-05, "loss": 0.4769, "step": 7225 }, { "epoch": 7.915346805024576, "grad_norm": 6.056533336639404, "learning_rate": 2.2920317220543806e-05, "loss": 0.3683, "step": 7250 }, { "epoch": 7.9426542872747135, "grad_norm": 4.319801330566406, "learning_rate": 2.289199395770393e-05, "loss": 0.3878, "step": 7275 }, { "epoch": 7.96996176952485, "grad_norm": 13.979938507080078, "learning_rate": 2.2863670694864048e-05, "loss": 0.3732, "step": 7300 }, { "epoch": 7.997269251774986, "grad_norm": 6.35064697265625, "learning_rate": 2.283534743202417e-05, "loss": 0.4511, "step": 7325 }, { "epoch": 8.024030584380121, "grad_norm": 5.30713415145874, "learning_rate": 2.280702416918429e-05, "loss": 0.3883, "step": 7350 }, { "epoch": 8.051338066630256, "grad_norm": 19.310928344726562, "learning_rate": 2.277870090634441e-05, "loss": 0.3729, "step": 7375 }, { "epoch": 8.078645548880393, "grad_norm": 5.623440742492676, "learning_rate": 2.2750377643504532e-05, "loss": 0.37, "step": 7400 }, { "epoch": 
8.10595303113053, "grad_norm": 18.010700225830078, "learning_rate": 2.2722054380664655e-05, "loss": 0.417, "step": 7425 }, { "epoch": 8.133260513380666, "grad_norm": 8.831953048706055, "learning_rate": 2.2693731117824774e-05, "loss": 0.3735, "step": 7450 }, { "epoch": 8.160567995630803, "grad_norm": 7.183424472808838, "learning_rate": 2.2665407854984893e-05, "loss": 0.4005, "step": 7475 }, { "epoch": 8.18787547788094, "grad_norm": 7.7185468673706055, "learning_rate": 2.2637084592145016e-05, "loss": 0.3834, "step": 7500 }, { "epoch": 8.215182960131076, "grad_norm": 10.280498504638672, "learning_rate": 2.260876132930514e-05, "loss": 0.3721, "step": 7525 }, { "epoch": 8.242490442381213, "grad_norm": 7.704532146453857, "learning_rate": 2.2580438066465258e-05, "loss": 0.3631, "step": 7550 }, { "epoch": 8.26979792463135, "grad_norm": 11.213274002075195, "learning_rate": 2.2552114803625377e-05, "loss": 0.4643, "step": 7575 }, { "epoch": 8.297105406881485, "grad_norm": 3.7501258850097656, "learning_rate": 2.2523791540785496e-05, "loss": 0.3519, "step": 7600 }, { "epoch": 8.324412889131622, "grad_norm": 4.03598165512085, "learning_rate": 2.2495468277945623e-05, "loss": 0.3716, "step": 7625 }, { "epoch": 8.35172037138176, "grad_norm": 7.549093246459961, "learning_rate": 2.2467145015105742e-05, "loss": 0.3701, "step": 7650 }, { "epoch": 8.379027853631895, "grad_norm": 19.76717758178711, "learning_rate": 2.243882175226586e-05, "loss": 0.4034, "step": 7675 }, { "epoch": 8.406335335882032, "grad_norm": 6.59949254989624, "learning_rate": 2.241049848942598e-05, "loss": 0.3644, "step": 7700 }, { "epoch": 8.433642818132169, "grad_norm": 80.17059326171875, "learning_rate": 2.2382175226586103e-05, "loss": 0.4272, "step": 7725 }, { "epoch": 8.460950300382304, "grad_norm": 4.888047218322754, "learning_rate": 2.2353851963746226e-05, "loss": 0.3803, "step": 7750 }, { "epoch": 8.488257782632441, "grad_norm": 10.054312705993652, "learning_rate": 2.2325528700906345e-05, "loss": 0.468, 
"step": 7775 }, { "epoch": 8.515565264882579, "grad_norm": 4.133181095123291, "learning_rate": 2.2297205438066464e-05, "loss": 0.3618, "step": 7800 }, { "epoch": 8.542872747132714, "grad_norm": 9.80252742767334, "learning_rate": 2.2268882175226587e-05, "loss": 0.4029, "step": 7825 }, { "epoch": 8.570180229382851, "grad_norm": 7.9863667488098145, "learning_rate": 2.224055891238671e-05, "loss": 0.3716, "step": 7850 }, { "epoch": 8.597487711632988, "grad_norm": 8.875330924987793, "learning_rate": 2.221223564954683e-05, "loss": 0.4779, "step": 7875 }, { "epoch": 8.624795193883124, "grad_norm": 5.930929183959961, "learning_rate": 2.2183912386706948e-05, "loss": 0.4358, "step": 7900 }, { "epoch": 8.65210267613326, "grad_norm": 96.91864013671875, "learning_rate": 2.215558912386707e-05, "loss": 0.4064, "step": 7925 }, { "epoch": 8.679410158383398, "grad_norm": 8.179421424865723, "learning_rate": 2.212726586102719e-05, "loss": 0.3627, "step": 7950 }, { "epoch": 8.706717640633533, "grad_norm": 20.27647590637207, "learning_rate": 2.2098942598187313e-05, "loss": 0.416, "step": 7975 }, { "epoch": 8.73402512288367, "grad_norm": 4.789956092834473, "learning_rate": 2.2070619335347432e-05, "loss": 0.3716, "step": 8000 }, { "epoch": 8.73402512288367, "eval_cer": 0.14886193471099132, "eval_loss": 0.6733256578445435, "eval_model_preparation_time": 0.005, "eval_runtime": 116.457, "eval_samples_per_second": 43.235, "eval_steps_per_second": 5.41, "eval_wer": 0.3816830812521984, "step": 8000 }, { "epoch": 8.761332605133807, "grad_norm": 7.864968776702881, "learning_rate": 2.2042296072507555e-05, "loss": 0.3858, "step": 8025 }, { "epoch": 8.788640087383943, "grad_norm": 6.6363372802734375, "learning_rate": 2.2013972809667674e-05, "loss": 0.3649, "step": 8050 }, { "epoch": 8.81594756963408, "grad_norm": 9.595897674560547, "learning_rate": 2.1985649546827797e-05, "loss": 0.3551, "step": 8075 }, { "epoch": 8.843255051884217, "grad_norm": 8.371260643005371, "learning_rate": 
2.1957326283987916e-05, "loss": 0.3999, "step": 8100 }, { "epoch": 8.870562534134352, "grad_norm": 4.741596221923828, "learning_rate": 2.1929003021148035e-05, "loss": 0.4119, "step": 8125 }, { "epoch": 8.89787001638449, "grad_norm": 5.410317420959473, "learning_rate": 2.1900679758308158e-05, "loss": 0.3525, "step": 8150 }, { "epoch": 8.925177498634627, "grad_norm": 10.64633846282959, "learning_rate": 2.1872356495468277e-05, "loss": 0.4344, "step": 8175 }, { "epoch": 8.952484980884762, "grad_norm": 6.2715163230896, "learning_rate": 2.18440332326284e-05, "loss": 0.4076, "step": 8200 }, { "epoch": 8.979792463134899, "grad_norm": 13.759658813476562, "learning_rate": 2.181570996978852e-05, "loss": 0.3981, "step": 8225 }, { "epoch": 9.006553795740032, "grad_norm": 4.842432022094727, "learning_rate": 2.1787386706948642e-05, "loss": 0.3659, "step": 8250 }, { "epoch": 9.03386127799017, "grad_norm": 7.686051845550537, "learning_rate": 2.175906344410876e-05, "loss": 0.3392, "step": 8275 }, { "epoch": 9.061168760240307, "grad_norm": 3.8665130138397217, "learning_rate": 2.173074018126888e-05, "loss": 0.461, "step": 8300 }, { "epoch": 9.088476242490442, "grad_norm": 5.737161636352539, "learning_rate": 2.1702416918429003e-05, "loss": 0.3618, "step": 8325 }, { "epoch": 9.115783724740579, "grad_norm": 7.1294264793396, "learning_rate": 2.1674093655589126e-05, "loss": 0.3874, "step": 8350 }, { "epoch": 9.143091206990716, "grad_norm": 10.268749237060547, "learning_rate": 2.1645770392749245e-05, "loss": 0.3538, "step": 8375 }, { "epoch": 9.170398689240852, "grad_norm": 7.267090320587158, "learning_rate": 2.1617447129909364e-05, "loss": 0.3863, "step": 8400 }, { "epoch": 9.197706171490989, "grad_norm": 5.078249454498291, "learning_rate": 2.1589123867069487e-05, "loss": 0.3417, "step": 8425 }, { "epoch": 9.225013653741126, "grad_norm": 3.0620856285095215, "learning_rate": 2.156080060422961e-05, "loss": 0.3765, "step": 8450 }, { "epoch": 9.252321135991261, "grad_norm": 9.452494621276855, 
"learning_rate": 2.153247734138973e-05, "loss": 0.3059, "step": 8475 }, { "epoch": 9.279628618241398, "grad_norm": 4.6139068603515625, "learning_rate": 2.150415407854985e-05, "loss": 0.4077, "step": 8500 }, { "epoch": 9.306936100491535, "grad_norm": 9.233602523803711, "learning_rate": 2.1475830815709968e-05, "loss": 0.373, "step": 8525 }, { "epoch": 9.33424358274167, "grad_norm": 3.496276617050171, "learning_rate": 2.1447507552870094e-05, "loss": 0.4323, "step": 8550 }, { "epoch": 9.361551064991808, "grad_norm": 3.6153175830841064, "learning_rate": 2.1419184290030213e-05, "loss": 0.3341, "step": 8575 }, { "epoch": 9.388858547241945, "grad_norm": 3.8380043506622314, "learning_rate": 2.1390861027190332e-05, "loss": 0.3472, "step": 8600 }, { "epoch": 9.41616602949208, "grad_norm": 21.394275665283203, "learning_rate": 2.136253776435045e-05, "loss": 0.3128, "step": 8625 }, { "epoch": 9.443473511742217, "grad_norm": 5.455437183380127, "learning_rate": 2.1334214501510578e-05, "loss": 0.4666, "step": 8650 }, { "epoch": 9.470780993992355, "grad_norm": 11.697492599487305, "learning_rate": 2.1305891238670697e-05, "loss": 0.3459, "step": 8675 }, { "epoch": 9.49808847624249, "grad_norm": 7.250217437744141, "learning_rate": 2.1277567975830816e-05, "loss": 0.4675, "step": 8700 }, { "epoch": 9.525395958492627, "grad_norm": 7.173760414123535, "learning_rate": 2.1249244712990936e-05, "loss": 0.335, "step": 8725 }, { "epoch": 9.552703440742764, "grad_norm": 5.6985578536987305, "learning_rate": 2.1220921450151058e-05, "loss": 0.4619, "step": 8750 }, { "epoch": 9.5800109229929, "grad_norm": 11.427886962890625, "learning_rate": 2.119259818731118e-05, "loss": 0.3598, "step": 8775 }, { "epoch": 9.607318405243037, "grad_norm": 5.715959548950195, "learning_rate": 2.11642749244713e-05, "loss": 0.3541, "step": 8800 }, { "epoch": 9.634625887493174, "grad_norm": 7.3820672035217285, "learning_rate": 2.113595166163142e-05, "loss": 0.3469, "step": 8825 }, { "epoch": 9.66193336974331, "grad_norm": 
7.4520769119262695, "learning_rate": 2.110762839879154e-05, "loss": 0.4164, "step": 8850 }, { "epoch": 9.689240851993446, "grad_norm": 7.345829010009766, "learning_rate": 2.107930513595166e-05, "loss": 0.3607, "step": 8875 }, { "epoch": 9.716548334243583, "grad_norm": 4.674245357513428, "learning_rate": 2.1050981873111784e-05, "loss": 0.3614, "step": 8900 }, { "epoch": 9.743855816493719, "grad_norm": 5.370650768280029, "learning_rate": 2.1022658610271903e-05, "loss": 0.3659, "step": 8925 }, { "epoch": 9.771163298743856, "grad_norm": 5.764029502868652, "learning_rate": 2.0994335347432023e-05, "loss": 0.4037, "step": 8950 }, { "epoch": 9.798470780993993, "grad_norm": 6.055963039398193, "learning_rate": 2.0966012084592145e-05, "loss": 0.3345, "step": 8975 }, { "epoch": 9.825778263244128, "grad_norm": 8.228117942810059, "learning_rate": 2.0937688821752268e-05, "loss": 0.3296, "step": 9000 }, { "epoch": 9.825778263244128, "eval_cer": 0.14654587201757013, "eval_loss": 0.6827245950698853, "eval_model_preparation_time": 0.005, "eval_runtime": 115.7003, "eval_samples_per_second": 43.518, "eval_steps_per_second": 5.445, "eval_wer": 0.36455768554344004, "step": 9000 }, { "epoch": 9.853085745494266, "grad_norm": 5.367037296295166, "learning_rate": 2.0909365558912387e-05, "loss": 0.3203, "step": 9025 }, { "epoch": 9.880393227744403, "grad_norm": 5.565965175628662, "learning_rate": 2.0881042296072507e-05, "loss": 0.3535, "step": 9050 }, { "epoch": 9.907700709994538, "grad_norm": 6.359456539154053, "learning_rate": 2.085271903323263e-05, "loss": 0.3112, "step": 9075 }, { "epoch": 9.935008192244675, "grad_norm": 23.050487518310547, "learning_rate": 2.082439577039275e-05, "loss": 0.4099, "step": 9100 }, { "epoch": 9.962315674494812, "grad_norm": 5.588339328765869, "learning_rate": 2.079607250755287e-05, "loss": 0.339, "step": 9125 }, { "epoch": 9.989623156744948, "grad_norm": 9.931501388549805, "learning_rate": 2.076774924471299e-05, "loss": 0.4101, "step": 9150 }, { "epoch": 
10.016384489350083, "grad_norm": 3.725722551345825, "learning_rate": 2.0739425981873113e-05, "loss": 0.3379, "step": 9175 }, { "epoch": 10.043691971600218, "grad_norm": 5.649452209472656, "learning_rate": 2.0711102719033233e-05, "loss": 0.3433, "step": 9200 }, { "epoch": 10.070999453850355, "grad_norm": 16.90140151977539, "learning_rate": 2.0682779456193355e-05, "loss": 0.3665, "step": 9225 }, { "epoch": 10.098306936100492, "grad_norm": 12.65540599822998, "learning_rate": 2.0654456193353474e-05, "loss": 0.299, "step": 9250 }, { "epoch": 10.125614418350628, "grad_norm": 3.57829213142395, "learning_rate": 2.0626132930513597e-05, "loss": 0.3335, "step": 9275 }, { "epoch": 10.152921900600765, "grad_norm": 10.337313652038574, "learning_rate": 2.0597809667673716e-05, "loss": 0.2935, "step": 9300 }, { "epoch": 10.180229382850902, "grad_norm": 7.510273456573486, "learning_rate": 2.0569486404833836e-05, "loss": 0.3571, "step": 9325 }, { "epoch": 10.207536865101037, "grad_norm": 8.998873710632324, "learning_rate": 2.054116314199396e-05, "loss": 0.3376, "step": 9350 }, { "epoch": 10.234844347351174, "grad_norm": 5.616570949554443, "learning_rate": 2.051283987915408e-05, "loss": 0.3756, "step": 9375 }, { "epoch": 10.262151829601311, "grad_norm": 16.173696517944336, "learning_rate": 2.04845166163142e-05, "loss": 0.3649, "step": 9400 }, { "epoch": 10.289459311851447, "grad_norm": 4.307038307189941, "learning_rate": 2.045619335347432e-05, "loss": 0.3579, "step": 9425 }, { "epoch": 10.316766794101584, "grad_norm": 23.82764434814453, "learning_rate": 2.0427870090634442e-05, "loss": 0.3183, "step": 9450 }, { "epoch": 10.344074276351721, "grad_norm": 4.360101699829102, "learning_rate": 2.0399546827794565e-05, "loss": 0.3579, "step": 9475 }, { "epoch": 10.371381758601856, "grad_norm": 26.451988220214844, "learning_rate": 2.0371223564954684e-05, "loss": 0.3749, "step": 9500 }, { "epoch": 10.398689240851994, "grad_norm": 9.364786148071289, "learning_rate": 2.0342900302114804e-05, 
"loss": 0.3347, "step": 9525 }, { "epoch": 10.42599672310213, "grad_norm": 11.351816177368164, "learning_rate": 2.0314577039274923e-05, "loss": 0.3337, "step": 9550 }, { "epoch": 10.453304205352266, "grad_norm": 6.750742435455322, "learning_rate": 2.0286253776435046e-05, "loss": 0.3124, "step": 9575 }, { "epoch": 10.480611687602403, "grad_norm": 13.647221565246582, "learning_rate": 2.0257930513595168e-05, "loss": 0.341, "step": 9600 }, { "epoch": 10.50791916985254, "grad_norm": 3.5396621227264404, "learning_rate": 2.0229607250755288e-05, "loss": 0.3389, "step": 9625 }, { "epoch": 10.535226652102676, "grad_norm": 6.184154987335205, "learning_rate": 2.0201283987915407e-05, "loss": 0.2747, "step": 9650 }, { "epoch": 10.562534134352813, "grad_norm": 7.413362503051758, "learning_rate": 2.0172960725075526e-05, "loss": 0.3586, "step": 9675 }, { "epoch": 10.58984161660295, "grad_norm": 5.403658390045166, "learning_rate": 2.0144637462235652e-05, "loss": 0.3411, "step": 9700 }, { "epoch": 10.617149098853085, "grad_norm": 8.056621551513672, "learning_rate": 2.011631419939577e-05, "loss": 0.3675, "step": 9725 }, { "epoch": 10.644456581103222, "grad_norm": 11.285257339477539, "learning_rate": 2.008799093655589e-05, "loss": 0.303, "step": 9750 }, { "epoch": 10.67176406335336, "grad_norm": 7.333703994750977, "learning_rate": 2.005966767371601e-05, "loss": 0.3534, "step": 9775 }, { "epoch": 10.699071545603495, "grad_norm": 15.239602088928223, "learning_rate": 2.0031344410876136e-05, "loss": 0.3164, "step": 9800 }, { "epoch": 10.726379027853632, "grad_norm": 4.279262542724609, "learning_rate": 2.0003021148036255e-05, "loss": 0.3602, "step": 9825 }, { "epoch": 10.753686510103769, "grad_norm": 12.147598266601562, "learning_rate": 1.9974697885196375e-05, "loss": 0.3301, "step": 9850 }, { "epoch": 10.780993992353904, "grad_norm": 3.4046525955200195, "learning_rate": 1.9946374622356494e-05, "loss": 0.3642, "step": 9875 }, { "epoch": 10.808301474604042, "grad_norm": 11.226025581359863, 
"learning_rate": 1.9918051359516617e-05, "loss": 0.3326, "step": 9900 }, { "epoch": 10.835608956854179, "grad_norm": 12.747579574584961, "learning_rate": 1.988972809667674e-05, "loss": 0.3401, "step": 9925 }, { "epoch": 10.862916439104314, "grad_norm": 4.020759105682373, "learning_rate": 1.986140483383686e-05, "loss": 0.36, "step": 9950 }, { "epoch": 10.890223921354451, "grad_norm": 9.239838600158691, "learning_rate": 1.9833081570996978e-05, "loss": 0.3284, "step": 9975 }, { "epoch": 10.917531403604588, "grad_norm": 6.028027057647705, "learning_rate": 1.98047583081571e-05, "loss": 0.3257, "step": 10000 }, { "epoch": 10.917531403604588, "eval_cer": 0.1435110312468803, "eval_loss": 0.6644772291183472, "eval_model_preparation_time": 0.005, "eval_runtime": 113.1675, "eval_samples_per_second": 44.492, "eval_steps_per_second": 5.567, "eval_wer": 0.3647335561027084, "step": 10000 }, { "epoch": 10.944838885854724, "grad_norm": 5.252279758453369, "learning_rate": 1.9776435045317223e-05, "loss": 0.3285, "step": 10025 }, { "epoch": 10.97214636810486, "grad_norm": 13.370328903198242, "learning_rate": 1.9748111782477342e-05, "loss": 0.3471, "step": 10050 }, { "epoch": 10.999453850354998, "grad_norm": 5.866436004638672, "learning_rate": 1.9719788519637462e-05, "loss": 0.3565, "step": 10075 }, { "epoch": 11.026215182960131, "grad_norm": 5.021336555480957, "learning_rate": 1.9691465256797584e-05, "loss": 0.3083, "step": 10100 }, { "epoch": 11.053522665210268, "grad_norm": 17.237905502319336, "learning_rate": 1.9663141993957704e-05, "loss": 0.3881, "step": 10125 }, { "epoch": 11.080830147460404, "grad_norm": 4.234619617462158, "learning_rate": 1.9634818731117826e-05, "loss": 0.2704, "step": 10150 }, { "epoch": 11.10813762971054, "grad_norm": 3.8946785926818848, "learning_rate": 1.9606495468277946e-05, "loss": 0.2968, "step": 10175 }, { "epoch": 11.135445111960678, "grad_norm": 5.5384087562561035, "learning_rate": 1.957817220543807e-05, "loss": 0.3108, "step": 10200 }, { "epoch": 
11.162752594210813, "grad_norm": 18.46824836730957, "learning_rate": 1.9549848942598188e-05, "loss": 0.3427, "step": 10225 }, { "epoch": 11.19006007646095, "grad_norm": 18.986160278320312, "learning_rate": 1.9521525679758307e-05, "loss": 0.2661, "step": 10250 }, { "epoch": 11.217367558711087, "grad_norm": 18.38584327697754, "learning_rate": 1.949320241691843e-05, "loss": 0.3316, "step": 10275 }, { "epoch": 11.244675040961223, "grad_norm": 5.019933700561523, "learning_rate": 1.946487915407855e-05, "loss": 0.2561, "step": 10300 }, { "epoch": 11.27198252321136, "grad_norm": 14.68535327911377, "learning_rate": 1.943655589123867e-05, "loss": 0.3231, "step": 10325 }, { "epoch": 11.299290005461497, "grad_norm": 6.081609725952148, "learning_rate": 1.940823262839879e-05, "loss": 0.264, "step": 10350 }, { "epoch": 11.326597487711632, "grad_norm": 14.706550598144531, "learning_rate": 1.9379909365558914e-05, "loss": 0.3183, "step": 10375 }, { "epoch": 11.35390496996177, "grad_norm": 9.04871940612793, "learning_rate": 1.9351586102719033e-05, "loss": 0.3335, "step": 10400 }, { "epoch": 11.381212452211907, "grad_norm": 16.926143646240234, "learning_rate": 1.9323262839879156e-05, "loss": 0.3838, "step": 10425 }, { "epoch": 11.408519934462042, "grad_norm": 11.612327575683594, "learning_rate": 1.9294939577039275e-05, "loss": 0.3308, "step": 10450 }, { "epoch": 11.43582741671218, "grad_norm": 9.491094589233398, "learning_rate": 1.9266616314199394e-05, "loss": 0.3988, "step": 10475 }, { "epoch": 11.463134898962316, "grad_norm": 6.503066062927246, "learning_rate": 1.9238293051359517e-05, "loss": 0.3438, "step": 10500 }, { "epoch": 11.490442381212452, "grad_norm": 8.216390609741211, "learning_rate": 1.920996978851964e-05, "loss": 0.3742, "step": 10525 }, { "epoch": 11.517749863462589, "grad_norm": 3.90631365776062, "learning_rate": 1.918164652567976e-05, "loss": 0.2533, "step": 10550 }, { "epoch": 11.545057345712726, "grad_norm": 6.465386390686035, "learning_rate": 
1.9153323262839878e-05, "loss": 0.3355, "step": 10575 }, { "epoch": 11.572364827962861, "grad_norm": 6.217861175537109, "learning_rate": 1.9125e-05, "loss": 0.2754, "step": 10600 }, { "epoch": 11.599672310212998, "grad_norm": 19.01822280883789, "learning_rate": 1.9096676737160123e-05, "loss": 0.3347, "step": 10625 }, { "epoch": 11.626979792463136, "grad_norm": 7.891463756561279, "learning_rate": 1.9068353474320243e-05, "loss": 0.2743, "step": 10650 }, { "epoch": 11.65428727471327, "grad_norm": 14.562522888183594, "learning_rate": 1.9040030211480362e-05, "loss": 0.312, "step": 10675 }, { "epoch": 11.681594756963408, "grad_norm": 9.30823040008545, "learning_rate": 1.901170694864048e-05, "loss": 0.2682, "step": 10700 }, { "epoch": 11.708902239213545, "grad_norm": 14.836087226867676, "learning_rate": 1.8983383685800607e-05, "loss": 0.3092, "step": 10725 }, { "epoch": 11.73620972146368, "grad_norm": 6.448333740234375, "learning_rate": 1.8955060422960727e-05, "loss": 0.3098, "step": 10750 }, { "epoch": 11.763517203713818, "grad_norm": 8.701180458068848, "learning_rate": 1.8926737160120846e-05, "loss": 0.4094, "step": 10775 }, { "epoch": 11.790824685963955, "grad_norm": 2.8268656730651855, "learning_rate": 1.8898413897280965e-05, "loss": 0.2773, "step": 10800 }, { "epoch": 11.81813216821409, "grad_norm": 8.97842025756836, "learning_rate": 1.8870090634441088e-05, "loss": 0.3457, "step": 10825 }, { "epoch": 11.845439650464227, "grad_norm": 4.506399154663086, "learning_rate": 1.884176737160121e-05, "loss": 0.2818, "step": 10850 }, { "epoch": 11.872747132714364, "grad_norm": 6.2234416007995605, "learning_rate": 1.881344410876133e-05, "loss": 0.2716, "step": 10875 }, { "epoch": 11.9000546149645, "grad_norm": 3.798288106918335, "learning_rate": 1.878512084592145e-05, "loss": 0.2832, "step": 10900 }, { "epoch": 11.927362097214637, "grad_norm": 14.553479194641113, "learning_rate": 1.8756797583081572e-05, "loss": 0.3755, "step": 10925 }, { "epoch": 11.954669579464774, "grad_norm": 
7.604412078857422, "learning_rate": 1.8728474320241694e-05, "loss": 0.2727, "step": 10950 }, { "epoch": 11.98197706171491, "grad_norm": 23.101869583129883, "learning_rate": 1.8700151057401814e-05, "loss": 0.3444, "step": 10975 }, { "epoch": 12.008738394320044, "grad_norm": 6.472280025482178, "learning_rate": 1.8671827794561933e-05, "loss": 0.3303, "step": 11000 }, { "epoch": 12.008738394320044, "eval_cer": 0.14463412199261255, "eval_loss": 0.7103880047798157, "eval_model_preparation_time": 0.005, "eval_runtime": 115.8318, "eval_samples_per_second": 43.468, "eval_steps_per_second": 5.439, "eval_wer": 0.36838287020752725, "step": 11000 }, { "epoch": 12.03604587657018, "grad_norm": 6.654875755310059, "learning_rate": 1.8643504531722056e-05, "loss": 0.291, "step": 11025 }, { "epoch": 12.063353358820317, "grad_norm": 3.3532609939575195, "learning_rate": 1.8615181268882175e-05, "loss": 0.3403, "step": 11050 }, { "epoch": 12.090660841070454, "grad_norm": 16.545148849487305, "learning_rate": 1.8586858006042298e-05, "loss": 0.271, "step": 11075 }, { "epoch": 12.11796832332059, "grad_norm": 15.24442195892334, "learning_rate": 1.8558534743202417e-05, "loss": 0.3112, "step": 11100 }, { "epoch": 12.145275805570726, "grad_norm": 3.4361345767974854, "learning_rate": 1.8530211480362536e-05, "loss": 0.2523, "step": 11125 }, { "epoch": 12.172583287820864, "grad_norm": 4.4615654945373535, "learning_rate": 1.850188821752266e-05, "loss": 0.3055, "step": 11150 }, { "epoch": 12.199890770070999, "grad_norm": 10.298237800598145, "learning_rate": 1.847356495468278e-05, "loss": 0.2693, "step": 11175 }, { "epoch": 12.227198252321136, "grad_norm": 5.64368200302124, "learning_rate": 1.84452416918429e-05, "loss": 0.3253, "step": 11200 }, { "epoch": 12.254505734571273, "grad_norm": 6.641918659210205, "learning_rate": 1.841691842900302e-05, "loss": 0.3024, "step": 11225 }, { "epoch": 12.281813216821408, "grad_norm": 7.182716369628906, "learning_rate": 1.8388595166163143e-05, "loss": 0.3555, 
"step": 11250 }, { "epoch": 12.309120699071546, "grad_norm": 5.664862155914307, "learning_rate": 1.8360271903323262e-05, "loss": 0.2986, "step": 11275 }, { "epoch": 12.336428181321683, "grad_norm": 4.865562438964844, "learning_rate": 1.8331948640483385e-05, "loss": 0.3836, "step": 11300 }, { "epoch": 12.363735663571818, "grad_norm": 14.41945743560791, "learning_rate": 1.8303625377643504e-05, "loss": 0.2814, "step": 11325 }, { "epoch": 12.391043145821955, "grad_norm": 4.414587497711182, "learning_rate": 1.8275302114803627e-05, "loss": 0.3092, "step": 11350 }, { "epoch": 12.418350628072092, "grad_norm": 9.21458625793457, "learning_rate": 1.8246978851963746e-05, "loss": 0.3159, "step": 11375 }, { "epoch": 12.445658110322228, "grad_norm": 6.262660026550293, "learning_rate": 1.821865558912387e-05, "loss": 0.3008, "step": 11400 }, { "epoch": 12.472965592572365, "grad_norm": 8.06177043914795, "learning_rate": 1.8190332326283988e-05, "loss": 0.3113, "step": 11425 }, { "epoch": 12.500273074822502, "grad_norm": 10.917487144470215, "learning_rate": 1.816200906344411e-05, "loss": 0.2687, "step": 11450 }, { "epoch": 12.527580557072637, "grad_norm": 8.37775993347168, "learning_rate": 1.813368580060423e-05, "loss": 0.2962, "step": 11475 }, { "epoch": 12.554888039322774, "grad_norm": 5.037398815155029, "learning_rate": 1.810536253776435e-05, "loss": 0.3151, "step": 11500 }, { "epoch": 12.582195521572912, "grad_norm": 5.338317394256592, "learning_rate": 1.8077039274924472e-05, "loss": 0.2471, "step": 11525 }, { "epoch": 12.609503003823047, "grad_norm": 2.3449442386627197, "learning_rate": 1.8048716012084595e-05, "loss": 0.2677, "step": 11550 }, { "epoch": 12.636810486073184, "grad_norm": 5.687106609344482, "learning_rate": 1.8020392749244714e-05, "loss": 0.2428, "step": 11575 }, { "epoch": 12.664117968323321, "grad_norm": 2.4217689037323, "learning_rate": 1.7992069486404833e-05, "loss": 0.3466, "step": 11600 }, { "epoch": 12.691425450573457, "grad_norm": 4.891263008117676, 
"learning_rate": 1.7963746223564952e-05, "loss": 0.2813, "step": 11625 }, { "epoch": 12.718732932823594, "grad_norm": 3.173252582550049, "learning_rate": 1.793542296072508e-05, "loss": 0.2827, "step": 11650 }, { "epoch": 12.74604041507373, "grad_norm": 8.445141792297363, "learning_rate": 1.7907099697885198e-05, "loss": 0.2708, "step": 11675 }, { "epoch": 12.773347897323866, "grad_norm": 5.173661708831787, "learning_rate": 1.7878776435045317e-05, "loss": 0.3151, "step": 11700 }, { "epoch": 12.800655379574003, "grad_norm": 6.202101230621338, "learning_rate": 1.7850453172205436e-05, "loss": 0.266, "step": 11725 }, { "epoch": 12.82796286182414, "grad_norm": 8.43380069732666, "learning_rate": 1.7822129909365562e-05, "loss": 0.3176, "step": 11750 }, { "epoch": 12.855270344074276, "grad_norm": 6.218353748321533, "learning_rate": 1.7793806646525682e-05, "loss": 0.2655, "step": 11775 }, { "epoch": 12.882577826324413, "grad_norm": 5.463388442993164, "learning_rate": 1.77654833836858e-05, "loss": 0.3113, "step": 11800 }, { "epoch": 12.90988530857455, "grad_norm": 13.730123519897461, "learning_rate": 1.773716012084592e-05, "loss": 0.2707, "step": 11825 }, { "epoch": 12.937192790824685, "grad_norm": 3.7278361320495605, "learning_rate": 1.770883685800604e-05, "loss": 0.3254, "step": 11850 }, { "epoch": 12.964500273074822, "grad_norm": 7.486217975616455, "learning_rate": 1.7680513595166166e-05, "loss": 0.2775, "step": 11875 }, { "epoch": 12.99180775532496, "grad_norm": 9.704665184020996, "learning_rate": 1.7652190332326285e-05, "loss": 0.3313, "step": 11900 }, { "epoch": 13.018569087930093, "grad_norm": 4.015198707580566, "learning_rate": 1.7623867069486404e-05, "loss": 0.2883, "step": 11925 }, { "epoch": 13.04587657018023, "grad_norm": 9.856317520141602, "learning_rate": 1.7595543806646524e-05, "loss": 0.2839, "step": 11950 }, { "epoch": 13.073184052430365, "grad_norm": 4.229045867919922, "learning_rate": 1.756722054380665e-05, "loss": 0.2893, "step": 11975 }, { "epoch": 
13.100491534680502, "grad_norm": 8.201128005981445, "learning_rate": 1.753889728096677e-05, "loss": 0.2669, "step": 12000 }, { "epoch": 13.100491534680502, "eval_cer": 0.14148946790456224, "eval_loss": 0.6929749250411987, "eval_model_preparation_time": 0.005, "eval_runtime": 116.7829, "eval_samples_per_second": 43.114, "eval_steps_per_second": 5.395, "eval_wer": 0.3624032711924024, "step": 12000 }, { "epoch": 13.12779901693064, "grad_norm": 4.207302093505859, "learning_rate": 1.7510574018126888e-05, "loss": 0.2661, "step": 12025 }, { "epoch": 13.155106499180775, "grad_norm": 7.19203519821167, "learning_rate": 1.7482250755287007e-05, "loss": 0.2449, "step": 12050 }, { "epoch": 13.182413981430912, "grad_norm": 6.6457719802856445, "learning_rate": 1.745392749244713e-05, "loss": 0.2441, "step": 12075 }, { "epoch": 13.20972146368105, "grad_norm": 3.877528190612793, "learning_rate": 1.7425604229607253e-05, "loss": 0.2815, "step": 12100 }, { "epoch": 13.237028945931185, "grad_norm": 6.255873203277588, "learning_rate": 1.7397280966767372e-05, "loss": 0.2922, "step": 12125 }, { "epoch": 13.264336428181322, "grad_norm": 14.403861045837402, "learning_rate": 1.736895770392749e-05, "loss": 0.2909, "step": 12150 }, { "epoch": 13.291643910431459, "grad_norm": 8.654411315917969, "learning_rate": 1.7340634441087614e-05, "loss": 0.2585, "step": 12175 }, { "epoch": 13.318951392681594, "grad_norm": 12.441396713256836, "learning_rate": 1.7312311178247733e-05, "loss": 0.2717, "step": 12200 }, { "epoch": 13.346258874931731, "grad_norm": 10.804973602294922, "learning_rate": 1.7283987915407856e-05, "loss": 0.2596, "step": 12225 }, { "epoch": 13.373566357181868, "grad_norm": 9.30710506439209, "learning_rate": 1.7255664652567975e-05, "loss": 0.2698, "step": 12250 }, { "epoch": 13.400873839432004, "grad_norm": 29.006147384643555, "learning_rate": 1.7227341389728098e-05, "loss": 0.2839, "step": 12275 }, { "epoch": 13.42818132168214, "grad_norm": 9.001348495483398, "learning_rate": 
1.7199018126888217e-05, "loss": 0.2781, "step": 12300 }, { "epoch": 13.455488803932278, "grad_norm": 4.4662933349609375, "learning_rate": 1.717069486404834e-05, "loss": 0.2872, "step": 12325 }, { "epoch": 13.482796286182413, "grad_norm": 11.566439628601074, "learning_rate": 1.714237160120846e-05, "loss": 0.3204, "step": 12350 }, { "epoch": 13.51010376843255, "grad_norm": 4.186769485473633, "learning_rate": 1.7114048338368582e-05, "loss": 0.2991, "step": 12375 }, { "epoch": 13.537411250682688, "grad_norm": 9.200356483459473, "learning_rate": 1.70857250755287e-05, "loss": 0.3018, "step": 12400 }, { "epoch": 13.564718732932823, "grad_norm": 5.827724456787109, "learning_rate": 1.705740181268882e-05, "loss": 0.2711, "step": 12425 }, { "epoch": 13.59202621518296, "grad_norm": 18.082752227783203, "learning_rate": 1.7029078549848943e-05, "loss": 0.2365, "step": 12450 }, { "epoch": 13.619333697433097, "grad_norm": 5.274575710296631, "learning_rate": 1.7000755287009066e-05, "loss": 0.3041, "step": 12475 }, { "epoch": 13.646641179683233, "grad_norm": 8.484864234924316, "learning_rate": 1.6972432024169185e-05, "loss": 0.2883, "step": 12500 }, { "epoch": 13.67394866193337, "grad_norm": 10.410056114196777, "learning_rate": 1.6944108761329304e-05, "loss": 0.3227, "step": 12525 }, { "epoch": 13.701256144183507, "grad_norm": 18.8059139251709, "learning_rate": 1.6915785498489427e-05, "loss": 0.2768, "step": 12550 }, { "epoch": 13.728563626433642, "grad_norm": 7.438997745513916, "learning_rate": 1.6887462235649546e-05, "loss": 0.2981, "step": 12575 }, { "epoch": 13.75587110868378, "grad_norm": 10.786835670471191, "learning_rate": 1.685913897280967e-05, "loss": 0.2742, "step": 12600 }, { "epoch": 13.783178590933916, "grad_norm": 15.947209358215332, "learning_rate": 1.683081570996979e-05, "loss": 0.2883, "step": 12625 }, { "epoch": 13.810486073184052, "grad_norm": 10.696394920349121, "learning_rate": 1.6802492447129908e-05, "loss": 0.288, "step": 12650 }, { "epoch": 13.837793555434189, 
"grad_norm": 10.1856689453125, "learning_rate": 1.677416918429003e-05, "loss": 0.2833, "step": 12675 }, { "epoch": 13.865101037684326, "grad_norm": 20.514667510986328, "learning_rate": 1.6745845921450153e-05, "loss": 0.262, "step": 12700 }, { "epoch": 13.892408519934461, "grad_norm": 12.570696830749512, "learning_rate": 1.6717522658610272e-05, "loss": 0.27, "step": 12725 }, { "epoch": 13.919716002184598, "grad_norm": 6.038543701171875, "learning_rate": 1.668919939577039e-05, "loss": 0.2969, "step": 12750 }, { "epoch": 13.947023484434736, "grad_norm": 5.487522125244141, "learning_rate": 1.6660876132930514e-05, "loss": 0.2875, "step": 12775 }, { "epoch": 13.974330966684871, "grad_norm": 10.967700958251953, "learning_rate": 1.6632552870090637e-05, "loss": 0.2865, "step": 12800 }, { "epoch": 14.001092299290006, "grad_norm": 4.778906345367432, "learning_rate": 1.6604229607250756e-05, "loss": 0.3363, "step": 12825 }, { "epoch": 14.028399781540141, "grad_norm": 4.755268096923828, "learning_rate": 1.6575906344410875e-05, "loss": 0.2686, "step": 12850 }, { "epoch": 14.055707263790278, "grad_norm": 3.9035816192626953, "learning_rate": 1.6547583081570995e-05, "loss": 0.3019, "step": 12875 }, { "epoch": 14.083014746040416, "grad_norm": 18.743507385253906, "learning_rate": 1.651925981873112e-05, "loss": 0.2327, "step": 12900 }, { "epoch": 14.110322228290551, "grad_norm": 7.223848819732666, "learning_rate": 1.649093655589124e-05, "loss": 0.2864, "step": 12925 }, { "epoch": 14.137629710540688, "grad_norm": 9.283727645874023, "learning_rate": 1.646261329305136e-05, "loss": 0.2224, "step": 12950 }, { "epoch": 14.164937192790825, "grad_norm": 17.71445655822754, "learning_rate": 1.643429003021148e-05, "loss": 0.2825, "step": 12975 }, { "epoch": 14.19224467504096, "grad_norm": 9.462109565734863, "learning_rate": 1.64059667673716e-05, "loss": 0.2286, "step": 13000 }, { "epoch": 14.19224467504096, "eval_cer": 0.14322152341020267, "eval_loss": 0.7066394090652466, 
"eval_model_preparation_time": 0.005, "eval_runtime": 117.4253, "eval_samples_per_second": 42.878, "eval_steps_per_second": 5.365, "eval_wer": 0.36213946535349983, "step": 13000 }, { "epoch": 14.219552157291098, "grad_norm": 6.125277519226074, "learning_rate": 1.6377643504531724e-05, "loss": 0.2919, "step": 13025 }, { "epoch": 14.246859639541235, "grad_norm": 5.871737957000732, "learning_rate": 1.6349320241691843e-05, "loss": 0.2082, "step": 13050 }, { "epoch": 14.27416712179137, "grad_norm": 3.509709596633911, "learning_rate": 1.6320996978851963e-05, "loss": 0.3113, "step": 13075 }, { "epoch": 14.301474604041507, "grad_norm": 5.780418872833252, "learning_rate": 1.6292673716012085e-05, "loss": 0.2077, "step": 13100 }, { "epoch": 14.328782086291644, "grad_norm": 3.801102876663208, "learning_rate": 1.6264350453172208e-05, "loss": 0.2445, "step": 13125 }, { "epoch": 14.35608956854178, "grad_norm": 6.263430595397949, "learning_rate": 1.6236027190332327e-05, "loss": 0.2441, "step": 13150 }, { "epoch": 14.383397050791917, "grad_norm": 11.203507423400879, "learning_rate": 1.6207703927492447e-05, "loss": 0.3084, "step": 13175 }, { "epoch": 14.410704533042054, "grad_norm": 3.950488328933716, "learning_rate": 1.617938066465257e-05, "loss": 0.2386, "step": 13200 }, { "epoch": 14.43801201529219, "grad_norm": 5.700371265411377, "learning_rate": 1.615105740181269e-05, "loss": 0.317, "step": 13225 }, { "epoch": 14.465319497542326, "grad_norm": 7.61651611328125, "learning_rate": 1.612273413897281e-05, "loss": 0.2432, "step": 13250 }, { "epoch": 14.492626979792464, "grad_norm": 11.604390144348145, "learning_rate": 1.609441087613293e-05, "loss": 0.2628, "step": 13275 }, { "epoch": 14.519934462042599, "grad_norm": 4.056069374084473, "learning_rate": 1.6066087613293053e-05, "loss": 0.221, "step": 13300 }, { "epoch": 14.547241944292736, "grad_norm": 5.950977325439453, "learning_rate": 1.6037764350453172e-05, "loss": 0.2937, "step": 13325 }, { "epoch": 14.574549426542873, "grad_norm": 
10.080986022949219, "learning_rate": 1.6009441087613295e-05, "loss": 0.2407, "step": 13350 }, { "epoch": 14.601856908793009, "grad_norm": 8.45311164855957, "learning_rate": 1.5981117824773414e-05, "loss": 0.3289, "step": 13375 }, { "epoch": 14.629164391043146, "grad_norm": 5.346462726593018, "learning_rate": 1.5952794561933534e-05, "loss": 0.2307, "step": 13400 }, { "epoch": 14.656471873293283, "grad_norm": 3.503274440765381, "learning_rate": 1.5924471299093656e-05, "loss": 0.3872, "step": 13425 }, { "epoch": 14.683779355543418, "grad_norm": 28.96894645690918, "learning_rate": 1.5896148036253776e-05, "loss": 0.2617, "step": 13450 }, { "epoch": 14.711086837793555, "grad_norm": 3.9760630130767822, "learning_rate": 1.58678247734139e-05, "loss": 0.3701, "step": 13475 }, { "epoch": 14.738394320043692, "grad_norm": 6.902076244354248, "learning_rate": 1.5839501510574018e-05, "loss": 0.2726, "step": 13500 }, { "epoch": 14.765701802293828, "grad_norm": 24.660058975219727, "learning_rate": 1.581117824773414e-05, "loss": 0.2937, "step": 13525 }, { "epoch": 14.793009284543965, "grad_norm": 6.177881717681885, "learning_rate": 1.578285498489426e-05, "loss": 0.2297, "step": 13550 }, { "epoch": 14.820316766794102, "grad_norm": 3.1081602573394775, "learning_rate": 1.575453172205438e-05, "loss": 0.2791, "step": 13575 }, { "epoch": 14.847624249044237, "grad_norm": 9.378297805786133, "learning_rate": 1.57262084592145e-05, "loss": 0.2174, "step": 13600 }, { "epoch": 14.874931731294375, "grad_norm": 3.71321177482605, "learning_rate": 1.5697885196374624e-05, "loss": 0.2482, "step": 13625 }, { "epoch": 14.902239213544512, "grad_norm": 9.858495712280273, "learning_rate": 1.5669561933534743e-05, "loss": 0.2466, "step": 13650 }, { "epoch": 14.929546695794647, "grad_norm": 9.513136863708496, "learning_rate": 1.5641238670694863e-05, "loss": 0.3143, "step": 13675 }, { "epoch": 14.956854178044784, "grad_norm": 42.5731086730957, "learning_rate": 1.5612915407854985e-05, "loss": 0.2076, "step": 
13700 }, { "epoch": 14.984161660294921, "grad_norm": 3.4782097339630127, "learning_rate": 1.5584592145015108e-05, "loss": 0.2997, "step": 13725 }, { "epoch": 15.010922992900054, "grad_norm": 3.7906293869018555, "learning_rate": 1.5556268882175227e-05, "loss": 0.2431, "step": 13750 }, { "epoch": 15.038230475150192, "grad_norm": 19.897266387939453, "learning_rate": 1.5527945619335347e-05, "loss": 0.2492, "step": 13775 }, { "epoch": 15.065537957400327, "grad_norm": 12.366985321044922, "learning_rate": 1.5499622356495466e-05, "loss": 0.3193, "step": 13800 }, { "epoch": 15.092845439650464, "grad_norm": 10.721641540527344, "learning_rate": 1.5471299093655592e-05, "loss": 0.2326, "step": 13825 }, { "epoch": 15.120152921900601, "grad_norm": 6.9664530754089355, "learning_rate": 1.544297583081571e-05, "loss": 0.2341, "step": 13850 }, { "epoch": 15.147460404150737, "grad_norm": 3.753093719482422, "learning_rate": 1.541465256797583e-05, "loss": 0.2298, "step": 13875 }, { "epoch": 15.174767886400874, "grad_norm": 2.4418745040893555, "learning_rate": 1.538632930513595e-05, "loss": 0.2632, "step": 13900 }, { "epoch": 15.20207536865101, "grad_norm": 7.023916721343994, "learning_rate": 1.5358006042296076e-05, "loss": 0.2363, "step": 13925 }, { "epoch": 15.229382850901146, "grad_norm": 7.436373233795166, "learning_rate": 1.5329682779456195e-05, "loss": 0.2899, "step": 13950 }, { "epoch": 15.256690333151283, "grad_norm": 18.109189987182617, "learning_rate": 1.5301359516616315e-05, "loss": 0.2648, "step": 13975 }, { "epoch": 15.28399781540142, "grad_norm": 5.149053573608398, "learning_rate": 1.5273036253776434e-05, "loss": 0.2433, "step": 14000 }, { "epoch": 15.28399781540142, "eval_cer": 0.14123490066886293, "eval_loss": 0.7281185388565063, "eval_model_preparation_time": 0.005, "eval_runtime": 136.0266, "eval_samples_per_second": 37.015, "eval_steps_per_second": 4.631, "eval_wer": 0.35651160745691174, "step": 14000 }, { "epoch": 15.311305297651556, "grad_norm": 9.882929801940918, 
"learning_rate": 1.5244712990936558e-05, "loss": 0.2584, "step": 14025 }, { "epoch": 15.338612779901693, "grad_norm": 4.326746463775635, "learning_rate": 1.5216389728096678e-05, "loss": 0.2591, "step": 14050 }, { "epoch": 15.36592026215183, "grad_norm": 8.021541595458984, "learning_rate": 1.5188066465256798e-05, "loss": 0.2497, "step": 14075 }, { "epoch": 15.393227744401965, "grad_norm": 5.988555908203125, "learning_rate": 1.5159743202416918e-05, "loss": 0.291, "step": 14100 }, { "epoch": 15.420535226652103, "grad_norm": 7.510362148284912, "learning_rate": 1.5131419939577039e-05, "loss": 0.2204, "step": 14125 }, { "epoch": 15.44784270890224, "grad_norm": 6.247300624847412, "learning_rate": 1.5103096676737161e-05, "loss": 0.2767, "step": 14150 }, { "epoch": 15.475150191152375, "grad_norm": 3.7041385173797607, "learning_rate": 1.507477341389728e-05, "loss": 0.2334, "step": 14175 }, { "epoch": 15.502457673402512, "grad_norm": 4.977872848510742, "learning_rate": 1.5046450151057402e-05, "loss": 0.242, "step": 14200 }, { "epoch": 15.52976515565265, "grad_norm": 11.095629692077637, "learning_rate": 1.5018126888217521e-05, "loss": 0.2511, "step": 14225 }, { "epoch": 15.557072637902785, "grad_norm": 24.645048141479492, "learning_rate": 1.4989803625377644e-05, "loss": 0.3149, "step": 14250 }, { "epoch": 15.584380120152922, "grad_norm": 7.208953380584717, "learning_rate": 1.4961480362537765e-05, "loss": 0.2092, "step": 14275 }, { "epoch": 15.611687602403059, "grad_norm": 3.514556646347046, "learning_rate": 1.4933157099697886e-05, "loss": 0.3138, "step": 14300 }, { "epoch": 15.638995084653194, "grad_norm": 10.65534782409668, "learning_rate": 1.4904833836858007e-05, "loss": 0.2327, "step": 14325 }, { "epoch": 15.666302566903331, "grad_norm": 5.540506362915039, "learning_rate": 1.4876510574018128e-05, "loss": 0.273, "step": 14350 }, { "epoch": 15.693610049153468, "grad_norm": 9.557373046875, "learning_rate": 1.4848187311178249e-05, "loss": 0.2274, "step": 14375 }, { "epoch": 
15.720917531403604, "grad_norm": 2.341871738433838, "learning_rate": 1.4819864048338368e-05, "loss": 0.2212, "step": 14400 }, { "epoch": 15.748225013653741, "grad_norm": 4.712292194366455, "learning_rate": 1.479154078549849e-05, "loss": 0.2922, "step": 14425 }, { "epoch": 15.775532495903878, "grad_norm": 5.306559085845947, "learning_rate": 1.476321752265861e-05, "loss": 0.2448, "step": 14450 }, { "epoch": 15.802839978154013, "grad_norm": 18.241809844970703, "learning_rate": 1.4734894259818732e-05, "loss": 0.2617, "step": 14475 }, { "epoch": 15.83014746040415, "grad_norm": 18.331239700317383, "learning_rate": 1.4706570996978852e-05, "loss": 0.2434, "step": 14500 }, { "epoch": 15.857454942654288, "grad_norm": 10.277040481567383, "learning_rate": 1.4678247734138974e-05, "loss": 0.2746, "step": 14525 }, { "epoch": 15.884762424904423, "grad_norm": 9.365705490112305, "learning_rate": 1.4649924471299094e-05, "loss": 0.2834, "step": 14550 }, { "epoch": 15.91206990715456, "grad_norm": 22.495763778686523, "learning_rate": 1.4621601208459215e-05, "loss": 0.2187, "step": 14575 }, { "epoch": 15.939377389404697, "grad_norm": 3.7131130695343018, "learning_rate": 1.4593277945619336e-05, "loss": 0.2504, "step": 14600 }, { "epoch": 15.966684871654833, "grad_norm": 4.95455265045166, "learning_rate": 1.4564954682779457e-05, "loss": 0.2378, "step": 14625 }, { "epoch": 15.99399235390497, "grad_norm": 2.6789755821228027, "learning_rate": 1.4536631419939578e-05, "loss": 0.2646, "step": 14650 }, { "epoch": 16.020753686510105, "grad_norm": 6.716854572296143, "learning_rate": 1.4508308157099699e-05, "loss": 0.2062, "step": 14675 }, { "epoch": 16.048061168760242, "grad_norm": 13.50964641571045, "learning_rate": 1.447998489425982e-05, "loss": 0.2292, "step": 14700 }, { "epoch": 16.075368651010375, "grad_norm": 4.608625411987305, "learning_rate": 1.445166163141994e-05, "loss": 0.2461, "step": 14725 }, { "epoch": 16.102676133260513, "grad_norm": 7.282268524169922, "learning_rate": 
1.442333836858006e-05, "loss": 0.2409, "step": 14750 }, { "epoch": 16.12998361551065, "grad_norm": 6.0458455085754395, "learning_rate": 1.4395015105740183e-05, "loss": 0.2846, "step": 14775 }, { "epoch": 16.157291097760787, "grad_norm": 8.913979530334473, "learning_rate": 1.4366691842900302e-05, "loss": 0.2595, "step": 14800 }, { "epoch": 16.184598580010924, "grad_norm": 4.441019058227539, "learning_rate": 1.4338368580060423e-05, "loss": 0.2385, "step": 14825 }, { "epoch": 16.21190606226106, "grad_norm": 9.358894348144531, "learning_rate": 1.4310045317220544e-05, "loss": 0.2251, "step": 14850 }, { "epoch": 16.239213544511195, "grad_norm": 6.088677406311035, "learning_rate": 1.4281722054380665e-05, "loss": 0.223, "step": 14875 }, { "epoch": 16.266521026761332, "grad_norm": 35.75179672241211, "learning_rate": 1.4253398791540786e-05, "loss": 0.2167, "step": 14900 }, { "epoch": 16.29382850901147, "grad_norm": 5.356253623962402, "learning_rate": 1.4225075528700907e-05, "loss": 0.2704, "step": 14925 }, { "epoch": 16.321135991261606, "grad_norm": 11.521393775939941, "learning_rate": 1.4196752265861028e-05, "loss": 0.2628, "step": 14950 }, { "epoch": 16.348443473511743, "grad_norm": 7.684365272521973, "learning_rate": 1.4168429003021147e-05, "loss": 0.3086, "step": 14975 }, { "epoch": 16.37575095576188, "grad_norm": 6.752121448516846, "learning_rate": 1.414010574018127e-05, "loss": 0.2251, "step": 15000 }, { "epoch": 16.37575095576188, "eval_cer": 0.13930817610062893, "eval_loss": 0.7178555727005005, "eval_model_preparation_time": 0.005, "eval_runtime": 113.513, "eval_samples_per_second": 44.356, "eval_steps_per_second": 5.55, "eval_wer": 0.3539614843475202, "step": 15000 }, { "epoch": 16.403058438012014, "grad_norm": 10.246522903442383, "learning_rate": 1.4111782477341389e-05, "loss": 0.2757, "step": 15025 }, { "epoch": 16.43036592026215, "grad_norm": 16.91344451904297, "learning_rate": 1.4083459214501512e-05, "loss": 0.2591, "step": 15050 }, { "epoch": 
16.457673402512288, "grad_norm": 5.796011447906494, "learning_rate": 1.4055135951661631e-05, "loss": 0.2089, "step": 15075 }, { "epoch": 16.484980884762425, "grad_norm": 8.164037704467773, "learning_rate": 1.4026812688821754e-05, "loss": 0.2417, "step": 15100 }, { "epoch": 16.512288367012562, "grad_norm": 15.447609901428223, "learning_rate": 1.3998489425981873e-05, "loss": 0.2205, "step": 15125 }, { "epoch": 16.5395958492627, "grad_norm": 18.767681121826172, "learning_rate": 1.3970166163141994e-05, "loss": 0.257, "step": 15150 }, { "epoch": 16.566903331512833, "grad_norm": 2.680795907974243, "learning_rate": 1.3941842900302115e-05, "loss": 0.2148, "step": 15175 }, { "epoch": 16.59421081376297, "grad_norm": 5.706177234649658, "learning_rate": 1.3913519637462236e-05, "loss": 0.2471, "step": 15200 }, { "epoch": 16.621518296013107, "grad_norm": 5.816301345825195, "learning_rate": 1.3885196374622357e-05, "loss": 0.2608, "step": 15225 }, { "epoch": 16.648825778263245, "grad_norm": 20.594526290893555, "learning_rate": 1.3856873111782478e-05, "loss": 0.2462, "step": 15250 }, { "epoch": 16.67613326051338, "grad_norm": 2.347273826599121, "learning_rate": 1.3828549848942599e-05, "loss": 0.2288, "step": 15275 }, { "epoch": 16.70344074276352, "grad_norm": 5.950928688049316, "learning_rate": 1.380022658610272e-05, "loss": 0.2443, "step": 15300 }, { "epoch": 16.730748225013652, "grad_norm": 5.35457706451416, "learning_rate": 1.377190332326284e-05, "loss": 0.2389, "step": 15325 }, { "epoch": 16.75805570726379, "grad_norm": 7.508402347564697, "learning_rate": 1.3743580060422962e-05, "loss": 0.2464, "step": 15350 }, { "epoch": 16.785363189513927, "grad_norm": 3.006135940551758, "learning_rate": 1.3715256797583081e-05, "loss": 0.2021, "step": 15375 }, { "epoch": 16.812670671764064, "grad_norm": 29.98521614074707, "learning_rate": 1.3686933534743204e-05, "loss": 0.2471, "step": 15400 }, { "epoch": 16.8399781540142, "grad_norm": 4.480222225189209, "learning_rate": 
1.3658610271903323e-05, "loss": 0.2164, "step": 15425 }, { "epoch": 16.867285636264338, "grad_norm": 5.716984748840332, "learning_rate": 1.3630287009063446e-05, "loss": 0.2426, "step": 15450 }, { "epoch": 16.89459311851447, "grad_norm": 5.612983703613281, "learning_rate": 1.3601963746223565e-05, "loss": 0.2176, "step": 15475 }, { "epoch": 16.92190060076461, "grad_norm": 6.431447505950928, "learning_rate": 1.3573640483383688e-05, "loss": 0.2307, "step": 15500 }, { "epoch": 16.949208083014746, "grad_norm": 2.398240566253662, "learning_rate": 1.3545317220543807e-05, "loss": 0.263, "step": 15525 }, { "epoch": 16.976515565264883, "grad_norm": 18.563369750976562, "learning_rate": 1.3516993957703928e-05, "loss": 0.2278, "step": 15550 }, { "epoch": 17.003276897870016, "grad_norm": 5.476058006286621, "learning_rate": 1.3488670694864049e-05, "loss": 0.2722, "step": 15575 }, { "epoch": 17.030584380120153, "grad_norm": 5.298595905303955, "learning_rate": 1.3460347432024168e-05, "loss": 0.2182, "step": 15600 }, { "epoch": 17.05789186237029, "grad_norm": 2.9727327823638916, "learning_rate": 1.3432024169184291e-05, "loss": 0.2316, "step": 15625 }, { "epoch": 17.085199344620428, "grad_norm": 5.847138404846191, "learning_rate": 1.340370090634441e-05, "loss": 0.1956, "step": 15650 }, { "epoch": 17.11250682687056, "grad_norm": 2.3735570907592773, "learning_rate": 1.3375377643504533e-05, "loss": 0.2291, "step": 15675 }, { "epoch": 17.1398143091207, "grad_norm": 4.967169284820557, "learning_rate": 1.3347054380664652e-05, "loss": 0.2089, "step": 15700 }, { "epoch": 17.167121791370835, "grad_norm": 3.368574619293213, "learning_rate": 1.3318731117824773e-05, "loss": 0.2403, "step": 15725 }, { "epoch": 17.194429273620973, "grad_norm": 3.802225351333618, "learning_rate": 1.3290407854984894e-05, "loss": 0.168, "step": 15750 }, { "epoch": 17.22173675587111, "grad_norm": 5.438094615936279, "learning_rate": 1.3262084592145015e-05, "loss": 0.3152, "step": 15775 }, { "epoch": 17.249044238121247, 
"grad_norm": 49.53477478027344, "learning_rate": 1.3233761329305136e-05, "loss": 0.2178, "step": 15800 }, { "epoch": 17.27635172037138, "grad_norm": 15.599020957946777, "learning_rate": 1.3205438066465257e-05, "loss": 0.3078, "step": 15825 }, { "epoch": 17.303659202621517, "grad_norm": 4.3582048416137695, "learning_rate": 1.3177114803625378e-05, "loss": 0.1965, "step": 15850 }, { "epoch": 17.330966684871655, "grad_norm": 12.83676815032959, "learning_rate": 1.3148791540785499e-05, "loss": 0.2924, "step": 15875 }, { "epoch": 17.35827416712179, "grad_norm": 6.862155437469482, "learning_rate": 1.312046827794562e-05, "loss": 0.2223, "step": 15900 }, { "epoch": 17.38558164937193, "grad_norm": 5.615942478179932, "learning_rate": 1.3092145015105741e-05, "loss": 0.2736, "step": 15925 }, { "epoch": 17.412889131622066, "grad_norm": 3.966806173324585, "learning_rate": 1.306382175226586e-05, "loss": 0.1937, "step": 15950 }, { "epoch": 17.4401966138722, "grad_norm": 6.684864521026611, "learning_rate": 1.3035498489425983e-05, "loss": 0.2869, "step": 15975 }, { "epoch": 17.467504096122337, "grad_norm": 16.13443374633789, "learning_rate": 1.3007175226586102e-05, "loss": 0.1781, "step": 16000 }, { "epoch": 17.467504096122337, "eval_cer": 0.13811520415294, "eval_loss": 0.733893632888794, "eval_model_preparation_time": 0.005, "eval_runtime": 118.2912, "eval_samples_per_second": 42.564, "eval_steps_per_second": 5.326, "eval_wer": 0.35268642279282447, "step": 16000 }, { "epoch": 17.494811578372474, "grad_norm": 17.215211868286133, "learning_rate": 1.2978851963746225e-05, "loss": 0.2604, "step": 16025 }, { "epoch": 17.52211906062261, "grad_norm": 7.223541259765625, "learning_rate": 1.2950528700906344e-05, "loss": 0.1861, "step": 16050 }, { "epoch": 17.549426542872748, "grad_norm": 42.307403564453125, "learning_rate": 1.2922205438066467e-05, "loss": 0.3129, "step": 16075 }, { "epoch": 17.576734025122885, "grad_norm": 19.543766021728516, "learning_rate": 1.2893882175226586e-05, "loss": 
0.2051, "step": 16100 }, { "epoch": 17.60404150737302, "grad_norm": 3.4581665992736816, "learning_rate": 1.2865558912386707e-05, "loss": 0.2348, "step": 16125 }, { "epoch": 17.631348989623156, "grad_norm": 7.685246467590332, "learning_rate": 1.2837235649546828e-05, "loss": 0.2046, "step": 16150 }, { "epoch": 17.658656471873293, "grad_norm": 4.313009262084961, "learning_rate": 1.2808912386706949e-05, "loss": 0.2607, "step": 16175 }, { "epoch": 17.68596395412343, "grad_norm": 3.2778303623199463, "learning_rate": 1.278058912386707e-05, "loss": 0.2133, "step": 16200 }, { "epoch": 17.713271436373567, "grad_norm": 4.379212379455566, "learning_rate": 1.2752265861027191e-05, "loss": 0.2181, "step": 16225 }, { "epoch": 17.740578918623704, "grad_norm": 11.715255737304688, "learning_rate": 1.2723942598187312e-05, "loss": 0.1923, "step": 16250 }, { "epoch": 17.767886400873838, "grad_norm": 5.89854097366333, "learning_rate": 1.2695619335347433e-05, "loss": 0.2107, "step": 16275 }, { "epoch": 17.795193883123975, "grad_norm": 11.6561861038208, "learning_rate": 1.2667296072507554e-05, "loss": 0.2006, "step": 16300 }, { "epoch": 17.822501365374112, "grad_norm": 6.303864479064941, "learning_rate": 1.2638972809667673e-05, "loss": 0.267, "step": 16325 }, { "epoch": 17.84980884762425, "grad_norm": 9.764203071594238, "learning_rate": 1.2610649546827794e-05, "loss": 0.2355, "step": 16350 }, { "epoch": 17.877116329874386, "grad_norm": 2.418781280517578, "learning_rate": 1.2582326283987915e-05, "loss": 0.2351, "step": 16375 }, { "epoch": 17.904423812124524, "grad_norm": 5.107450485229492, "learning_rate": 1.2554003021148036e-05, "loss": 0.2076, "step": 16400 }, { "epoch": 17.931731294374657, "grad_norm": 5.235945701599121, "learning_rate": 1.2525679758308157e-05, "loss": 0.2327, "step": 16425 }, { "epoch": 17.959038776624794, "grad_norm": 4.249573707580566, "learning_rate": 1.2497356495468278e-05, "loss": 0.1982, "step": 16450 }, { "epoch": 17.98634625887493, "grad_norm": 
3.8613998889923096, "learning_rate": 1.24690332326284e-05, "loss": 0.2505, "step": 16475 }, { "epoch": 18.013107591480065, "grad_norm": 5.7005934715271, "learning_rate": 1.244070996978852e-05, "loss": 0.2021, "step": 16500 }, { "epoch": 18.040415073730202, "grad_norm": 6.706546783447266, "learning_rate": 1.241238670694864e-05, "loss": 0.18, "step": 16525 }, { "epoch": 18.06772255598034, "grad_norm": 4.378579139709473, "learning_rate": 1.2384063444108762e-05, "loss": 0.2004, "step": 16550 }, { "epoch": 18.095030038230476, "grad_norm": 7.8048601150512695, "learning_rate": 1.2355740181268881e-05, "loss": 0.2322, "step": 16575 }, { "epoch": 18.122337520480613, "grad_norm": 7.397279262542725, "learning_rate": 1.2327416918429004e-05, "loss": 0.2289, "step": 16600 }, { "epoch": 18.149645002730747, "grad_norm": 3.800093650817871, "learning_rate": 1.2299093655589123e-05, "loss": 0.1785, "step": 16625 }, { "epoch": 18.176952484980884, "grad_norm": 3.938688278198242, "learning_rate": 1.2270770392749246e-05, "loss": 0.1911, "step": 16650 }, { "epoch": 18.20425996723102, "grad_norm": 6.401691913604736, "learning_rate": 1.2242447129909365e-05, "loss": 0.177, "step": 16675 }, { "epoch": 18.231567449481158, "grad_norm": 4.716271877288818, "learning_rate": 1.2214123867069486e-05, "loss": 0.2542, "step": 16700 }, { "epoch": 18.258874931731295, "grad_norm": 4.8476080894470215, "learning_rate": 1.2185800604229607e-05, "loss": 0.1852, "step": 16725 }, { "epoch": 18.286182413981432, "grad_norm": 12.746408462524414, "learning_rate": 1.2157477341389728e-05, "loss": 0.205, "step": 16750 }, { "epoch": 18.313489896231566, "grad_norm": 2.8395423889160156, "learning_rate": 1.212915407854985e-05, "loss": 0.1956, "step": 16775 }, { "epoch": 18.340797378481703, "grad_norm": 2.555696725845337, "learning_rate": 1.210083081570997e-05, "loss": 0.2556, "step": 16800 }, { "epoch": 18.36810486073184, "grad_norm": 7.5078253746032715, "learning_rate": 1.2072507552870091e-05, "loss": 0.2159, "step": 16825 
}, { "epoch": 18.395412342981977, "grad_norm": 10.618643760681152, "learning_rate": 1.2044184290030212e-05, "loss": 0.2505, "step": 16850 }, { "epoch": 18.422719825232114, "grad_norm": 2.813826322555542, "learning_rate": 1.2015861027190333e-05, "loss": 0.2052, "step": 16875 }, { "epoch": 18.45002730748225, "grad_norm": 3.9112982749938965, "learning_rate": 1.1987537764350454e-05, "loss": 0.2513, "step": 16900 }, { "epoch": 18.477334789732385, "grad_norm": 7.779905319213867, "learning_rate": 1.1959214501510573e-05, "loss": 0.1906, "step": 16925 }, { "epoch": 18.504642271982522, "grad_norm": 5.81519079208374, "learning_rate": 1.1930891238670696e-05, "loss": 0.2248, "step": 16950 }, { "epoch": 18.53194975423266, "grad_norm": 4.393283843994141, "learning_rate": 1.1902567975830815e-05, "loss": 0.2114, "step": 16975 }, { "epoch": 18.559257236482797, "grad_norm": 4.763165473937988, "learning_rate": 1.1874244712990938e-05, "loss": 0.2163, "step": 17000 }, { "epoch": 18.559257236482797, "eval_cer": 0.1401317759808326, "eval_loss": 0.724510133266449, "eval_model_preparation_time": 0.005, "eval_runtime": 113.5812, "eval_samples_per_second": 44.33, "eval_steps_per_second": 5.547, "eval_wer": 0.3588638761871263, "step": 17000 }, { "epoch": 18.586564718732934, "grad_norm": 4.05600643157959, "learning_rate": 1.1845921450151057e-05, "loss": 0.1855, "step": 17025 }, { "epoch": 18.61387220098307, "grad_norm": 4.192597389221191, "learning_rate": 1.181759818731118e-05, "loss": 0.2186, "step": 17050 }, { "epoch": 18.641179683233204, "grad_norm": 4.396621227264404, "learning_rate": 1.17892749244713e-05, "loss": 0.2083, "step": 17075 }, { "epoch": 18.66848716548334, "grad_norm": 4.536408424377441, "learning_rate": 1.1760951661631419e-05, "loss": 0.2231, "step": 17100 }, { "epoch": 18.69579464773348, "grad_norm": 14.891438484191895, "learning_rate": 1.1732628398791541e-05, "loss": 0.2102, "step": 17125 }, { "epoch": 18.723102129983616, "grad_norm": 6.053021430969238, "learning_rate": 
1.170430513595166e-05, "loss": 0.2071, "step": 17150 }, { "epoch": 18.750409612233753, "grad_norm": 44.98186111450195, "learning_rate": 1.1675981873111783e-05, "loss": 0.181, "step": 17175 }, { "epoch": 18.77771709448389, "grad_norm": 13.360466003417969, "learning_rate": 1.1647658610271903e-05, "loss": 0.2348, "step": 17200 }, { "epoch": 18.805024576734024, "grad_norm": 5.033117771148682, "learning_rate": 1.1619335347432025e-05, "loss": 0.2316, "step": 17225 }, { "epoch": 18.83233205898416, "grad_norm": 4.777559757232666, "learning_rate": 1.1591012084592144e-05, "loss": 0.2103, "step": 17250 }, { "epoch": 18.859639541234298, "grad_norm": 6.499094009399414, "learning_rate": 1.1562688821752267e-05, "loss": 0.222, "step": 17275 }, { "epoch": 18.886947023484435, "grad_norm": 7.2833476066589355, "learning_rate": 1.1534365558912386e-05, "loss": 0.2172, "step": 17300 }, { "epoch": 18.914254505734572, "grad_norm": 3.4236626625061035, "learning_rate": 1.1506042296072507e-05, "loss": 0.198, "step": 17325 }, { "epoch": 18.94156198798471, "grad_norm": 6.557002067565918, "learning_rate": 1.1477719033232628e-05, "loss": 0.2794, "step": 17350 }, { "epoch": 18.968869470234843, "grad_norm": 10.484264373779297, "learning_rate": 1.144939577039275e-05, "loss": 0.1959, "step": 17375 }, { "epoch": 18.99617695248498, "grad_norm": 9.944290161132812, "learning_rate": 1.142107250755287e-05, "loss": 0.2224, "step": 17400 }, { "epoch": 19.022938285090113, "grad_norm": 5.705021858215332, "learning_rate": 1.1392749244712991e-05, "loss": 0.2042, "step": 17425 }, { "epoch": 19.05024576734025, "grad_norm": 8.89173412322998, "learning_rate": 1.1364425981873112e-05, "loss": 0.2321, "step": 17450 }, { "epoch": 19.077553249590387, "grad_norm": 78.8735580444336, "learning_rate": 1.1336102719033233e-05, "loss": 0.1802, "step": 17475 }, { "epoch": 19.104860731840525, "grad_norm": 9.61742115020752, "learning_rate": 1.1307779456193353e-05, "loss": 0.225, "step": 17500 }, { "epoch": 19.13216821409066, 
"grad_norm": 6.6852827072143555, "learning_rate": 1.1279456193353475e-05, "loss": 0.2121, "step": 17525 }, { "epoch": 19.1594756963408, "grad_norm": 69.73776245117188, "learning_rate": 1.1251132930513595e-05, "loss": 0.2036, "step": 17550 }, { "epoch": 19.186783178590932, "grad_norm": 4.272860527038574, "learning_rate": 1.1222809667673717e-05, "loss": 0.2344, "step": 17575 }, { "epoch": 19.21409066084107, "grad_norm": 12.850485801696777, "learning_rate": 1.1194486404833837e-05, "loss": 0.2044, "step": 17600 }, { "epoch": 19.241398143091207, "grad_norm": 10.330352783203125, "learning_rate": 1.116616314199396e-05, "loss": 0.21, "step": 17625 }, { "epoch": 19.268705625341344, "grad_norm": 10.894495010375977, "learning_rate": 1.1137839879154079e-05, "loss": 0.248, "step": 17650 }, { "epoch": 19.29601310759148, "grad_norm": 5.251489162445068, "learning_rate": 1.1109516616314201e-05, "loss": 0.175, "step": 17675 }, { "epoch": 19.323320589841618, "grad_norm": 8.815728187561035, "learning_rate": 1.108119335347432e-05, "loss": 0.1893, "step": 17700 }, { "epoch": 19.35062807209175, "grad_norm": 6.117236614227295, "learning_rate": 1.1052870090634441e-05, "loss": 0.1994, "step": 17725 }, { "epoch": 19.37793555434189, "grad_norm": 6.992276191711426, "learning_rate": 1.1024546827794562e-05, "loss": 0.216, "step": 17750 }, { "epoch": 19.405243036592026, "grad_norm": 9.3810396194458, "learning_rate": 1.0996223564954683e-05, "loss": 0.2425, "step": 17775 }, { "epoch": 19.432550518842163, "grad_norm": 4.415302753448486, "learning_rate": 1.0967900302114804e-05, "loss": 0.19, "step": 17800 }, { "epoch": 19.4598580010923, "grad_norm": 4.029896259307861, "learning_rate": 1.0939577039274924e-05, "loss": 0.1787, "step": 17825 }, { "epoch": 19.487165483342437, "grad_norm": 15.452176094055176, "learning_rate": 1.0911253776435046e-05, "loss": 0.2427, "step": 17850 }, { "epoch": 19.51447296559257, "grad_norm": 7.432787895202637, "learning_rate": 1.0882930513595166e-05, "loss": 0.1982, "step": 
17875 }, { "epoch": 19.541780447842708, "grad_norm": 6.447238922119141, "learning_rate": 1.0854607250755287e-05, "loss": 0.2044, "step": 17900 }, { "epoch": 19.569087930092845, "grad_norm": 3.499976873397827, "learning_rate": 1.0826283987915408e-05, "loss": 0.1991, "step": 17925 }, { "epoch": 19.596395412342982, "grad_norm": 6.633445739746094, "learning_rate": 1.0797960725075529e-05, "loss": 0.2103, "step": 17950 }, { "epoch": 19.62370289459312, "grad_norm": 4.407260417938232, "learning_rate": 1.076963746223565e-05, "loss": 0.2041, "step": 17975 }, { "epoch": 19.651010376843256, "grad_norm": 6.96198034286499, "learning_rate": 1.074131419939577e-05, "loss": 0.2013, "step": 18000 }, { "epoch": 19.651010376843256, "eval_cer": 0.1365778177098932, "eval_loss": 0.7598505020141602, "eval_model_preparation_time": 0.005, "eval_runtime": 113.4883, "eval_samples_per_second": 44.366, "eval_steps_per_second": 5.551, "eval_wer": 0.3485094970102005, "step": 18000 }, { "epoch": 19.67831785909339, "grad_norm": 5.1107401847839355, "learning_rate": 1.0712990936555892e-05, "loss": 0.192, "step": 18025 }, { "epoch": 19.705625341343527, "grad_norm": 14.349475860595703, "learning_rate": 1.0684667673716013e-05, "loss": 0.2143, "step": 18050 }, { "epoch": 19.732932823593664, "grad_norm": 4.009053707122803, "learning_rate": 1.0656344410876132e-05, "loss": 0.1897, "step": 18075 }, { "epoch": 19.7602403058438, "grad_norm": 7.854366302490234, "learning_rate": 1.0628021148036254e-05, "loss": 0.2031, "step": 18100 }, { "epoch": 19.78754778809394, "grad_norm": 7.2458319664001465, "learning_rate": 1.0599697885196374e-05, "loss": 0.188, "step": 18125 }, { "epoch": 19.814855270344076, "grad_norm": 17.989526748657227, "learning_rate": 1.0571374622356496e-05, "loss": 0.2197, "step": 18150 }, { "epoch": 19.84216275259421, "grad_norm": 12.014328002929688, "learning_rate": 1.0543051359516616e-05, "loss": 0.1948, "step": 18175 }, { "epoch": 19.869470234844346, "grad_norm": 6.952954292297363, 
"learning_rate": 1.0514728096676738e-05, "loss": 0.2316, "step": 18200 }, { "epoch": 19.896777717094484, "grad_norm": 6.147061347961426, "learning_rate": 1.0486404833836858e-05, "loss": 0.2109, "step": 18225 }, { "epoch": 19.92408519934462, "grad_norm": 7.00918436050415, "learning_rate": 1.045808157099698e-05, "loss": 0.1831, "step": 18250 }, { "epoch": 19.951392681594758, "grad_norm": 5.870749473571777, "learning_rate": 1.04297583081571e-05, "loss": 0.2208, "step": 18275 }, { "epoch": 19.978700163844895, "grad_norm": 4.460132122039795, "learning_rate": 1.040143504531722e-05, "loss": 0.2171, "step": 18300 }, { "epoch": 20.005461496450028, "grad_norm": 7.277628421783447, "learning_rate": 1.0373111782477342e-05, "loss": 0.2301, "step": 18325 }, { "epoch": 20.032768978700165, "grad_norm": 6.43742036819458, "learning_rate": 1.0344788519637463e-05, "loss": 0.1675, "step": 18350 }, { "epoch": 20.0600764609503, "grad_norm": 13.237269401550293, "learning_rate": 1.0316465256797584e-05, "loss": 0.1711, "step": 18375 }, { "epoch": 20.087383943200436, "grad_norm": 6.0299224853515625, "learning_rate": 1.0288141993957705e-05, "loss": 0.1857, "step": 18400 }, { "epoch": 20.114691425450573, "grad_norm": 7.192684173583984, "learning_rate": 1.0259818731117826e-05, "loss": 0.2043, "step": 18425 }, { "epoch": 20.14199890770071, "grad_norm": 6.2722344398498535, "learning_rate": 1.0231495468277947e-05, "loss": 0.175, "step": 18450 }, { "epoch": 20.169306389950847, "grad_norm": 4.916136741638184, "learning_rate": 1.0203172205438066e-05, "loss": 0.1881, "step": 18475 }, { "epoch": 20.196613872200984, "grad_norm": 4.946478366851807, "learning_rate": 1.0174848942598188e-05, "loss": 0.1669, "step": 18500 }, { "epoch": 20.223921354451118, "grad_norm": 2.791131019592285, "learning_rate": 1.0146525679758308e-05, "loss": 0.1952, "step": 18525 }, { "epoch": 20.251228836701255, "grad_norm": 6.574548244476318, "learning_rate": 1.011820241691843e-05, "loss": 0.1893, "step": 18550 }, { "epoch": 
20.278536318951392, "grad_norm": 4.7845964431762695, "learning_rate": 1.008987915407855e-05, "loss": 0.2305, "step": 18575 }, { "epoch": 20.30584380120153, "grad_norm": 5.583712100982666, "learning_rate": 1.006155589123867e-05, "loss": 0.1856, "step": 18600 }, { "epoch": 20.333151283451667, "grad_norm": 4.1546101570129395, "learning_rate": 1.0033232628398792e-05, "loss": 0.1855, "step": 18625 }, { "epoch": 20.360458765701804, "grad_norm": 3.7651526927948, "learning_rate": 1.0004909365558913e-05, "loss": 0.1485, "step": 18650 }, { "epoch": 20.387766247951937, "grad_norm": 6.65753698348999, "learning_rate": 9.976586102719034e-06, "loss": 0.2068, "step": 18675 }, { "epoch": 20.415073730202074, "grad_norm": 10.027351379394531, "learning_rate": 9.948262839879153e-06, "loss": 0.1614, "step": 18700 }, { "epoch": 20.44238121245221, "grad_norm": 2.925196647644043, "learning_rate": 9.919939577039276e-06, "loss": 0.2187, "step": 18725 }, { "epoch": 20.46968869470235, "grad_norm": 9.77160930633545, "learning_rate": 9.891616314199395e-06, "loss": 0.1481, "step": 18750 }, { "epoch": 20.496996176952486, "grad_norm": 13.306212425231934, "learning_rate": 9.863293051359518e-06, "loss": 0.2294, "step": 18775 }, { "epoch": 20.524303659202623, "grad_norm": 8.447687149047852, "learning_rate": 9.834969788519637e-06, "loss": 0.1656, "step": 18800 }, { "epoch": 20.551611141452756, "grad_norm": 3.837935447692871, "learning_rate": 9.80664652567976e-06, "loss": 0.2033, "step": 18825 }, { "epoch": 20.578918623702894, "grad_norm": 5.454202651977539, "learning_rate": 9.778323262839879e-06, "loss": 0.1717, "step": 18850 }, { "epoch": 20.60622610595303, "grad_norm": 20.66252899169922, "learning_rate": 9.75e-06, "loss": 0.2001, "step": 18875 }, { "epoch": 20.633533588203168, "grad_norm": 3.1599998474121094, "learning_rate": 9.72167673716012e-06, "loss": 0.1566, "step": 18900 }, { "epoch": 20.660841070453305, "grad_norm": 11.816332817077637, "learning_rate": 9.693353474320242e-06, "loss": 0.2379, 
"step": 18925 }, { "epoch": 20.688148552703442, "grad_norm": 4.934301376342773, "learning_rate": 9.665030211480363e-06, "loss": 0.1693, "step": 18950 }, { "epoch": 20.715456034953576, "grad_norm": 3.6345667839050293, "learning_rate": 9.636706948640484e-06, "loss": 0.1984, "step": 18975 }, { "epoch": 20.742763517203713, "grad_norm": 3.2392148971557617, "learning_rate": 9.608383685800605e-06, "loss": 0.1735, "step": 19000 }, { "epoch": 20.742763517203713, "eval_cer": 0.13478586403114706, "eval_loss": 0.7414463758468628, "eval_model_preparation_time": 0.005, "eval_runtime": 113.2655, "eval_samples_per_second": 44.453, "eval_steps_per_second": 5.562, "eval_wer": 0.3470585648962364, "step": 19000 }, { "epoch": 20.77007099945385, "grad_norm": 2.2580018043518066, "learning_rate": 9.580060422960726e-06, "loss": 0.2803, "step": 19025 }, { "epoch": 20.797378481703987, "grad_norm": 6.964929103851318, "learning_rate": 9.551737160120847e-06, "loss": 0.1935, "step": 19050 }, { "epoch": 20.824685963954124, "grad_norm": 3.8643765449523926, "learning_rate": 9.523413897280968e-06, "loss": 0.2065, "step": 19075 }, { "epoch": 20.85199344620426, "grad_norm": 6.309064865112305, "learning_rate": 9.495090634441087e-06, "loss": 0.163, "step": 19100 }, { "epoch": 20.879300928454395, "grad_norm": 4.935329437255859, "learning_rate": 9.46676737160121e-06, "loss": 0.2016, "step": 19125 }, { "epoch": 20.906608410704532, "grad_norm": 3.096048355102539, "learning_rate": 9.438444108761329e-06, "loss": 0.1889, "step": 19150 }, { "epoch": 20.93391589295467, "grad_norm": 4.483667850494385, "learning_rate": 9.410120845921452e-06, "loss": 0.2463, "step": 19175 }, { "epoch": 20.961223375204806, "grad_norm": 9.028972625732422, "learning_rate": 9.381797583081571e-06, "loss": 0.1612, "step": 19200 }, { "epoch": 20.988530857454943, "grad_norm": 17.47925567626953, "learning_rate": 9.353474320241694e-06, "loss": 0.2072, "step": 19225 }, { "epoch": 21.015292190060077, "grad_norm": 4.751630783081055, 
"learning_rate": 9.325151057401813e-06, "loss": 0.1501, "step": 19250 }, { "epoch": 21.042599672310214, "grad_norm": 3.349579334259033, "learning_rate": 9.296827794561934e-06, "loss": 0.1787, "step": 19275 }, { "epoch": 21.06990715456035, "grad_norm": 3.1237471103668213, "learning_rate": 9.268504531722055e-06, "loss": 0.1573, "step": 19300 }, { "epoch": 21.097214636810484, "grad_norm": 11.762648582458496, "learning_rate": 9.240181268882176e-06, "loss": 0.1873, "step": 19325 }, { "epoch": 21.12452211906062, "grad_norm": 4.976815700531006, "learning_rate": 9.211858006042297e-06, "loss": 0.2199, "step": 19350 }, { "epoch": 21.15182960131076, "grad_norm": 5.215498924255371, "learning_rate": 9.183534743202416e-06, "loss": 0.1839, "step": 19375 }, { "epoch": 21.179137083560896, "grad_norm": 6.340145111083984, "learning_rate": 9.155211480362539e-06, "loss": 0.2129, "step": 19400 }, { "epoch": 21.206444565811033, "grad_norm": 2.890324354171753, "learning_rate": 9.126888217522658e-06, "loss": 0.1721, "step": 19425 }, { "epoch": 21.23375204806117, "grad_norm": 6.746542453765869, "learning_rate": 9.098564954682779e-06, "loss": 0.1674, "step": 19450 }, { "epoch": 21.261059530311304, "grad_norm": 12.744902610778809, "learning_rate": 9.0702416918429e-06, "loss": 0.1514, "step": 19475 }, { "epoch": 21.28836701256144, "grad_norm": 4.227482795715332, "learning_rate": 9.041918429003021e-06, "loss": 0.1897, "step": 19500 }, { "epoch": 21.315674494811578, "grad_norm": 20.825092315673828, "learning_rate": 9.013595166163142e-06, "loss": 0.1489, "step": 19525 }, { "epoch": 21.342981977061715, "grad_norm": 2.9716689586639404, "learning_rate": 8.985271903323263e-06, "loss": 0.1573, "step": 19550 }, { "epoch": 21.370289459311852, "grad_norm": 25.41474723815918, "learning_rate": 8.956948640483384e-06, "loss": 0.1717, "step": 19575 }, { "epoch": 21.39759694156199, "grad_norm": 6.929635524749756, "learning_rate": 8.928625377643505e-06, "loss": 0.1773, "step": 19600 }, { "epoch": 
21.424904423812123, "grad_norm": 8.243063926696777, "learning_rate": 8.900302114803626e-06, "loss": 0.1651, "step": 19625 }, { "epoch": 21.45221190606226, "grad_norm": 5.195458889007568, "learning_rate": 8.871978851963747e-06, "loss": 0.1908, "step": 19650 }, { "epoch": 21.479519388312397, "grad_norm": 6.830530643463135, "learning_rate": 8.843655589123866e-06, "loss": 0.1697, "step": 19675 }, { "epoch": 21.506826870562534, "grad_norm": 8.38962173461914, "learning_rate": 8.815332326283989e-06, "loss": 0.174, "step": 19700 }, { "epoch": 21.53413435281267, "grad_norm": 7.874443531036377, "learning_rate": 8.787009063444108e-06, "loss": 0.2194, "step": 19725 }, { "epoch": 21.56144183506281, "grad_norm": 5.002943515777588, "learning_rate": 8.75868580060423e-06, "loss": 0.2039, "step": 19750 }, { "epoch": 21.588749317312942, "grad_norm": 14.03562068939209, "learning_rate": 8.73036253776435e-06, "loss": 0.1549, "step": 19775 }, { "epoch": 21.61605679956308, "grad_norm": 5.666706085205078, "learning_rate": 8.702039274924473e-06, "loss": 0.188, "step": 19800 }, { "epoch": 21.643364281813216, "grad_norm": 9.43155288696289, "learning_rate": 8.673716012084592e-06, "loss": 0.2054, "step": 19825 }, { "epoch": 21.670671764063353, "grad_norm": 19.584341049194336, "learning_rate": 8.645392749244713e-06, "loss": 0.1774, "step": 19850 }, { "epoch": 21.69797924631349, "grad_norm": 13.64315414428711, "learning_rate": 8.617069486404834e-06, "loss": 0.1673, "step": 19875 }, { "epoch": 21.725286728563628, "grad_norm": 3.290343999862671, "learning_rate": 8.588746223564955e-06, "loss": 0.1665, "step": 19900 }, { "epoch": 21.75259421081376, "grad_norm": 8.065378189086914, "learning_rate": 8.560422960725076e-06, "loss": 0.2178, "step": 19925 }, { "epoch": 21.7799016930639, "grad_norm": 4.723469257354736, "learning_rate": 8.532099697885197e-06, "loss": 0.1518, "step": 19950 }, { "epoch": 21.807209175314036, "grad_norm": 4.789799213409424, "learning_rate": 8.503776435045318e-06, "loss": 0.196, 
"step": 19975 }, { "epoch": 21.834516657564173, "grad_norm": 6.750443458557129, "learning_rate": 8.475453172205439e-06, "loss": 0.2111, "step": 20000 }, { "epoch": 21.834516657564173, "eval_cer": 0.13863931316761505, "eval_loss": 0.787697434425354, "eval_model_preparation_time": 0.005, "eval_runtime": 113.6025, "eval_samples_per_second": 44.321, "eval_steps_per_second": 5.546, "eval_wer": 0.35187302145620825, "step": 20000 }, { "epoch": 21.86182413981431, "grad_norm": 13.616910934448242, "learning_rate": 8.44712990936556e-06, "loss": 0.1721, "step": 20025 }, { "epoch": 21.889131622064447, "grad_norm": 5.102693557739258, "learning_rate": 8.418806646525681e-06, "loss": 0.1988, "step": 20050 }, { "epoch": 21.91643910431458, "grad_norm": 3.427734136581421, "learning_rate": 8.3904833836858e-06, "loss": 0.1783, "step": 20075 }, { "epoch": 21.943746586564718, "grad_norm": 6.64509391784668, "learning_rate": 8.362160120845921e-06, "loss": 0.1918, "step": 20100 }, { "epoch": 21.971054068814855, "grad_norm": 6.014474391937256, "learning_rate": 8.333836858006042e-06, "loss": 0.1507, "step": 20125 }, { "epoch": 21.998361551064992, "grad_norm": 8.297235488891602, "learning_rate": 8.305513595166163e-06, "loss": 0.2235, "step": 20150 }, { "epoch": 22.025122883670125, "grad_norm": 4.82846212387085, "learning_rate": 8.277190332326284e-06, "loss": 0.1424, "step": 20175 }, { "epoch": 22.052430365920262, "grad_norm": 15.366902351379395, "learning_rate": 8.248867069486405e-06, "loss": 0.1691, "step": 20200 }, { "epoch": 22.0797378481704, "grad_norm": 3.708165407180786, "learning_rate": 8.220543806646526e-06, "loss": 0.161, "step": 20225 }, { "epoch": 22.107045330420537, "grad_norm": 9.258438110351562, "learning_rate": 8.192220543806645e-06, "loss": 0.2028, "step": 20250 }, { "epoch": 22.13435281267067, "grad_norm": 8.437280654907227, "learning_rate": 8.163897280966768e-06, "loss": 0.1574, "step": 20275 }, { "epoch": 22.161660294920807, "grad_norm": 11.519179344177246, "learning_rate": 
8.135574018126887e-06, "loss": 0.184, "step": 20300 }, { "epoch": 22.188967777170944, "grad_norm": 2.6008100509643555, "learning_rate": 8.10725075528701e-06, "loss": 0.1639, "step": 20325 }, { "epoch": 22.21627525942108, "grad_norm": 5.935661792755127, "learning_rate": 8.07892749244713e-06, "loss": 0.1861, "step": 20350 }, { "epoch": 22.24358274167122, "grad_norm": 4.22892951965332, "learning_rate": 8.050604229607252e-06, "loss": 0.1639, "step": 20375 }, { "epoch": 22.270890223921356, "grad_norm": 6.042015075683594, "learning_rate": 8.022280966767371e-06, "loss": 0.1912, "step": 20400 }, { "epoch": 22.29819770617149, "grad_norm": 2.402381658554077, "learning_rate": 7.993957703927492e-06, "loss": 0.1677, "step": 20425 }, { "epoch": 22.325505188421626, "grad_norm": 13.012516975402832, "learning_rate": 7.965634441087613e-06, "loss": 0.1782, "step": 20450 }, { "epoch": 22.352812670671764, "grad_norm": 5.9585185050964355, "learning_rate": 7.937311178247734e-06, "loss": 0.1534, "step": 20475 }, { "epoch": 22.3801201529219, "grad_norm": 6.761185169219971, "learning_rate": 7.908987915407855e-06, "loss": 0.2087, "step": 20500 }, { "epoch": 22.407427635172038, "grad_norm": 4.899408340454102, "learning_rate": 7.880664652567976e-06, "loss": 0.1655, "step": 20525 }, { "epoch": 22.434735117422175, "grad_norm": 7.953681468963623, "learning_rate": 7.852341389728097e-06, "loss": 0.203, "step": 20550 }, { "epoch": 22.46204259967231, "grad_norm": 2.677629232406616, "learning_rate": 7.824018126888218e-06, "loss": 0.156, "step": 20575 }, { "epoch": 22.489350081922446, "grad_norm": 9.2720947265625, "learning_rate": 7.795694864048339e-06, "loss": 0.1826, "step": 20600 }, { "epoch": 22.516657564172583, "grad_norm": 5.543266773223877, "learning_rate": 7.76737160120846e-06, "loss": 0.1945, "step": 20625 }, { "epoch": 22.54396504642272, "grad_norm": 3.1021201610565186, "learning_rate": 7.73904833836858e-06, "loss": 0.1832, "step": 20650 }, { "epoch": 22.571272528672857, "grad_norm": 
2.4122323989868164, "learning_rate": 7.710725075528702e-06, "loss": 0.1496, "step": 20675 }, { "epoch": 22.598580010922994, "grad_norm": 13.32986068725586, "learning_rate": 7.682401812688821e-06, "loss": 0.2023, "step": 20700 }, { "epoch": 22.625887493173128, "grad_norm": 2.612722635269165, "learning_rate": 7.654078549848944e-06, "loss": 0.1455, "step": 20725 }, { "epoch": 22.653194975423265, "grad_norm": 19.371938705444336, "learning_rate": 7.625755287009063e-06, "loss": 0.1777, "step": 20750 }, { "epoch": 22.680502457673402, "grad_norm": 3.658571243286133, "learning_rate": 7.597432024169185e-06, "loss": 0.1641, "step": 20775 }, { "epoch": 22.70780993992354, "grad_norm": 50.841800689697266, "learning_rate": 7.569108761329305e-06, "loss": 0.1747, "step": 20800 }, { "epoch": 22.735117422173676, "grad_norm": 21.6749210357666, "learning_rate": 7.540785498489427e-06, "loss": 0.1927, "step": 20825 }, { "epoch": 22.762424904423813, "grad_norm": 24.51923942565918, "learning_rate": 7.512462235649547e-06, "loss": 0.1729, "step": 20850 }, { "epoch": 22.789732386673947, "grad_norm": 2.4759416580200195, "learning_rate": 7.484138972809668e-06, "loss": 0.1655, "step": 20875 }, { "epoch": 22.817039868924084, "grad_norm": 14.733073234558105, "learning_rate": 7.455815709969789e-06, "loss": 0.1706, "step": 20900 }, { "epoch": 22.84434735117422, "grad_norm": 7.581936359405518, "learning_rate": 7.427492447129909e-06, "loss": 0.1182, "step": 20925 }, { "epoch": 22.87165483342436, "grad_norm": 26.281291961669922, "learning_rate": 7.39916918429003e-06, "loss": 0.2076, "step": 20950 }, { "epoch": 22.898962315674495, "grad_norm": 3.956470012664795, "learning_rate": 7.370845921450151e-06, "loss": 0.1834, "step": 20975 }, { "epoch": 22.926269797924633, "grad_norm": 5.649031639099121, "learning_rate": 7.342522658610272e-06, "loss": 0.1937, "step": 21000 }, { "epoch": 22.926269797924633, "eval_cer": 0.13482579614655085, "eval_loss": 0.759463906288147, "eval_model_preparation_time": 0.005, 
"eval_runtime": 114.4831, "eval_samples_per_second": 43.98, "eval_steps_per_second": 5.503, "eval_wer": 0.3421561730566303, "step": 21000 }, { "epoch": 22.953577280174766, "grad_norm": 5.332810401916504, "learning_rate": 7.314199395770393e-06, "loss": 0.1691, "step": 21025 }, { "epoch": 22.980884762424903, "grad_norm": 2.941478729248047, "learning_rate": 7.285876132930514e-06, "loss": 0.1804, "step": 21050 }, { "epoch": 23.007646095030037, "grad_norm": 3.0560503005981445, "learning_rate": 7.257552870090635e-06, "loss": 0.171, "step": 21075 }, { "epoch": 23.034953577280174, "grad_norm": 6.302406311035156, "learning_rate": 7.229229607250756e-06, "loss": 0.1655, "step": 21100 }, { "epoch": 23.06226105953031, "grad_norm": 4.29976749420166, "learning_rate": 7.200906344410876e-06, "loss": 0.1768, "step": 21125 }, { "epoch": 23.089568541780448, "grad_norm": 6.231293201446533, "learning_rate": 7.172583081570997e-06, "loss": 0.1509, "step": 21150 }, { "epoch": 23.116876024030585, "grad_norm": 2.916163206100464, "learning_rate": 7.144259818731118e-06, "loss": 0.2028, "step": 21175 }, { "epoch": 23.144183506280722, "grad_norm": 3.371551513671875, "learning_rate": 7.115936555891239e-06, "loss": 0.1372, "step": 21200 }, { "epoch": 23.171490988530856, "grad_norm": 2.6216118335723877, "learning_rate": 7.087613293051359e-06, "loss": 0.1769, "step": 21225 }, { "epoch": 23.198798470780993, "grad_norm": 5.322129249572754, "learning_rate": 7.05929003021148e-06, "loss": 0.1405, "step": 21250 }, { "epoch": 23.22610595303113, "grad_norm": 10.875866889953613, "learning_rate": 7.030966767371601e-06, "loss": 0.1628, "step": 21275 }, { "epoch": 23.253413435281267, "grad_norm": 5.03515625, "learning_rate": 7.002643504531722e-06, "loss": 0.1886, "step": 21300 }, { "epoch": 23.280720917531404, "grad_norm": 3.5104548931121826, "learning_rate": 6.9743202416918425e-06, "loss": 0.1736, "step": 21325 }, { "epoch": 23.30802839978154, "grad_norm": 5.782333850860596, "learning_rate": 
6.9459969788519634e-06, "loss": 0.1518, "step": 21350 }, { "epoch": 23.335335882031675, "grad_norm": 3.144768476486206, "learning_rate": 6.917673716012084e-06, "loss": 0.1638, "step": 21375 }, { "epoch": 23.362643364281812, "grad_norm": 4.13077974319458, "learning_rate": 6.889350453172205e-06, "loss": 0.1278, "step": 21400 }, { "epoch": 23.38995084653195, "grad_norm": 7.370959281921387, "learning_rate": 6.861027190332326e-06, "loss": 0.1638, "step": 21425 }, { "epoch": 23.417258328782086, "grad_norm": 4.714965343475342, "learning_rate": 6.832703927492447e-06, "loss": 0.1592, "step": 21450 }, { "epoch": 23.444565811032223, "grad_norm": 6.3375983238220215, "learning_rate": 6.804380664652568e-06, "loss": 0.1686, "step": 21475 }, { "epoch": 23.47187329328236, "grad_norm": 4.671372413635254, "learning_rate": 6.7760574018126885e-06, "loss": 0.154, "step": 21500 }, { "epoch": 23.499180775532494, "grad_norm": 4.222255706787109, "learning_rate": 6.7477341389728095e-06, "loss": 0.1563, "step": 21525 }, { "epoch": 23.52648825778263, "grad_norm": 5.797029495239258, "learning_rate": 6.7194108761329304e-06, "loss": 0.184, "step": 21550 }, { "epoch": 23.55379574003277, "grad_norm": 4.035573959350586, "learning_rate": 6.691087613293051e-06, "loss": 0.188, "step": 21575 }, { "epoch": 23.581103222282906, "grad_norm": 12.175357818603516, "learning_rate": 6.662764350453172e-06, "loss": 0.1777, "step": 21600 }, { "epoch": 23.608410704533043, "grad_norm": 4.059628009796143, "learning_rate": 6.634441087613293e-06, "loss": 0.2078, "step": 21625 }, { "epoch": 23.63571818678318, "grad_norm": 10.670134544372559, "learning_rate": 6.606117824773414e-06, "loss": 0.1574, "step": 21650 }, { "epoch": 23.663025669033317, "grad_norm": 5.905535697937012, "learning_rate": 6.577794561933535e-06, "loss": 0.1743, "step": 21675 }, { "epoch": 23.69033315128345, "grad_norm": 8.450610160827637, "learning_rate": 6.5494712990936555e-06, "loss": 0.1371, "step": 21700 }, { "epoch": 23.717640633533588, 
"grad_norm": 14.728644371032715, "learning_rate": 6.5211480362537765e-06, "loss": 0.1966, "step": 21725 }, { "epoch": 23.744948115783725, "grad_norm": 6.932831287384033, "learning_rate": 6.4928247734138974e-06, "loss": 0.1554, "step": 21750 }, { "epoch": 23.772255598033862, "grad_norm": 5.476907730102539, "learning_rate": 6.4645015105740184e-06, "loss": 0.2075, "step": 21775 }, { "epoch": 23.799563080284, "grad_norm": 6.361727714538574, "learning_rate": 6.436178247734139e-06, "loss": 0.1352, "step": 21800 }, { "epoch": 23.826870562534133, "grad_norm": 3.356269121170044, "learning_rate": 6.40785498489426e-06, "loss": 0.2333, "step": 21825 }, { "epoch": 23.85417804478427, "grad_norm": 7.86431360244751, "learning_rate": 6.379531722054381e-06, "loss": 0.1428, "step": 21850 }, { "epoch": 23.881485527034407, "grad_norm": 2.9966883659362793, "learning_rate": 6.351208459214502e-06, "loss": 0.2194, "step": 21875 }, { "epoch": 23.908793009284544, "grad_norm": 3.616443634033203, "learning_rate": 6.3228851963746225e-06, "loss": 0.1384, "step": 21900 }, { "epoch": 23.93610049153468, "grad_norm": 2.9987924098968506, "learning_rate": 6.2945619335347435e-06, "loss": 0.246, "step": 21925 }, { "epoch": 23.96340797378482, "grad_norm": 12.043025970458984, "learning_rate": 6.2662386706948645e-06, "loss": 0.1484, "step": 21950 }, { "epoch": 23.990715456034955, "grad_norm": 3.3618757724761963, "learning_rate": 6.237915407854985e-06, "loss": 0.2283, "step": 21975 }, { "epoch": 24.01747678864009, "grad_norm": 5.381952285766602, "learning_rate": 6.2095921450151056e-06, "loss": 0.1556, "step": 22000 }, { "epoch": 24.01747678864009, "eval_cer": 0.1347708894878706, "eval_loss": 0.7993986010551453, "eval_model_preparation_time": 0.005, "eval_runtime": 117.8341, "eval_samples_per_second": 42.73, "eval_steps_per_second": 5.346, "eval_wer": 0.34299155821315513, "step": 22000 }, { "epoch": 24.044784270890222, "grad_norm": 3.668212413787842, "learning_rate": 6.1812688821752265e-06, "loss": 0.1192, 
"step": 22025 }, { "epoch": 24.07209175314036, "grad_norm": 4.826764106750488, "learning_rate": 6.1529456193353475e-06, "loss": 0.162, "step": 22050 }, { "epoch": 24.099399235390496, "grad_norm": 12.960419654846191, "learning_rate": 6.1246223564954685e-06, "loss": 0.1666, "step": 22075 }, { "epoch": 24.126706717640634, "grad_norm": 3.9786198139190674, "learning_rate": 6.096299093655589e-06, "loss": 0.1462, "step": 22100 }, { "epoch": 24.15401419989077, "grad_norm": 1.7786451578140259, "learning_rate": 6.06797583081571e-06, "loss": 0.1689, "step": 22125 }, { "epoch": 24.181321682140908, "grad_norm": 13.860063552856445, "learning_rate": 6.039652567975831e-06, "loss": 0.1691, "step": 22150 }, { "epoch": 24.20862916439104, "grad_norm": 23.523971557617188, "learning_rate": 6.011329305135952e-06, "loss": 0.1926, "step": 22175 }, { "epoch": 24.23593664664118, "grad_norm": 8.396967887878418, "learning_rate": 5.9830060422960726e-06, "loss": 0.1862, "step": 22200 }, { "epoch": 24.263244128891316, "grad_norm": 8.082077026367188, "learning_rate": 5.9546827794561936e-06, "loss": 0.1531, "step": 22225 }, { "epoch": 24.290551611141453, "grad_norm": 6.358676910400391, "learning_rate": 5.9263595166163145e-06, "loss": 0.1503, "step": 22250 }, { "epoch": 24.31785909339159, "grad_norm": 10.676382064819336, "learning_rate": 5.8980362537764355e-06, "loss": 0.1911, "step": 22275 }, { "epoch": 24.345166575641727, "grad_norm": 9.948719024658203, "learning_rate": 5.869712990936556e-06, "loss": 0.175, "step": 22300 }, { "epoch": 24.372474057891864, "grad_norm": 52.42145538330078, "learning_rate": 5.841389728096677e-06, "loss": 0.1688, "step": 22325 }, { "epoch": 24.399781540141998, "grad_norm": 2.957223892211914, "learning_rate": 5.813066465256798e-06, "loss": 0.1572, "step": 22350 }, { "epoch": 24.427089022392135, "grad_norm": 6.417930603027344, "learning_rate": 5.784743202416919e-06, "loss": 0.1436, "step": 22375 }, { "epoch": 24.454396504642272, "grad_norm": 5.883756637573242, 
"learning_rate": 5.7564199395770396e-06, "loss": 0.155, "step": 22400 }, { "epoch": 24.48170398689241, "grad_norm": 14.40699577331543, "learning_rate": 5.7280966767371606e-06, "loss": 0.1502, "step": 22425 }, { "epoch": 24.509011469142546, "grad_norm": 6.338261604309082, "learning_rate": 5.6997734138972815e-06, "loss": 0.143, "step": 22450 }, { "epoch": 24.53631895139268, "grad_norm": 11.917652130126953, "learning_rate": 5.6714501510574025e-06, "loss": 0.1598, "step": 22475 }, { "epoch": 24.563626433642817, "grad_norm": 10.784324645996094, "learning_rate": 5.643126888217523e-06, "loss": 0.1364, "step": 22500 }, { "epoch": 24.590933915892954, "grad_norm": 6.954102516174316, "learning_rate": 5.614803625377644e-06, "loss": 0.1449, "step": 22525 }, { "epoch": 24.61824139814309, "grad_norm": 2.9454121589660645, "learning_rate": 5.586480362537765e-06, "loss": 0.1458, "step": 22550 }, { "epoch": 24.64554888039323, "grad_norm": 11.300567626953125, "learning_rate": 5.558157099697886e-06, "loss": 0.1497, "step": 22575 }, { "epoch": 24.672856362643365, "grad_norm": 3.563420295715332, "learning_rate": 5.529833836858007e-06, "loss": 0.1462, "step": 22600 }, { "epoch": 24.700163844893503, "grad_norm": 7.293315410614014, "learning_rate": 5.5015105740181276e-06, "loss": 0.1937, "step": 22625 }, { "epoch": 24.727471327143636, "grad_norm": 4.287593841552734, "learning_rate": 5.4731873111782485e-06, "loss": 0.1608, "step": 22650 }, { "epoch": 24.754778809393773, "grad_norm": 11.869462013244629, "learning_rate": 5.444864048338369e-06, "loss": 0.1553, "step": 22675 }, { "epoch": 24.78208629164391, "grad_norm": 4.550280570983887, "learning_rate": 5.41654078549849e-06, "loss": 0.1557, "step": 22700 }, { "epoch": 24.809393773894048, "grad_norm": 4.5214033126831055, "learning_rate": 5.38821752265861e-06, "loss": 0.1507, "step": 22725 }, { "epoch": 24.836701256144185, "grad_norm": 4.5059380531311035, "learning_rate": 5.359894259818731e-06, "loss": 0.1497, "step": 22750 }, { "epoch": 
24.86400873839432, "grad_norm": 20.30832862854004, "learning_rate": 5.331570996978852e-06, "loss": 0.1677, "step": 22775 }, { "epoch": 24.891316220644455, "grad_norm": 17.30992889404297, "learning_rate": 5.303247734138973e-06, "loss": 0.1549, "step": 22800 }, { "epoch": 24.918623702894592, "grad_norm": 8.119895935058594, "learning_rate": 5.274924471299094e-06, "loss": 0.1521, "step": 22825 }, { "epoch": 24.94593118514473, "grad_norm": 6.706144332885742, "learning_rate": 5.246601208459215e-06, "loss": 0.1829, "step": 22850 }, { "epoch": 24.973238667394867, "grad_norm": 1.985439419746399, "learning_rate": 5.218277945619335e-06, "loss": 0.1434, "step": 22875 }, { "epoch": 25.0, "grad_norm": 4.487745761871338, "learning_rate": 5.189954682779456e-06, "loss": 0.1929, "step": 22900 }, { "epoch": 25.027307482250137, "grad_norm": 18.072263717651367, "learning_rate": 5.161631419939577e-06, "loss": 0.123, "step": 22925 }, { "epoch": 25.054614964500274, "grad_norm": 19.11573028564453, "learning_rate": 5.133308157099698e-06, "loss": 0.2041, "step": 22950 }, { "epoch": 25.08192244675041, "grad_norm": 3.0868401527404785, "learning_rate": 5.104984894259819e-06, "loss": 0.1215, "step": 22975 }, { "epoch": 25.109229929000545, "grad_norm": 8.652731895446777, "learning_rate": 5.07666163141994e-06, "loss": 0.1719, "step": 23000 }, { "epoch": 25.109229929000545, "eval_cer": 0.13459119496855346, "eval_loss": 0.8182443380355835, "eval_model_preparation_time": 0.005, "eval_runtime": 113.5237, "eval_samples_per_second": 44.352, "eval_steps_per_second": 5.55, "eval_wer": 0.34569556806190643, "step": 23000 }, { "epoch": 25.136537411250682, "grad_norm": 9.375747680664062, "learning_rate": 5.048338368580061e-06, "loss": 0.1619, "step": 23025 }, { "epoch": 25.16384489350082, "grad_norm": 8.43506145477295, "learning_rate": 5.020015105740182e-06, "loss": 0.1768, "step": 23050 }, { "epoch": 25.191152375750956, "grad_norm": 3.910248041152954, "learning_rate": 4.991691842900302e-06, "loss": 0.1305, 
"step": 23075 }, { "epoch": 25.218459858001093, "grad_norm": 14.783105850219727, "learning_rate": 4.963368580060423e-06, "loss": 0.1677, "step": 23100 }, { "epoch": 25.24576734025123, "grad_norm": 5.4932475090026855, "learning_rate": 4.935045317220544e-06, "loss": 0.1319, "step": 23125 }, { "epoch": 25.273074822501364, "grad_norm": 7.635251998901367, "learning_rate": 4.906722054380665e-06, "loss": 0.2352, "step": 23150 }, { "epoch": 25.3003823047515, "grad_norm": 2.8288440704345703, "learning_rate": 4.878398791540786e-06, "loss": 0.1225, "step": 23175 }, { "epoch": 25.32768978700164, "grad_norm": 7.981184482574463, "learning_rate": 4.850075528700907e-06, "loss": 0.1656, "step": 23200 }, { "epoch": 25.354997269251776, "grad_norm": 4.847443580627441, "learning_rate": 4.821752265861028e-06, "loss": 0.1349, "step": 23225 }, { "epoch": 25.382304751501913, "grad_norm": 12.004508018493652, "learning_rate": 4.793429003021149e-06, "loss": 0.1914, "step": 23250 }, { "epoch": 25.40961223375205, "grad_norm": 5.195298194885254, "learning_rate": 4.765105740181269e-06, "loss": 0.1058, "step": 23275 }, { "epoch": 25.436919716002183, "grad_norm": 27.71015167236328, "learning_rate": 4.73678247734139e-06, "loss": 0.2116, "step": 23300 }, { "epoch": 25.46422719825232, "grad_norm": 8.159578323364258, "learning_rate": 4.708459214501511e-06, "loss": 0.1231, "step": 23325 }, { "epoch": 25.491534680502458, "grad_norm": 9.730722427368164, "learning_rate": 4.680135951661632e-06, "loss": 0.2134, "step": 23350 }, { "epoch": 25.518842162752595, "grad_norm": 3.403055191040039, "learning_rate": 4.651812688821753e-06, "loss": 0.1146, "step": 23375 }, { "epoch": 25.546149645002732, "grad_norm": 40.338279724121094, "learning_rate": 4.623489425981874e-06, "loss": 0.1689, "step": 23400 }, { "epoch": 25.573457127252865, "grad_norm": 15.45770263671875, "learning_rate": 4.595166163141995e-06, "loss": 0.1366, "step": 23425 }, { "epoch": 25.600764609503003, "grad_norm": 20.444324493408203, "learning_rate": 
4.566842900302116e-06, "loss": 0.184, "step": 23450 }, { "epoch": 25.62807209175314, "grad_norm": 2.9765207767486572, "learning_rate": 4.538519637462235e-06, "loss": 0.1114, "step": 23475 }, { "epoch": 25.655379574003277, "grad_norm": 13.186593055725098, "learning_rate": 4.510196374622356e-06, "loss": 0.1645, "step": 23500 }, { "epoch": 25.682687056253414, "grad_norm": 7.1551289558410645, "learning_rate": 4.481873111782477e-06, "loss": 0.1304, "step": 23525 }, { "epoch": 25.70999453850355, "grad_norm": 1.2013965845108032, "learning_rate": 4.453549848942598e-06, "loss": 0.1581, "step": 23550 }, { "epoch": 25.73730202075369, "grad_norm": 4.986629009246826, "learning_rate": 4.425226586102719e-06, "loss": 0.1187, "step": 23575 }, { "epoch": 25.764609503003822, "grad_norm": 24.304019927978516, "learning_rate": 4.39690332326284e-06, "loss": 0.1606, "step": 23600 }, { "epoch": 25.79191698525396, "grad_norm": 6.64349365234375, "learning_rate": 4.368580060422961e-06, "loss": 0.1203, "step": 23625 }, { "epoch": 25.819224467504096, "grad_norm": 12.818842887878418, "learning_rate": 4.340256797583082e-06, "loss": 0.1611, "step": 23650 }, { "epoch": 25.846531949754233, "grad_norm": 15.453790664672852, "learning_rate": 4.311933534743202e-06, "loss": 0.1355, "step": 23675 }, { "epoch": 25.87383943200437, "grad_norm": 16.039459228515625, "learning_rate": 4.283610271903323e-06, "loss": 0.1971, "step": 23700 }, { "epoch": 25.901146914254507, "grad_norm": 6.751923084259033, "learning_rate": 4.255287009063444e-06, "loss": 0.1435, "step": 23725 }, { "epoch": 25.92845439650464, "grad_norm": 27.51203155517578, "learning_rate": 4.226963746223565e-06, "loss": 0.1815, "step": 23750 }, { "epoch": 25.955761878754778, "grad_norm": 3.291168689727783, "learning_rate": 4.198640483383686e-06, "loss": 0.1264, "step": 23775 }, { "epoch": 25.983069361004915, "grad_norm": 9.80710220336914, "learning_rate": 4.170317220543807e-06, "loss": 0.2019, "step": 23800 }, { "epoch": 26.00983069361005, 
"grad_norm": 7.560935020446777, "learning_rate": 4.141993957703928e-06, "loss": 0.1386, "step": 23825 }, { "epoch": 26.037138175860186, "grad_norm": 4.137426853179932, "learning_rate": 4.113670694864048e-06, "loss": 0.1301, "step": 23850 }, { "epoch": 26.064445658110323, "grad_norm": 2.6282198429107666, "learning_rate": 4.085347432024169e-06, "loss": 0.1469, "step": 23875 }, { "epoch": 26.09175314036046, "grad_norm": 3.7678542137145996, "learning_rate": 4.05702416918429e-06, "loss": 0.1295, "step": 23900 }, { "epoch": 26.119060622610597, "grad_norm": 2.6782007217407227, "learning_rate": 4.028700906344411e-06, "loss": 0.149, "step": 23925 }, { "epoch": 26.14636810486073, "grad_norm": 5.459723949432373, "learning_rate": 4.000377643504532e-06, "loss": 0.1437, "step": 23950 }, { "epoch": 26.173675587110868, "grad_norm": 2.3939287662506104, "learning_rate": 3.972054380664653e-06, "loss": 0.1389, "step": 23975 }, { "epoch": 26.200983069361005, "grad_norm": 14.149048805236816, "learning_rate": 3.943731117824774e-06, "loss": 0.1303, "step": 24000 }, { "epoch": 26.200983069361005, "eval_cer": 0.13328341818907857, "eval_loss": 0.8067132234573364, "eval_model_preparation_time": 0.005, "eval_runtime": 113.9312, "eval_samples_per_second": 44.193, "eval_steps_per_second": 5.53, "eval_wer": 0.34167252901864226, "step": 24000 }, { "epoch": 26.228290551611142, "grad_norm": 2.2718427181243896, "learning_rate": 3.915407854984895e-06, "loss": 0.1478, "step": 24025 }, { "epoch": 26.25559803386128, "grad_norm": 9.609166145324707, "learning_rate": 3.887084592145015e-06, "loss": 0.1323, "step": 24050 }, { "epoch": 26.282905516111416, "grad_norm": 5.665581703186035, "learning_rate": 3.858761329305136e-06, "loss": 0.189, "step": 24075 }, { "epoch": 26.31021299836155, "grad_norm": 3.680436372756958, "learning_rate": 3.830438066465257e-06, "loss": 0.1421, "step": 24100 }, { "epoch": 26.337520480611687, "grad_norm": 5.79925012588501, "learning_rate": 3.802114803625378e-06, "loss": 0.1384, 
"step": 24125 }, { "epoch": 26.364827962861824, "grad_norm": 2.6729979515075684, "learning_rate": 3.773791540785499e-06, "loss": 0.1185, "step": 24150 }, { "epoch": 26.39213544511196, "grad_norm": 1.298000454902649, "learning_rate": 3.7454682779456195e-06, "loss": 0.1571, "step": 24175 }, { "epoch": 26.4194429273621, "grad_norm": 6.907313823699951, "learning_rate": 3.71714501510574e-06, "loss": 0.1309, "step": 24200 }, { "epoch": 26.446750409612235, "grad_norm": 16.673606872558594, "learning_rate": 3.688821752265861e-06, "loss": 0.148, "step": 24225 }, { "epoch": 26.47405789186237, "grad_norm": 4.539681434631348, "learning_rate": 3.660498489425982e-06, "loss": 0.1439, "step": 24250 }, { "epoch": 26.501365374112506, "grad_norm": 6.110681056976318, "learning_rate": 3.632175226586103e-06, "loss": 0.1517, "step": 24275 }, { "epoch": 26.528672856362643, "grad_norm": 4.354330062866211, "learning_rate": 3.6038519637462236e-06, "loss": 0.1137, "step": 24300 }, { "epoch": 26.55598033861278, "grad_norm": 5.048337936401367, "learning_rate": 3.5755287009063446e-06, "loss": 0.1374, "step": 24325 }, { "epoch": 26.583287820862918, "grad_norm": 2.247526168823242, "learning_rate": 3.5472054380664655e-06, "loss": 0.1286, "step": 24350 }, { "epoch": 26.610595303113055, "grad_norm": 36.097171783447266, "learning_rate": 3.5188821752265865e-06, "loss": 0.1744, "step": 24375 }, { "epoch": 26.63790278536319, "grad_norm": 5.4545369148254395, "learning_rate": 3.490558912386707e-06, "loss": 0.1608, "step": 24400 }, { "epoch": 26.665210267613325, "grad_norm": 1.7586616277694702, "learning_rate": 3.4622356495468276e-06, "loss": 0.1342, "step": 24425 }, { "epoch": 26.692517749863462, "grad_norm": 3.2136390209198, "learning_rate": 3.4339123867069486e-06, "loss": 0.1488, "step": 24450 }, { "epoch": 26.7198252321136, "grad_norm": 9.836718559265137, "learning_rate": 3.4055891238670696e-06, "loss": 0.1992, "step": 24475 }, { "epoch": 26.747132714363737, "grad_norm": 7.709250450134277, 
"learning_rate": 3.37726586102719e-06, "loss": 0.1392, "step": 24500 }, { "epoch": 26.774440196613874, "grad_norm": 8.725049018859863, "learning_rate": 3.348942598187311e-06, "loss": 0.1353, "step": 24525 }, { "epoch": 26.801747678864007, "grad_norm": 6.946866512298584, "learning_rate": 3.320619335347432e-06, "loss": 0.1078, "step": 24550 }, { "epoch": 26.829055161114145, "grad_norm": 1.971701979637146, "learning_rate": 3.292296072507553e-06, "loss": 0.1733, "step": 24575 }, { "epoch": 26.85636264336428, "grad_norm": 3.241364002227783, "learning_rate": 3.2639728096676737e-06, "loss": 0.1299, "step": 24600 }, { "epoch": 26.88367012561442, "grad_norm": 13.194534301757812, "learning_rate": 3.2356495468277946e-06, "loss": 0.171, "step": 24625 }, { "epoch": 26.910977607864556, "grad_norm": 4.072185039520264, "learning_rate": 3.2073262839879156e-06, "loss": 0.1396, "step": 24650 }, { "epoch": 26.938285090114693, "grad_norm": 6.929079055786133, "learning_rate": 3.1790030211480366e-06, "loss": 0.1477, "step": 24675 }, { "epoch": 26.965592572364827, "grad_norm": 14.125633239746094, "learning_rate": 3.150679758308157e-06, "loss": 0.1403, "step": 24700 }, { "epoch": 26.992900054614964, "grad_norm": 47.151607513427734, "learning_rate": 3.122356495468278e-06, "loss": 0.1473, "step": 24725 }, { "epoch": 27.019661387220097, "grad_norm": 3.2802226543426514, "learning_rate": 3.094033232628399e-06, "loss": 0.1229, "step": 24750 }, { "epoch": 27.046968869470234, "grad_norm": 14.055026054382324, "learning_rate": 3.06570996978852e-06, "loss": 0.1476, "step": 24775 }, { "epoch": 27.07427635172037, "grad_norm": 2.3790218830108643, "learning_rate": 3.0373867069486402e-06, "loss": 0.1467, "step": 24800 }, { "epoch": 27.10158383397051, "grad_norm": 6.707015514373779, "learning_rate": 3.0090634441087612e-06, "loss": 0.1383, "step": 24825 }, { "epoch": 27.128891316220646, "grad_norm": 4.12552547454834, "learning_rate": 2.980740181268882e-06, "loss": 0.154, "step": 24850 }, { "epoch": 
27.156198798470783, "grad_norm": 14.731881141662598, "learning_rate": 2.952416918429003e-06, "loss": 0.1358, "step": 24875 }, { "epoch": 27.183506280720916, "grad_norm": 7.776090145111084, "learning_rate": 2.9240936555891237e-06, "loss": 0.1307, "step": 24900 }, { "epoch": 27.210813762971053, "grad_norm": 10.954771995544434, "learning_rate": 2.8957703927492447e-06, "loss": 0.1322, "step": 24925 }, { "epoch": 27.23812124522119, "grad_norm": 8.190482139587402, "learning_rate": 2.8674471299093657e-06, "loss": 0.1525, "step": 24950 }, { "epoch": 27.265428727471328, "grad_norm": 5.55907678604126, "learning_rate": 2.8391238670694867e-06, "loss": 0.1412, "step": 24975 }, { "epoch": 27.292736209721465, "grad_norm": 6.2519145011901855, "learning_rate": 2.8108006042296072e-06, "loss": 0.1797, "step": 25000 }, { "epoch": 27.292736209721465, "eval_cer": 0.13243486073674754, "eval_loss": 0.8132222890853882, "eval_model_preparation_time": 0.005, "eval_runtime": 113.4553, "eval_samples_per_second": 44.379, "eval_steps_per_second": 5.553, "eval_wer": 0.33767147379528667, "step": 25000 }, { "epoch": 27.320043691971602, "grad_norm": 6.00858211517334, "learning_rate": 2.7824773413897282e-06, "loss": 0.1624, "step": 25025 }, { "epoch": 27.347351174221735, "grad_norm": 7.496774196624756, "learning_rate": 2.7541540785498492e-06, "loss": 0.148, "step": 25050 }, { "epoch": 27.374658656471873, "grad_norm": 20.278301239013672, "learning_rate": 2.7258308157099698e-06, "loss": 0.1495, "step": 25075 }, { "epoch": 27.40196613872201, "grad_norm": 6.210268974304199, "learning_rate": 2.6975075528700908e-06, "loss": 0.1626, "step": 25100 }, { "epoch": 27.429273620972147, "grad_norm": 9.767916679382324, "learning_rate": 2.6691842900302117e-06, "loss": 0.16, "step": 25125 }, { "epoch": 27.456581103222284, "grad_norm": 2.150510311126709, "learning_rate": 2.6408610271903327e-06, "loss": 0.1385, "step": 25150 }, { "epoch": 27.48388858547242, "grad_norm": 11.14136791229248, "learning_rate": 
2.612537764350453e-06, "loss": 0.1691, "step": 25175 }, { "epoch": 27.511196067722555, "grad_norm": 6.3137078285217285, "learning_rate": 2.584214501510574e-06, "loss": 0.1336, "step": 25200 }, { "epoch": 27.538503549972692, "grad_norm": 13.01694107055664, "learning_rate": 2.555891238670695e-06, "loss": 0.141, "step": 25225 }, { "epoch": 27.56581103222283, "grad_norm": 2.643338203430176, "learning_rate": 2.527567975830816e-06, "loss": 0.1683, "step": 25250 }, { "epoch": 27.593118514472966, "grad_norm": 4.154228687286377, "learning_rate": 2.4992447129909364e-06, "loss": 0.1163, "step": 25275 }, { "epoch": 27.620425996723103, "grad_norm": 4.003206253051758, "learning_rate": 2.4709214501510573e-06, "loss": 0.1363, "step": 25300 }, { "epoch": 27.64773347897324, "grad_norm": 9.178617477416992, "learning_rate": 2.4425981873111783e-06, "loss": 0.1274, "step": 25325 }, { "epoch": 27.675040961223374, "grad_norm": 4.008249282836914, "learning_rate": 2.4142749244712993e-06, "loss": 0.1282, "step": 25350 }, { "epoch": 27.70234844347351, "grad_norm": 8.496767044067383, "learning_rate": 2.38595166163142e-06, "loss": 0.1695, "step": 25375 }, { "epoch": 27.729655925723648, "grad_norm": 5.961015701293945, "learning_rate": 2.357628398791541e-06, "loss": 0.1893, "step": 25400 }, { "epoch": 27.756963407973785, "grad_norm": 12.951229095458984, "learning_rate": 2.329305135951662e-06, "loss": 0.1362, "step": 25425 }, { "epoch": 27.784270890223922, "grad_norm": 9.348018646240234, "learning_rate": 2.300981873111783e-06, "loss": 0.1456, "step": 25450 }, { "epoch": 27.81157837247406, "grad_norm": 4.806414604187012, "learning_rate": 2.2726586102719034e-06, "loss": 0.1294, "step": 25475 }, { "epoch": 27.838885854724193, "grad_norm": 6.1510396003723145, "learning_rate": 2.2443353474320243e-06, "loss": 0.152, "step": 25500 }, { "epoch": 27.86619333697433, "grad_norm": 11.23769474029541, "learning_rate": 2.2160120845921453e-06, "loss": 0.1682, "step": 25525 }, { "epoch": 27.893500819224467, 
"grad_norm": 11.250290870666504, "learning_rate": 2.187688821752266e-06, "loss": 0.1541, "step": 25550 }, { "epoch": 27.920808301474604, "grad_norm": 9.1241455078125, "learning_rate": 2.1593655589123864e-06, "loss": 0.1521, "step": 25575 }, { "epoch": 27.94811578372474, "grad_norm": 6.179391384124756, "learning_rate": 2.1310422960725074e-06, "loss": 0.1456, "step": 25600 }, { "epoch": 27.97542326597488, "grad_norm": 4.329941749572754, "learning_rate": 2.1027190332326284e-06, "loss": 0.1239, "step": 25625 }, { "epoch": 28.002184598580012, "grad_norm": 7.115732669830322, "learning_rate": 2.0743957703927494e-06, "loss": 0.1548, "step": 25650 }, { "epoch": 28.02949208083015, "grad_norm": 6.863306999206543, "learning_rate": 2.04607250755287e-06, "loss": 0.144, "step": 25675 }, { "epoch": 28.056799563080283, "grad_norm": 3.0386147499084473, "learning_rate": 2.017749244712991e-06, "loss": 0.1949, "step": 25700 }, { "epoch": 28.08410704533042, "grad_norm": 15.29764175415039, "learning_rate": 1.989425981873112e-06, "loss": 0.1589, "step": 25725 }, { "epoch": 28.111414527580557, "grad_norm": 2.351290464401245, "learning_rate": 1.961102719033233e-06, "loss": 0.1429, "step": 25750 }, { "epoch": 28.138722009830694, "grad_norm": 3.9380381107330322, "learning_rate": 1.9327794561933534e-06, "loss": 0.1272, "step": 25775 }, { "epoch": 28.16602949208083, "grad_norm": 16.843408584594727, "learning_rate": 1.9044561933534744e-06, "loss": 0.1784, "step": 25800 }, { "epoch": 28.19333697433097, "grad_norm": 10.511516571044922, "learning_rate": 1.8761329305135954e-06, "loss": 0.118, "step": 25825 }, { "epoch": 28.220644456581102, "grad_norm": 6.011808395385742, "learning_rate": 1.847809667673716e-06, "loss": 0.1769, "step": 25850 }, { "epoch": 28.24795193883124, "grad_norm": 3.042947769165039, "learning_rate": 1.819486404833837e-06, "loss": 0.1005, "step": 25875 }, { "epoch": 28.275259421081376, "grad_norm": 3.715228319168091, "learning_rate": 1.7911631419939577e-06, "loss": 0.1406, 
"step": 25900 }, { "epoch": 28.302566903331513, "grad_norm": 4.655735492706299, "learning_rate": 1.7628398791540787e-06, "loss": 0.1161, "step": 25925 }, { "epoch": 28.32987438558165, "grad_norm": 7.189239501953125, "learning_rate": 1.7345166163141995e-06, "loss": 0.1771, "step": 25950 }, { "epoch": 28.357181867831788, "grad_norm": 4.750720500946045, "learning_rate": 1.7061933534743204e-06, "loss": 0.1203, "step": 25975 }, { "epoch": 28.38448935008192, "grad_norm": 4.8236541748046875, "learning_rate": 1.6778700906344412e-06, "loss": 0.16, "step": 26000 }, { "epoch": 28.38448935008192, "eval_cer": 0.13231506439053609, "eval_loss": 0.8327809572219849, "eval_model_preparation_time": 0.005, "eval_runtime": 113.5869, "eval_samples_per_second": 44.327, "eval_steps_per_second": 5.546, "eval_wer": 0.3383309883925431, "step": 26000 }, { "epoch": 28.411796832332058, "grad_norm": 5.597462177276611, "learning_rate": 1.649546827794562e-06, "loss": 0.0981, "step": 26025 }, { "epoch": 28.439104314582195, "grad_norm": 3.356710195541382, "learning_rate": 1.6212235649546828e-06, "loss": 0.1508, "step": 26050 }, { "epoch": 28.466411796832332, "grad_norm": 2.8238823413848877, "learning_rate": 1.5929003021148037e-06, "loss": 0.0936, "step": 26075 }, { "epoch": 28.49371927908247, "grad_norm": 10.34155559539795, "learning_rate": 1.5645770392749245e-06, "loss": 0.1569, "step": 26100 }, { "epoch": 28.521026761332607, "grad_norm": 3.343918800354004, "learning_rate": 1.5362537764350455e-06, "loss": 0.1226, "step": 26125 }, { "epoch": 28.54833424358274, "grad_norm": 3.8382480144500732, "learning_rate": 1.5079305135951663e-06, "loss": 0.1774, "step": 26150 }, { "epoch": 28.575641725832877, "grad_norm": 6.86454963684082, "learning_rate": 1.4796072507552872e-06, "loss": 0.1212, "step": 26175 }, { "epoch": 28.602949208083015, "grad_norm": 2.4399051666259766, "learning_rate": 1.4512839879154078e-06, "loss": 0.1429, "step": 26200 }, { "epoch": 28.63025669033315, "grad_norm": 2.7658746242523193, 
"learning_rate": 1.4229607250755286e-06, "loss": 0.1207, "step": 26225 }, { "epoch": 28.65756417258329, "grad_norm": 2.578535318374634, "learning_rate": 1.3946374622356495e-06, "loss": 0.1694, "step": 26250 }, { "epoch": 28.684871654833426, "grad_norm": 5.958818435668945, "learning_rate": 1.3663141993957703e-06, "loss": 0.1569, "step": 26275 }, { "epoch": 28.71217913708356, "grad_norm": 4.579010009765625, "learning_rate": 1.3379909365558913e-06, "loss": 0.1266, "step": 26300 }, { "epoch": 28.739486619333697, "grad_norm": 7.273629665374756, "learning_rate": 1.309667673716012e-06, "loss": 0.1064, "step": 26325 }, { "epoch": 28.766794101583834, "grad_norm": 7.377293109893799, "learning_rate": 1.281344410876133e-06, "loss": 0.2017, "step": 26350 }, { "epoch": 28.79410158383397, "grad_norm": 4.418712139129639, "learning_rate": 1.2530211480362538e-06, "loss": 0.1162, "step": 26375 }, { "epoch": 28.821409066084108, "grad_norm": 4.11737060546875, "learning_rate": 1.2246978851963746e-06, "loss": 0.1604, "step": 26400 }, { "epoch": 28.848716548334245, "grad_norm": 5.172785758972168, "learning_rate": 1.1963746223564954e-06, "loss": 0.1141, "step": 26425 }, { "epoch": 28.87602403058438, "grad_norm": 4.2987260818481445, "learning_rate": 1.1680513595166163e-06, "loss": 0.1505, "step": 26450 }, { "epoch": 28.903331512834516, "grad_norm": 2.4171829223632812, "learning_rate": 1.1397280966767371e-06, "loss": 0.1127, "step": 26475 }, { "epoch": 28.930638995084653, "grad_norm": 5.550589561462402, "learning_rate": 1.111404833836858e-06, "loss": 0.1815, "step": 26500 }, { "epoch": 28.95794647733479, "grad_norm": 1.4270178079605103, "learning_rate": 1.0830815709969789e-06, "loss": 0.0976, "step": 26525 }, { "epoch": 28.985253959584927, "grad_norm": 4.346420764923096, "learning_rate": 1.0547583081570998e-06, "loss": 0.1548, "step": 26550 }, { "epoch": 29.01201529219006, "grad_norm": 10.451245307922363, "learning_rate": 1.0264350453172204e-06, "loss": 0.13, "step": 26575 }, { "epoch": 
29.039322774440198, "grad_norm": 6.4142913818359375, "learning_rate": 9.981117824773414e-07, "loss": 0.1179, "step": 26600 }, { "epoch": 29.066630256690335, "grad_norm": 7.021634101867676, "learning_rate": 9.697885196374622e-07, "loss": 0.1197, "step": 26625 }, { "epoch": 29.09393773894047, "grad_norm": 55.022064208984375, "learning_rate": 9.414652567975831e-07, "loss": 0.1274, "step": 26650 }, { "epoch": 29.121245221190605, "grad_norm": 4.15505838394165, "learning_rate": 9.13141993957704e-07, "loss": 0.1639, "step": 26675 }, { "epoch": 29.148552703440743, "grad_norm": 4.0243821144104, "learning_rate": 8.848187311178249e-07, "loss": 0.1219, "step": 26700 }, { "epoch": 29.17586018569088, "grad_norm": 2.0986721515655518, "learning_rate": 8.564954682779457e-07, "loss": 0.1103, "step": 26725 }, { "epoch": 29.203167667941017, "grad_norm": 13.572419166564941, "learning_rate": 8.281722054380665e-07, "loss": 0.1212, "step": 26750 }, { "epoch": 29.230475150191154, "grad_norm": 3.5433740615844727, "learning_rate": 7.998489425981874e-07, "loss": 0.1466, "step": 26775 }, { "epoch": 29.257782632441288, "grad_norm": 4.1075825691223145, "learning_rate": 7.715256797583082e-07, "loss": 0.1051, "step": 26800 }, { "epoch": 29.285090114691425, "grad_norm": 2.95479154586792, "learning_rate": 7.432024169184289e-07, "loss": 0.1448, "step": 26825 }, { "epoch": 29.31239759694156, "grad_norm": 29.802701950073242, "learning_rate": 7.148791540785498e-07, "loss": 0.1348, "step": 26850 }, { "epoch": 29.3397050791917, "grad_norm": 5.300603866577148, "learning_rate": 6.865558912386707e-07, "loss": 0.1455, "step": 26875 }, { "epoch": 29.367012561441836, "grad_norm": 3.060485601425171, "learning_rate": 6.582326283987915e-07, "loss": 0.1298, "step": 26900 }, { "epoch": 29.394320043691973, "grad_norm": 3.40439510345459, "learning_rate": 6.299093655589123e-07, "loss": 0.131, "step": 26925 }, { "epoch": 29.421627525942107, "grad_norm": 16.113086700439453, "learning_rate": 6.015861027190332e-07, "loss": 
0.1208, "step": 26950 }, { "epoch": 29.448935008192244, "grad_norm": 7.676608562469482, "learning_rate": 5.732628398791541e-07, "loss": 0.1647, "step": 26975 }, { "epoch": 29.47624249044238, "grad_norm": 7.1134819984436035, "learning_rate": 5.449395770392749e-07, "loss": 0.1159, "step": 27000 }, { "epoch": 29.47624249044238, "eval_cer": 0.13191075172207248, "eval_loss": 0.8367248773574829, "eval_model_preparation_time": 0.005, "eval_runtime": 113.7479, "eval_samples_per_second": 44.265, "eval_steps_per_second": 5.539, "eval_wer": 0.3375175870559268, "step": 27000 }, { "epoch": 29.503549972692518, "grad_norm": 6.415709495544434, "learning_rate": 5.166163141993957e-07, "loss": 0.15, "step": 27025 }, { "epoch": 29.530857454942655, "grad_norm": 7.134923458099365, "learning_rate": 4.882930513595166e-07, "loss": 0.1247, "step": 27050 }, { "epoch": 29.558164937192792, "grad_norm": 4.381241798400879, "learning_rate": 4.599697885196375e-07, "loss": 0.1272, "step": 27075 }, { "epoch": 29.585472419442926, "grad_norm": 10.17823600769043, "learning_rate": 4.316465256797583e-07, "loss": 0.1384, "step": 27100 }, { "epoch": 29.612779901693063, "grad_norm": 5.930269241333008, "learning_rate": 4.0332326283987913e-07, "loss": 0.2023, "step": 27125 }, { "epoch": 29.6400873839432, "grad_norm": 5.858401775360107, "learning_rate": 3.75e-07, "loss": 0.1128, "step": 27150 }, { "epoch": 29.667394866193337, "grad_norm": 3.994352102279663, "learning_rate": 3.4667673716012083e-07, "loss": 0.1216, "step": 27175 }, { "epoch": 29.694702348443474, "grad_norm": 5.917628288269043, "learning_rate": 3.183534743202417e-07, "loss": 0.1219, "step": 27200 }, { "epoch": 29.72200983069361, "grad_norm": 6.165762901306152, "learning_rate": 2.9003021148036253e-07, "loss": 0.1407, "step": 27225 }, { "epoch": 29.749317312943745, "grad_norm": 6.73039436340332, "learning_rate": 2.6170694864048335e-07, "loss": 0.1438, "step": 27250 }, { "epoch": 29.776624795193882, "grad_norm": 3.5101068019866943, "learning_rate": 
2.3338368580060423e-07, "loss": 0.1357, "step": 27275 }, { "epoch": 29.80393227744402, "grad_norm": 8.990194320678711, "learning_rate": 2.0506042296072508e-07, "loss": 0.1393, "step": 27300 }, { "epoch": 29.831239759694157, "grad_norm": 8.98389720916748, "learning_rate": 1.7673716012084593e-07, "loss": 0.1815, "step": 27325 }, { "epoch": 29.858547241944294, "grad_norm": 6.830402851104736, "learning_rate": 1.4841389728096678e-07, "loss": 0.1216, "step": 27350 }, { "epoch": 29.88585472419443, "grad_norm": 3.883934497833252, "learning_rate": 1.2009063444108763e-07, "loss": 0.1683, "step": 27375 }, { "epoch": 29.913162206444564, "grad_norm": 9.895020484924316, "learning_rate": 9.176737160120846e-08, "loss": 0.1417, "step": 27400 }, { "epoch": 29.9404696886947, "grad_norm": 8.581982612609863, "learning_rate": 6.344410876132931e-08, "loss": 0.1623, "step": 27425 }, { "epoch": 29.96777717094484, "grad_norm": 3.579073429107666, "learning_rate": 3.512084592145015e-08, "loss": 0.1151, "step": 27450 }, { "epoch": 29.995084653194976, "grad_norm": 8.692906379699707, "learning_rate": 6.797583081570997e-09, "loss": 0.1266, "step": 27475 }, { "epoch": 30.0, "step": 27480, "total_flos": 8.494511885340566e+19, "train_loss": 0.44039332199478426, "train_runtime": 18221.2799, "train_samples_per_second": 12.058, "train_steps_per_second": 1.508 } ], "logging_steps": 25, "max_steps": 27480, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 1000, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.494511885340566e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }