{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.999179655455291, "eval_steps": 500, "global_step": 1371, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0021875854525567405, "grad_norm": 0.017111310735344887, "learning_rate": 2.173913043478261e-05, "loss": 1.059, "step": 1 }, { "epoch": 0.004375170905113481, "grad_norm": 0.018623707816004753, "learning_rate": 4.347826086956522e-05, "loss": 1.315, "step": 2 }, { "epoch": 0.006562756357670222, "grad_norm": 0.018533790484070778, "learning_rate": 6.521739130434782e-05, "loss": 0.9224, "step": 3 }, { "epoch": 0.008750341810226962, "grad_norm": 0.015920396894216537, "learning_rate": 8.695652173913044e-05, "loss": 0.9201, "step": 4 }, { "epoch": 0.010937927262783703, "grad_norm": 0.01558469608426094, "learning_rate": 0.00010869565217391305, "loss": 0.8457, "step": 5 }, { "epoch": 0.013125512715340444, "grad_norm": 0.023962153121829033, "learning_rate": 0.00013043478260869564, "loss": 0.9347, "step": 6 }, { "epoch": 0.015313098167897183, "grad_norm": 0.029316680505871773, "learning_rate": 0.00015217391304347827, "loss": 0.8043, "step": 7 }, { "epoch": 0.017500683620453924, "grad_norm": 0.028927722945809364, "learning_rate": 0.00017391304347826088, "loss": 0.9963, "step": 8 }, { "epoch": 0.019688269073010665, "grad_norm": 0.025050047785043716, "learning_rate": 0.0001956521739130435, "loss": 0.7861, "step": 9 }, { "epoch": 0.021875854525567406, "grad_norm": 0.04486666992306709, "learning_rate": 0.0002173913043478261, "loss": 1.0884, "step": 10 }, { "epoch": 0.024063439978124147, "grad_norm": 0.035062652081251144, "learning_rate": 0.00023913043478260867, "loss": 0.9876, "step": 11 }, { "epoch": 0.026251025430680888, "grad_norm": 0.033111147582530975, "learning_rate": 0.0002608695652173913, "loss": 0.8838, "step": 12 }, { "epoch": 0.028438610883237625, "grad_norm": 0.04362301528453827, "learning_rate": 0.00028260869565217394, "loss": 0.8189, "step": 13 }, { "epoch": 0.030626196335794366, "grad_norm": 0.04369740933179855, "learning_rate": 0.00030434782608695655, "loss": 0.9065, "step": 14 }, { "epoch": 0.03281378178835111, "grad_norm": 0.04280918091535568, "learning_rate": 0.0003260869565217391, "loss": 0.8706, "step": 15 }, { "epoch": 0.03500136724090785, "grad_norm": 0.06369622051715851, "learning_rate": 0.00034782608695652176, "loss": 1.0769, "step": 16 }, { "epoch": 0.03718895269346459, "grad_norm": 0.04528605565428734, "learning_rate": 0.00036956521739130437, "loss": 0.9662, "step": 17 }, { "epoch": 0.03937653814602133, "grad_norm": 0.045731619000434875, "learning_rate": 0.000391304347826087, "loss": 0.7727, "step": 18 }, { "epoch": 0.04156412359857807, "grad_norm": 0.03585459291934967, "learning_rate": 0.0004130434782608696, "loss": 0.7813, "step": 19 }, { "epoch": 0.04375170905113481, "grad_norm": 0.07666835933923721, "learning_rate": 0.0004347826086956522, "loss": 1.1307, "step": 20 }, { "epoch": 0.04593929450369155, "grad_norm": 0.02985576167702675, "learning_rate": 0.0004565217391304348, "loss": 0.785, "step": 21 }, { "epoch": 0.04812687995624829, "grad_norm": 0.02983052283525467, "learning_rate": 0.00047826086956521735, "loss": 0.8323, "step": 22 }, { "epoch": 0.050314465408805034, "grad_norm": 0.09363115578889847, "learning_rate": 0.0005, "loss": 0.6885, "step": 23 }, { "epoch": 0.052502050861361775, "grad_norm": 0.47768905758857727, "learning_rate": 0.0005217391304347826, "loss": 1.0143, "step": 24 }, { "epoch": 0.05468963631391851, "grad_norm": 0.1065075695514679, "learning_rate": 0.0005434782608695652, "loss": 0.8331, "step": 25 }, { "epoch": 0.05687722176647525, "grad_norm": 0.023991186171770096, "learning_rate": 0.0005652173913043479, "loss": 0.837, "step": 26 }, { "epoch": 0.05906480721903199, "grad_norm": 0.020501986145973206, "learning_rate": 0.0005869565217391304, "loss": 0.7117, "step": 27 }, { "epoch": 0.06125239267158873, "grad_norm": 0.052776217460632324, "learning_rate": 0.0006086956521739131, "loss": 0.9429, "step": 28 }, { "epoch": 0.06343997812414548, "grad_norm": 0.0542248971760273, "learning_rate": 0.0006304347826086957, "loss": 0.7759, "step": 29 }, { "epoch": 0.06562756357670221, "grad_norm": 0.061998218297958374, "learning_rate": 0.0006521739130434782, "loss": 0.7904, "step": 30 }, { "epoch": 0.06781514902925896, "grad_norm": 0.03401396796107292, "learning_rate": 0.0006739130434782609, "loss": 0.7386, "step": 31 }, { "epoch": 0.0700027344818157, "grad_norm": 0.027073826640844345, "learning_rate": 0.0006956521739130435, "loss": 0.6702, "step": 32 }, { "epoch": 0.07219031993437244, "grad_norm": 0.0468217171728611, "learning_rate": 0.0007173913043478261, "loss": 0.9944, "step": 33 }, { "epoch": 0.07437790538692918, "grad_norm": 0.1130312904715538, "learning_rate": 0.0007391304347826087, "loss": 0.8396, "step": 34 }, { "epoch": 0.07656549083948591, "grad_norm": 0.05662137269973755, "learning_rate": 0.0007608695652173914, "loss": 0.918, "step": 35 }, { "epoch": 0.07875307629204266, "grad_norm": 0.030792295932769775, "learning_rate": 0.000782608695652174, "loss": 1.0882, "step": 36 }, { "epoch": 0.0809406617445994, "grad_norm": 0.02346004731953144, "learning_rate": 0.0008043478260869566, "loss": 0.9276, "step": 37 }, { "epoch": 0.08312824719715614, "grad_norm": 0.0640161782503128, "learning_rate": 0.0008260869565217392, "loss": 0.9607, "step": 38 }, { "epoch": 0.08531583264971287, "grad_norm": 0.01127663068473339, "learning_rate": 0.0008478260869565217, "loss": 0.7476, "step": 39 }, { "epoch": 0.08750341810226962, "grad_norm": 0.020388390868902206, "learning_rate": 0.0008695652173913044, "loss": 0.9294, "step": 40 }, { "epoch": 0.08969100355482636, "grad_norm": 0.011159627698361874, "learning_rate": 0.0008913043478260869, "loss": 0.7403, "step": 41 }, { "epoch": 0.0918785890073831, "grad_norm": 0.01922360621392727, "learning_rate": 0.0009130434782608696, "loss": 0.9901, "step": 42 }, { "epoch": 0.09406617445993984, "grad_norm": 0.022499792277812958, "learning_rate": 0.0009347826086956521, "loss": 0.8491, "step": 43 }, { "epoch": 0.09625375991249659, "grad_norm": 0.019557347521185875, "learning_rate": 0.0009565217391304347, "loss": 0.8855, "step": 44 }, { "epoch": 0.09844134536505332, "grad_norm": 0.023644007742404938, "learning_rate": 0.0009782608695652175, "loss": 0.6828, "step": 45 }, { "epoch": 0.10062893081761007, "grad_norm": 0.0180030707269907, "learning_rate": 0.001, "loss": 1.0325, "step": 46 }, { "epoch": 0.1028165162701668, "grad_norm": 0.026917221024632454, "learning_rate": 0.0010217391304347826, "loss": 1.0596, "step": 47 }, { "epoch": 0.10500410172272355, "grad_norm": 0.016843197867274284, "learning_rate": 0.0010434782608695651, "loss": 0.6183, "step": 48 }, { "epoch": 0.10719168717528028, "grad_norm": 0.0393221415579319, "learning_rate": 0.001065217391304348, "loss": 0.9009, "step": 49 }, { "epoch": 0.10937927262783702, "grad_norm": 0.025003232061862946, "learning_rate": 0.0010869565217391304, "loss": 0.9701, "step": 50 }, { "epoch": 0.11156685808039377, "grad_norm": 0.029358206316828728, "learning_rate": 0.001108695652173913, "loss": 0.8351, "step": 51 }, { "epoch": 0.1137544435329505, "grad_norm": 0.02484138496220112, "learning_rate": 0.0011304347826086958, "loss": 0.906, "step": 52 }, { "epoch": 0.11594202898550725, "grad_norm": 0.012963383458554745, "learning_rate": 0.0011521739130434783, "loss": 1.098, "step": 53 }, { "epoch": 0.11812961443806398, "grad_norm": 0.022173907607793808, "learning_rate": 0.0011739130434782609, "loss": 0.9086, "step": 54 }, { "epoch": 0.12031719989062073, "grad_norm": 0.018844394013285637, "learning_rate": 0.0011956521739130434, "loss": 0.8087, "step": 55 }, { "epoch": 0.12250478534317746, "grad_norm": 0.01081649400293827, "learning_rate": 0.0012173913043478262, "loss": 0.6714, "step": 56 }, { "epoch": 0.12469237079573421, "grad_norm": 0.012590788304805756, "learning_rate": 0.0012391304347826087, "loss": 0.8224, "step": 57 }, { "epoch": 0.12687995624829096, "grad_norm": 0.007173096761107445, "learning_rate": 0.0012608695652173913, "loss": 0.9208, "step": 58 }, { "epoch": 0.1290675417008477, "grad_norm": 0.023659205064177513, "learning_rate": 0.001282608695652174, "loss": 0.8385, "step": 59 }, { "epoch": 0.13125512715340443, "grad_norm": 0.016100220382213593, "learning_rate": 0.0013043478260869564, "loss": 0.6562, "step": 60 }, { "epoch": 0.13344271260596116, "grad_norm": 0.01680757850408554, "learning_rate": 0.0013260869565217392, "loss": 0.8996, "step": 61 }, { "epoch": 0.13563029805851792, "grad_norm": 0.02230382151901722, "learning_rate": 0.0013478260869565217, "loss": 0.7927, "step": 62 }, { "epoch": 0.13781788351107466, "grad_norm": 0.03682897984981537, "learning_rate": 0.0013695652173913043, "loss": 0.8507, "step": 63 }, { "epoch": 0.1400054689636314, "grad_norm": 0.02873164229094982, "learning_rate": 0.001391304347826087, "loss": 0.7379, "step": 64 }, { "epoch": 0.14219305441618812, "grad_norm": 0.08291471749544144, "learning_rate": 0.0014130434782608696, "loss": 0.9686, "step": 65 }, { "epoch": 0.1443806398687449, "grad_norm": 0.005648148711770773, "learning_rate": 0.0014347826086956522, "loss": 0.9365, "step": 66 }, { "epoch": 0.14656822532130162, "grad_norm": 0.010281619615852833, "learning_rate": 0.0014565217391304347, "loss": 0.7989, "step": 67 }, { "epoch": 0.14875581077385835, "grad_norm": 0.010221214964985847, "learning_rate": 0.0014782608695652175, "loss": 0.7567, "step": 68 }, { "epoch": 0.1509433962264151, "grad_norm": 0.008735002018511295, "learning_rate": 0.0015, "loss": 0.9644, "step": 69 }, { "epoch": 0.15313098167897182, "grad_norm": 0.021798064932227135, "learning_rate": 0.0015217391304347828, "loss": 0.8451, "step": 70 }, { "epoch": 0.15531856713152858, "grad_norm": 0.011285695247352123, "learning_rate": 0.0015434782608695651, "loss": 0.9334, "step": 71 }, { "epoch": 0.15750615258408532, "grad_norm": 0.005321874748915434, "learning_rate": 0.001565217391304348, "loss": 0.9424, "step": 72 }, { "epoch": 0.15969373803664205, "grad_norm": 0.010020822286605835, "learning_rate": 0.0015869565217391305, "loss": 0.7219, "step": 73 }, { "epoch": 0.1618813234891988, "grad_norm": 0.008384345099329948, "learning_rate": 0.0016086956521739132, "loss": 0.8169, "step": 74 }, { "epoch": 0.16406890894175555, "grad_norm": 0.010866906493902206, "learning_rate": 0.0016304347826086956, "loss": 0.7556, "step": 75 }, { "epoch": 0.16625649439431228, "grad_norm": 0.01588907279074192, "learning_rate": 0.0016521739130434783, "loss": 0.766, "step": 76 }, { "epoch": 0.16844407984686902, "grad_norm": 0.018410617485642433, "learning_rate": 0.001673913043478261, "loss": 0.8201, "step": 77 }, { "epoch": 0.17063166529942575, "grad_norm": 0.01711914874613285, "learning_rate": 0.0016956521739130434, "loss": 1.0454, "step": 78 }, { "epoch": 0.1728192507519825, "grad_norm": 0.040495071560144424, "learning_rate": 0.001717391304347826, "loss": 0.9048, "step": 79 }, { "epoch": 0.17500683620453925, "grad_norm": 0.008844586089253426, "learning_rate": 0.0017391304347826088, "loss": 0.9716, "step": 80 }, { "epoch": 0.17719442165709598, "grad_norm": 0.020504243671894073, "learning_rate": 0.0017608695652173915, "loss": 0.706, "step": 81 }, { "epoch": 0.1793820071096527, "grad_norm": 0.005656155291944742, "learning_rate": 0.0017826086956521739, "loss": 0.8948, "step": 82 }, { "epoch": 0.18156959256220945, "grad_norm": 0.011604691855609417, "learning_rate": 0.0018043478260869566, "loss": 0.9465, "step": 83 }, { "epoch": 0.1837571780147662, "grad_norm": 0.004078141879290342, "learning_rate": 0.0018260869565217392, "loss": 0.9462, "step": 84 }, { "epoch": 0.18594476346732294, "grad_norm": 0.008594767190515995, "learning_rate": 0.0018478260869565217, "loss": 0.9287, "step": 85 }, { "epoch": 0.18813234891987968, "grad_norm": 0.008353278040885925, "learning_rate": 0.0018695652173913043, "loss": 0.8743, "step": 86 }, { "epoch": 0.1903199343724364, "grad_norm": 0.010675789788365364, "learning_rate": 0.001891304347826087, "loss": 0.7836, "step": 87 }, { "epoch": 0.19250751982499317, "grad_norm": 0.004695142153650522, "learning_rate": 0.0019130434782608694, "loss": 0.7587, "step": 88 }, { "epoch": 0.1946951052775499, "grad_norm": 0.005462712608277798, "learning_rate": 0.0019347826086956522, "loss": 0.9685, "step": 89 }, { "epoch": 0.19688269073010664, "grad_norm": 0.005652555730193853, "learning_rate": 0.001956521739130435, "loss": 0.7821, "step": 90 }, { "epoch": 0.19907027618266337, "grad_norm": 0.0058873966336250305, "learning_rate": 0.0019782608695652175, "loss": 0.8027, "step": 91 }, { "epoch": 0.20125786163522014, "grad_norm": 0.004672915209084749, "learning_rate": 0.002, "loss": 0.8367, "step": 92 }, { "epoch": 0.20344544708777687, "grad_norm": 0.004535092506557703, "learning_rate": 0.0020217391304347826, "loss": 0.7124, "step": 93 }, { "epoch": 0.2056330325403336, "grad_norm": 0.003576159942895174, "learning_rate": 0.002043478260869565, "loss": 0.8596, "step": 94 }, { "epoch": 0.20782061799289034, "grad_norm": 0.005423200782388449, "learning_rate": 0.0020652173913043477, "loss": 0.8582, "step": 95 }, { "epoch": 0.2100082034454471, "grad_norm": 0.00573402363806963, "learning_rate": 0.0020869565217391303, "loss": 0.6699, "step": 96 }, { "epoch": 0.21219578889800383, "grad_norm": 0.004158989991992712, "learning_rate": 0.0021086956521739132, "loss": 0.9819, "step": 97 }, { "epoch": 0.21438337435056057, "grad_norm": 0.009630310349166393, "learning_rate": 0.002130434782608696, "loss": 1.0568, "step": 98 }, { "epoch": 0.2165709598031173, "grad_norm": 0.006703687831759453, "learning_rate": 0.0021521739130434783, "loss": 0.7342, "step": 99 }, { "epoch": 0.21875854525567404, "grad_norm": 0.008040892891585827, "learning_rate": 0.002173913043478261, "loss": 0.8339, "step": 100 }, { "epoch": 0.2209461307082308, "grad_norm": 0.00757247768342495, "learning_rate": 0.0021956521739130434, "loss": 0.934, "step": 101 }, { "epoch": 0.22313371616078753, "grad_norm": 0.009981849230825901, "learning_rate": 0.002217391304347826, "loss": 0.9303, "step": 102 }, { "epoch": 0.22532130161334427, "grad_norm": 0.007599060423672199, "learning_rate": 0.0022391304347826086, "loss": 0.7928, "step": 103 }, { "epoch": 0.227508887065901, "grad_norm": 0.010048196651041508, "learning_rate": 0.0022608695652173915, "loss": 0.8811, "step": 104 }, { "epoch": 0.22969647251845776, "grad_norm": 0.008185365237295628, "learning_rate": 0.002282608695652174, "loss": 0.8586, "step": 105 }, { "epoch": 0.2318840579710145, "grad_norm": 0.02917146123945713, "learning_rate": 0.0023043478260869566, "loss": 0.8296, "step": 106 }, { "epoch": 0.23407164342357123, "grad_norm": 0.03497055917978287, "learning_rate": 0.002326086956521739, "loss": 0.9071, "step": 107 }, { "epoch": 0.23625922887612796, "grad_norm": 0.0127785699442029, "learning_rate": 0.0023478260869565218, "loss": 0.8497, "step": 108 }, { "epoch": 0.23844681432868473, "grad_norm": 0.007704317104071379, "learning_rate": 0.0023695652173913043, "loss": 0.836, "step": 109 }, { "epoch": 0.24063439978124146, "grad_norm": 0.007215225137770176, "learning_rate": 0.002391304347826087, "loss": 0.9434, "step": 110 }, { "epoch": 0.2428219852337982, "grad_norm": 0.008061757311224937, "learning_rate": 0.00241304347826087, "loss": 0.9648, "step": 111 }, { "epoch": 0.24500957068635493, "grad_norm": 0.007565490435808897, "learning_rate": 0.0024347826086956524, "loss": 0.9409, "step": 112 }, { "epoch": 0.2471971561389117, "grad_norm": 0.00492995698004961, "learning_rate": 0.002456521739130435, "loss": 0.8753, "step": 113 }, { "epoch": 0.24938474159146842, "grad_norm": 0.005053890403360128, "learning_rate": 0.0024782608695652175, "loss": 0.8239, "step": 114 }, { "epoch": 0.25157232704402516, "grad_norm": 0.009602397680282593, "learning_rate": 0.0025, "loss": 0.7825, "step": 115 }, { "epoch": 0.2537599124965819, "grad_norm": 0.004248041659593582, "learning_rate": 0.0025217391304347826, "loss": 0.7295, "step": 116 }, { "epoch": 0.2559474979491386, "grad_norm": 0.009284190833568573, "learning_rate": 0.002543478260869565, "loss": 0.9205, "step": 117 }, { "epoch": 0.2581350834016954, "grad_norm": 0.00780320493504405, "learning_rate": 0.002565217391304348, "loss": 0.8928, "step": 118 }, { "epoch": 0.2603226688542521, "grad_norm": 0.014100235886871815, "learning_rate": 0.0025869565217391307, "loss": 1.0, "step": 119 }, { "epoch": 0.26251025430680885, "grad_norm": 0.0031979018822312355, "learning_rate": 0.002608695652173913, "loss": 0.8661, "step": 120 }, { "epoch": 0.2646978397593656, "grad_norm": 0.010853100568056107, "learning_rate": 0.002630434782608696, "loss": 0.7197, "step": 121 }, { "epoch": 0.2668854252119223, "grad_norm": 0.00902815256267786, "learning_rate": 0.0026521739130434784, "loss": 0.8987, "step": 122 }, { "epoch": 0.2690730106644791, "grad_norm": 0.006882428657263517, "learning_rate": 0.002673913043478261, "loss": 0.7676, "step": 123 }, { "epoch": 0.27126059611703585, "grad_norm": 0.014947020448744297, "learning_rate": 0.0026956521739130435, "loss": 0.8857, "step": 124 }, { "epoch": 0.27344818156959255, "grad_norm": 0.005454353056848049, "learning_rate": 0.002717391304347826, "loss": 0.7277, "step": 125 }, { "epoch": 0.2756357670221493, "grad_norm": 0.0050047156400978565, "learning_rate": 0.0027391304347826086, "loss": 0.9257, "step": 126 }, { "epoch": 0.277823352474706, "grad_norm": 0.008737878873944283, "learning_rate": 0.002760869565217391, "loss": 0.8598, "step": 127 }, { "epoch": 0.2800109379272628, "grad_norm": 0.008086539804935455, "learning_rate": 0.002782608695652174, "loss": 0.9844, "step": 128 }, { "epoch": 0.28219852337981954, "grad_norm": 0.01979847252368927, "learning_rate": 0.0028043478260869567, "loss": 0.9718, "step": 129 }, { "epoch": 0.28438610883237625, "grad_norm": 0.016869032755494118, "learning_rate": 0.002826086956521739, "loss": 0.8311, "step": 130 }, { "epoch": 0.286573694284933, "grad_norm": 0.008929664269089699, "learning_rate": 0.0028478260869565218, "loss": 0.6482, "step": 131 }, { "epoch": 0.2887612797374898, "grad_norm": 0.013361562974750996, "learning_rate": 0.0028695652173913043, "loss": 0.991, "step": 132 }, { "epoch": 0.2909488651900465, "grad_norm": 0.0223986953496933, "learning_rate": 0.002891304347826087, "loss": 0.8566, "step": 133 }, { "epoch": 0.29313645064260324, "grad_norm": 0.00690645445138216, "learning_rate": 0.0029130434782608694, "loss": 0.7706, "step": 134 }, { "epoch": 0.29532403609515995, "grad_norm": 0.007177585270255804, "learning_rate": 0.0029347826086956524, "loss": 0.7896, "step": 135 }, { "epoch": 0.2975116215477167, "grad_norm": 0.024162376299500465, "learning_rate": 0.002956521739130435, "loss": 0.8316, "step": 136 }, { "epoch": 0.29969920700027347, "grad_norm": 0.009236878715455532, "learning_rate": 0.0029782608695652175, "loss": 0.7563, "step": 137 }, { "epoch": 0.3018867924528302, "grad_norm": 0.008098084479570389, "learning_rate": 0.003, "loss": 0.9313, "step": 138 }, { "epoch": 0.30407437790538694, "grad_norm": 0.01629616692662239, "learning_rate": 0.002997566909975669, "loss": 0.7942, "step": 139 }, { "epoch": 0.30626196335794365, "grad_norm": 0.013256334699690342, "learning_rate": 0.0029951338199513382, "loss": 0.819, "step": 140 }, { "epoch": 0.3084495488105004, "grad_norm": 0.016614550724625587, "learning_rate": 0.0029927007299270073, "loss": 0.823, "step": 141 }, { "epoch": 0.31063713426305717, "grad_norm": 0.015185157768428326, "learning_rate": 0.0029902676399026764, "loss": 0.8534, "step": 142 }, { "epoch": 0.3128247197156139, "grad_norm": 0.012511268258094788, "learning_rate": 0.0029878345498783455, "loss": 1.0021, "step": 143 }, { "epoch": 0.31501230516817064, "grad_norm": 0.03368072584271431, "learning_rate": 0.0029854014598540146, "loss": 0.9333, "step": 144 }, { "epoch": 0.3171998906207274, "grad_norm": 0.014194028452038765, "learning_rate": 0.0029829683698296837, "loss": 0.7353, "step": 145 }, { "epoch": 0.3193874760732841, "grad_norm": 0.022817425429821014, "learning_rate": 0.002980535279805353, "loss": 0.8653, "step": 146 }, { "epoch": 0.32157506152584087, "grad_norm": 0.034395311027765274, "learning_rate": 0.002978102189781022, "loss": 0.7872, "step": 147 }, { "epoch": 0.3237626469783976, "grad_norm": 0.04415661096572876, "learning_rate": 0.002975669099756691, "loss": 0.8668, "step": 148 }, { "epoch": 0.32595023243095433, "grad_norm": 0.013315894640982151, "learning_rate": 0.0029732360097323605, "loss": 0.7012, "step": 149 }, { "epoch": 0.3281378178835111, "grad_norm": 0.01931261457502842, "learning_rate": 0.002970802919708029, "loss": 0.6949, "step": 150 }, { "epoch": 0.3303254033360678, "grad_norm": 0.01766936294734478, "learning_rate": 0.0029683698296836983, "loss": 1.0637, "step": 151 }, { "epoch": 0.33251298878862456, "grad_norm": 0.04097762331366539, "learning_rate": 0.002965936739659368, "loss": 0.6945, "step": 152 }, { "epoch": 0.33470057424118127, "grad_norm": 0.019335204735398293, "learning_rate": 0.0029635036496350364, "loss": 0.9677, "step": 153 }, { "epoch": 0.33688815969373803, "grad_norm": 0.02775772474706173, "learning_rate": 0.0029610705596107055, "loss": 0.9445, "step": 154 }, { "epoch": 0.3390757451462948, "grad_norm": 0.012738276273012161, "learning_rate": 0.002958637469586375, "loss": 0.7254, "step": 155 }, { "epoch": 0.3412633305988515, "grad_norm": 0.025990145280957222, "learning_rate": 0.0029562043795620437, "loss": 0.7296, "step": 156 }, { "epoch": 0.34345091605140826, "grad_norm": 0.08288227766752243, "learning_rate": 0.002953771289537713, "loss": 0.8595, "step": 157 }, { "epoch": 0.345638501503965, "grad_norm": 0.05340643599629402, "learning_rate": 0.002951338199513382, "loss": 0.7425, "step": 158 }, { "epoch": 0.34782608695652173, "grad_norm": 0.030417539179325104, "learning_rate": 0.0029489051094890514, "loss": 0.7976, "step": 159 }, { "epoch": 0.3500136724090785, "grad_norm": 0.04232973977923393, "learning_rate": 0.00294647201946472, "loss": 0.8764, "step": 160 }, { "epoch": 0.3522012578616352, "grad_norm": 0.025519737973809242, "learning_rate": 0.002944038929440389, "loss": 0.8247, "step": 161 }, { "epoch": 0.35438884331419196, "grad_norm": 0.046103380620479584, "learning_rate": 0.0029416058394160587, "loss": 0.8432, "step": 162 }, { "epoch": 0.3565764287667487, "grad_norm": 0.01843344047665596, "learning_rate": 0.0029391727493917274, "loss": 0.8721, "step": 163 }, { "epoch": 0.3587640142193054, "grad_norm": 0.029839089140295982, "learning_rate": 0.0029367396593673965, "loss": 0.7955, "step": 164 }, { "epoch": 0.3609515996718622, "grad_norm": 0.023799125105142593, "learning_rate": 0.002934306569343066, "loss": 0.8929, "step": 165 }, { "epoch": 0.3631391851244189, "grad_norm": 0.01695132628083229, "learning_rate": 0.0029318734793187346, "loss": 0.8149, "step": 166 }, { "epoch": 0.36532677057697566, "grad_norm": 0.01710570976138115, "learning_rate": 0.0029294403892944037, "loss": 0.9953, "step": 167 }, { "epoch": 0.3675143560295324, "grad_norm": 0.008958813734352589, "learning_rate": 0.0029270072992700733, "loss": 0.6486, "step": 168 }, { "epoch": 0.3697019414820891, "grad_norm": 0.02033080905675888, "learning_rate": 0.002924574209245742, "loss": 0.7759, "step": 169 }, { "epoch": 0.3718895269346459, "grad_norm": 0.01737876608967781, "learning_rate": 0.002922141119221411, "loss": 0.7998, "step": 170 }, { "epoch": 0.37407711238720265, "grad_norm": 0.011925026774406433, "learning_rate": 0.0029197080291970805, "loss": 0.6405, "step": 171 }, { "epoch": 0.37626469783975935, "grad_norm": 0.010621492750942707, "learning_rate": 0.0029172749391727496, "loss": 0.735, "step": 172 }, { "epoch": 0.3784522832923161, "grad_norm": 0.02744341269135475, "learning_rate": 0.0029148418491484183, "loss": 0.9386, "step": 173 }, { "epoch": 0.3806398687448728, "grad_norm": 0.010641987435519695, "learning_rate": 0.002912408759124088, "loss": 0.6368, "step": 174 }, { "epoch": 0.3828274541974296, "grad_norm": 0.016506191343069077, "learning_rate": 0.002909975669099757, "loss": 0.7212, "step": 175 }, { "epoch": 0.38501503964998635, "grad_norm": 0.029457593336701393, "learning_rate": 0.0029075425790754256, "loss": 0.8386, "step": 176 }, { "epoch": 0.38720262510254305, "grad_norm": 0.008680049329996109, "learning_rate": 0.002905109489051095, "loss": 0.8827, "step": 177 }, { "epoch": 0.3893902105550998, "grad_norm": 0.029479682445526123, "learning_rate": 0.002902676399026764, "loss": 0.797, "step": 178 }, { "epoch": 0.3915777960076566, "grad_norm": 0.01670117862522602, "learning_rate": 0.002900243309002433, "loss": 0.7164, "step": 179 }, { "epoch": 0.3937653814602133, "grad_norm": 0.019070839509367943, "learning_rate": 0.0028978102189781024, "loss": 0.7425, "step": 180 }, { "epoch": 0.39595296691277004, "grad_norm": 0.010363463312387466, "learning_rate": 0.0028953771289537715, "loss": 0.9502, "step": 181 }, { "epoch": 0.39814055236532675, "grad_norm": 0.02518656477332115, "learning_rate": 0.0028929440389294406, "loss": 0.7689, "step": 182 }, { "epoch": 0.4003281378178835, "grad_norm": 0.014663388952612877, "learning_rate": 0.0028905109489051097, "loss": 0.8839, "step": 183 }, { "epoch": 0.4025157232704403, "grad_norm": 0.009784224443137646, "learning_rate": 0.0028880778588807787, "loss": 0.82, "step": 184 }, { "epoch": 0.404703308722997, "grad_norm": 0.02763255313038826, "learning_rate": 0.002885644768856448, "loss": 0.8051, "step": 185 }, { "epoch": 0.40689089417555374, "grad_norm": 0.023367729038000107, "learning_rate": 0.002883211678832117, "loss": 0.7191, "step": 186 }, { "epoch": 0.40907847962811045, "grad_norm": 0.025467796251177788, "learning_rate": 0.002880778588807786, "loss": 0.7385, "step": 187 }, { "epoch": 0.4112660650806672, "grad_norm": 0.03302817419171333, "learning_rate": 0.002878345498783455, "loss": 0.6099, "step": 188 }, { "epoch": 0.41345365053322397, "grad_norm": 0.016808858141303062, "learning_rate": 0.002875912408759124, "loss": 0.687, "step": 189 }, { "epoch": 0.4156412359857807, "grad_norm": 0.030584512278437614, "learning_rate": 0.0028734793187347933, "loss": 0.7922, "step": 190 }, { "epoch": 0.41782882143833744, "grad_norm": 0.05187975615262985, "learning_rate": 0.0028710462287104624, "loss": 0.8282, "step": 191 }, { "epoch": 0.4200164068908942, "grad_norm": 0.03264329209923744, "learning_rate": 0.0028686131386861315, "loss": 0.542, "step": 192 }, { "epoch": 0.4222039923434509, "grad_norm": 0.08889129012823105, "learning_rate": 0.0028661800486618006, "loss": 0.7642, "step": 193 }, { "epoch": 0.42439157779600767, "grad_norm": 0.017528299242258072, "learning_rate": 0.0028637469586374697, "loss": 1.0019, "step": 194 }, { "epoch": 0.4265791632485644, "grad_norm": 0.042831018567085266, "learning_rate": 0.0028613138686131388, "loss": 0.7849, "step": 195 }, { "epoch": 0.42876674870112114, "grad_norm": 0.06844168901443481, "learning_rate": 0.002858880778588808, "loss": 0.8431, "step": 196 }, { "epoch": 0.4309543341536779, "grad_norm": 0.056285906583070755, "learning_rate": 0.002856447688564477, "loss": 0.766, "step": 197 }, { "epoch": 0.4331419196062346, "grad_norm": 0.03165756165981293, "learning_rate": 0.002854014598540146, "loss": 0.526, "step": 198 }, { "epoch": 0.43532950505879137, "grad_norm": 0.01906641758978367, "learning_rate": 0.002851581508515815, "loss": 0.634, "step": 199 }, { "epoch": 0.4375170905113481, "grad_norm": 0.03528127446770668, "learning_rate": 0.0028491484184914842, "loss": 0.7152, "step": 200 }, { "epoch": 0.43970467596390483, "grad_norm": 0.03441726043820381, "learning_rate": 0.0028467153284671533, "loss": 0.807, "step": 201 }, { "epoch": 0.4418922614164616, "grad_norm": 0.07585262507200241, "learning_rate": 0.0028442822384428224, "loss": 0.8068, "step": 202 }, { "epoch": 0.4440798468690183, "grad_norm": 0.04637427628040314, "learning_rate": 0.0028418491484184915, "loss": 0.6726, "step": 203 }, { "epoch": 0.44626743232157506, "grad_norm": 0.014708532020449638, "learning_rate": 0.0028394160583941606, "loss": 0.7633, "step": 204 }, { "epoch": 0.4484550177741318, "grad_norm": 0.06609700620174408, "learning_rate": 0.0028369829683698297, "loss": 0.9395, "step": 205 }, { "epoch": 0.45064260322668853, "grad_norm": 0.014884551987051964, "learning_rate": 0.0028345498783454988, "loss": 0.7629, "step": 206 }, { "epoch": 0.4528301886792453, "grad_norm": 0.02310200408101082, "learning_rate": 0.002832116788321168, "loss": 0.6696, "step": 207 }, { "epoch": 0.455017774131802, "grad_norm": 0.020516803488135338, "learning_rate": 0.002829683698296837, "loss": 0.6966, "step": 208 }, { "epoch": 0.45720535958435876, "grad_norm": 0.018198775127530098, "learning_rate": 0.002827250608272506, "loss": 0.936, "step": 209 }, { "epoch": 0.4593929450369155, "grad_norm": 0.032083529978990555, "learning_rate": 0.002824817518248175, "loss": 0.853, "step": 210 }, { "epoch": 0.46158053048947223, "grad_norm": 0.01605304516851902, "learning_rate": 0.0028223844282238442, "loss": 0.8602, "step": 211 }, { "epoch": 0.463768115942029, "grad_norm": 0.024932844564318657, "learning_rate": 0.0028199513381995133, "loss": 0.9888, "step": 212 }, { "epoch": 0.46595570139458575, "grad_norm": 0.04917526990175247, "learning_rate": 0.0028175182481751824, "loss": 0.7155, "step": 213 }, { "epoch": 0.46814328684714246, "grad_norm": 0.017666855826973915, "learning_rate": 0.002815085158150852, "loss": 0.7597, "step": 214 }, { "epoch": 0.4703308722996992, "grad_norm": 0.06158105283975601, "learning_rate": 0.0028126520681265206, "loss": 0.808, "step": 215 }, { "epoch": 0.4725184577522559, "grad_norm": 0.028100378811359406, "learning_rate": 0.0028102189781021897, "loss": 0.8217, "step": 216 }, { "epoch": 0.4747060432048127, "grad_norm": 0.02049509435892105, "learning_rate": 0.0028077858880778592, "loss": 0.8441, "step": 217 }, { "epoch": 0.47689362865736945, "grad_norm": 0.018524937331676483, "learning_rate": 0.002805352798053528, "loss": 0.7565, "step": 218 }, { "epoch": 0.47908121410992616, "grad_norm": 0.017941996455192566, "learning_rate": 0.002802919708029197, "loss": 0.7098, "step": 219 }, { "epoch": 0.4812687995624829, "grad_norm": 0.042154472321271896, "learning_rate": 0.0028004866180048665, "loss": 0.8742, "step": 220 }, { "epoch": 0.4834563850150396, "grad_norm": 0.026872573420405388, "learning_rate": 0.002798053527980535, "loss": 0.7273, "step": 221 }, { "epoch": 0.4856439704675964, "grad_norm": 0.02051514759659767, "learning_rate": 0.0027956204379562043, "loss": 0.795, "step": 222 }, { "epoch": 0.48783155592015315, "grad_norm": 0.02145540714263916, "learning_rate": 0.0027931873479318738, "loss": 0.8192, "step": 223 }, { "epoch": 0.49001914137270985, "grad_norm": 0.04769520461559296, "learning_rate": 0.002790754257907543, "loss": 0.8592, "step": 224 }, { "epoch": 0.4922067268252666, "grad_norm": 0.01415792852640152, "learning_rate": 0.0027883211678832115, "loss": 0.7553, "step": 225 }, { "epoch": 0.4943943122778234, "grad_norm": 0.012172535061836243, "learning_rate": 0.002785888077858881, "loss": 0.739, "step": 226 }, { "epoch": 0.4965818977303801, "grad_norm": 0.055700596421957016, "learning_rate": 0.00278345498783455, "loss": 0.9405, "step": 227 }, { "epoch": 0.49876948318293685, "grad_norm": 0.025790488347411156, "learning_rate": 0.002781021897810219, "loss": 0.6641, "step": 228 }, { "epoch": 0.5009570686354936, "grad_norm": 0.013937574811279774, "learning_rate": 0.0027785888077858883, "loss": 0.7727, "step": 229 }, { "epoch": 0.5031446540880503, "grad_norm": 0.03238683566451073, "learning_rate": 0.0027761557177615574, "loss": 0.8109, "step": 230 }, { "epoch": 0.505332239540607, "grad_norm": 0.06841892749071121, "learning_rate": 0.002773722627737226, "loss": 0.7827, "step": 231 }, { "epoch": 0.5075198249931638, "grad_norm": 0.05782823637127876, "learning_rate": 0.002771289537712895, "loss": 0.9616, "step": 232 }, { "epoch": 0.5097074104457205, "grad_norm": 0.1389644742012024, "learning_rate": 0.0027688564476885647, "loss": 0.7447, "step": 233 }, { "epoch": 0.5118949958982772, "grad_norm": 0.07213829457759857, "learning_rate": 0.002766423357664234, "loss": 0.8738, "step": 234 }, { "epoch": 0.5140825813508341, "grad_norm": 0.03161882609128952, "learning_rate": 0.0027639902676399025, "loss": 0.5307, "step": 235 }, { "epoch": 0.5162701668033908, "grad_norm": 0.03051130659878254, "learning_rate": 0.002761557177615572, "loss": 0.7123, "step": 236 }, { "epoch": 0.5184577522559475, "grad_norm": 0.02562803030014038, "learning_rate": 0.002759124087591241, "loss": 0.8167, "step": 237 }, { "epoch": 0.5206453377085042, "grad_norm": 0.03016614355146885, "learning_rate": 0.0027566909975669097, "loss": 0.6904, "step": 238 }, { "epoch": 0.522832923161061, "grad_norm": 0.01147315464913845, "learning_rate": 0.0027542579075425793, "loss": 0.7007, "step": 239 }, { "epoch": 0.5250205086136177, "grad_norm": 0.017779918387532234, "learning_rate": 0.0027518248175182483, "loss": 0.9054, "step": 240 }, { "epoch": 0.5272080940661744, "grad_norm": 0.03238027170300484, "learning_rate": 0.002749391727493917, "loss": 0.7599, "step": 241 }, { "epoch": 0.5293956795187312, "grad_norm": 0.007716326508671045, "learning_rate": 0.0027469586374695865, "loss": 0.7561, "step": 242 }, { "epoch": 0.5315832649712879, "grad_norm": 0.028708985075354576, "learning_rate": 0.0027445255474452556, "loss": 0.6842, "step": 243 }, { "epoch": 0.5337708504238446, "grad_norm": 0.021554840728640556, "learning_rate": 0.0027420924574209247, "loss": 0.9046, "step": 244 }, { "epoch": 0.5359584358764015, "grad_norm": 0.010056296363472939, "learning_rate": 0.002739659367396594, "loss": 0.7747, "step": 245 }, { "epoch": 0.5381460213289582, "grad_norm": 0.014583374373614788, "learning_rate": 0.002737226277372263, "loss": 0.8104, "step": 246 }, { "epoch": 0.5403336067815149, "grad_norm": 0.10760743170976639, "learning_rate": 0.002734793187347932, "loss": 1.0181, "step": 247 }, { "epoch": 0.5425211922340717, "grad_norm": 0.030982421711087227, "learning_rate": 0.002732360097323601, "loss": 0.7125, "step": 248 }, { "epoch": 0.5447087776866284, "grad_norm": 0.017710238695144653, "learning_rate": 0.00272992700729927, "loss": 0.9256, "step": 249 }, { "epoch": 0.5468963631391851, "grad_norm": 0.027831239625811577, "learning_rate": 0.0027274939172749393, "loss": 0.7537, "step": 250 }, { "epoch": 0.5490839485917418, "grad_norm": 0.019798962399363518, "learning_rate": 0.0027250608272506084, "loss": 0.6165, "step": 251 }, { "epoch": 0.5512715340442986, "grad_norm": 0.00836907234042883, "learning_rate": 0.0027226277372262775, "loss": 0.7968, "step": 252 }, { "epoch": 0.5534591194968553, "grad_norm": 0.018117599189281464, "learning_rate": 0.0027201946472019465, "loss": 0.6087, "step": 253 }, { "epoch": 0.555646704949412, "grad_norm": 0.017056763172149658, "learning_rate": 0.0027177615571776156, "loss": 0.7837, "step": 254 }, { "epoch": 0.5578342904019689, "grad_norm": 0.009035620838403702, "learning_rate": 0.0027153284671532847, "loss": 0.6376, "step": 255 }, { "epoch": 0.5600218758545256, "grad_norm": 0.015250611118972301, "learning_rate": 0.002712895377128954, "loss": 0.7869, "step": 256 }, { "epoch": 0.5622094613070823, "grad_norm": 0.014554915949702263, "learning_rate": 0.002710462287104623, "loss": 0.9046, "step": 257 }, { "epoch": 0.5643970467596391, "grad_norm": 0.011779931373894215, "learning_rate": 0.002708029197080292, "loss": 0.8662, "step": 258 }, { "epoch": 0.5665846322121958, "grad_norm": 0.012663912028074265, "learning_rate": 0.002705596107055961, "loss": 1.3081, "step": 259 }, { "epoch": 0.5687722176647525, "grad_norm": 0.0059722489677369595, "learning_rate": 0.00270316301703163, "loss": 0.6796, "step": 260 }, { "epoch": 0.5709598031173093, "grad_norm": 0.03664208948612213, "learning_rate": 0.0027007299270072993, "loss": 0.9093, "step": 261 }, { "epoch": 0.573147388569866, "grad_norm": 0.042986199259757996, "learning_rate": 0.0026982968369829684, "loss": 0.9444, "step": 262 }, { "epoch": 0.5753349740224227, "grad_norm": 0.012048511765897274, "learning_rate": 0.0026958637469586375, "loss": 0.8134, "step": 263 }, { "epoch": 0.5775225594749795, "grad_norm": 0.012062503024935722, "learning_rate": 0.0026934306569343066, "loss": 0.7274, "step": 264 }, { "epoch": 0.5797101449275363, "grad_norm": 0.02607789821922779, "learning_rate": 0.0026909975669099757, "loss": 0.6531, "step": 265 }, { "epoch": 0.581897730380093, "grad_norm": 0.014329343102872372, "learning_rate": 0.002688564476885645, "loss": 0.6966, "step": 266 }, { "epoch": 0.5840853158326497, "grad_norm": 0.013629244640469551, "learning_rate": 0.002686131386861314, "loss": 0.7831, "step": 267 }, { "epoch": 0.5862729012852065, "grad_norm": 0.009315542876720428, "learning_rate": 0.002683698296836983, "loss": 0.6297, "step": 268 }, { "epoch": 0.5884604867377632, "grad_norm": 0.051916949450969696, "learning_rate": 0.002681265206812652, "loss": 0.7651, "step": 269 }, { "epoch": 0.5906480721903199, "grad_norm": 0.012272450141608715, "learning_rate": 0.002678832116788321, "loss": 0.6713, "step": 270 }, { "epoch": 0.5928356576428767, "grad_norm": 0.011517216451466084, "learning_rate": 0.00267639902676399, "loss": 0.6117, "step": 271 }, { "epoch": 0.5950232430954334, "grad_norm": 0.010973330587148666, "learning_rate": 0.0026739659367396593, "loss": 0.7631, "step": 272 }, { "epoch": 0.5972108285479901, "grad_norm": 0.06580788642168045, "learning_rate": 0.0026715328467153284, "loss": 0.9153, "step": 273 }, { "epoch": 0.5993984140005469, "grad_norm": 0.011350773274898529, "learning_rate": 0.0026690997566909975, "loss": 0.8094, "step": 274 }, { "epoch": 0.6015859994531036, "grad_norm": 0.019090717658400536, "learning_rate": 0.0026666666666666666, "loss": 0.9304, "step": 275 }, { "epoch": 0.6037735849056604, "grad_norm": 0.015177314169704914, "learning_rate": 0.002664233576642336, "loss": 0.6859, "step": 276 }, { "epoch": 0.6059611703582172, "grad_norm": 0.020254317671060562, "learning_rate": 0.0026618004866180048, "loss": 0.8386, "step": 277 }, { "epoch": 0.6081487558107739, "grad_norm": 0.014171348884701729, "learning_rate": 0.002659367396593674, "loss": 0.8112, "step": 278 }, { "epoch": 0.6103363412633306, "grad_norm": 0.00894536729902029, "learning_rate": 0.0026569343065693434, "loss": 0.6877, "step": 279 }, { "epoch": 0.6125239267158873, "grad_norm": 0.011850811541080475, "learning_rate": 0.002654501216545012, "loss": 0.8639, "step": 280 }, { "epoch": 0.6147115121684441, "grad_norm": 0.012202342972159386, "learning_rate": 0.002652068126520681, "loss": 0.7851, "step": 281 }, { "epoch": 0.6168990976210008, "grad_norm": 0.014019378460943699, "learning_rate": 0.0026496350364963507, "loss": 0.945, "step": 282 }, { "epoch": 0.6190866830735575, "grad_norm": 0.013264323584735394, "learning_rate": 0.0026472019464720193, "loss": 0.6363, "step": 283 }, { "epoch": 0.6212742685261143, "grad_norm": 0.010803530924022198, "learning_rate": 0.0026447688564476884, "loss": 0.7855, "step": 284 }, { "epoch": 0.623461853978671, "grad_norm": 0.015852496027946472, "learning_rate": 0.002642335766423358, "loss": 0.6334, "step": 285 }, { "epoch": 0.6256494394312277, "grad_norm": 0.023904947564005852, "learning_rate": 0.002639902676399027, "loss": 0.5551, "step": 286 }, { "epoch": 0.6278370248837846, "grad_norm": 0.00868566520512104, "learning_rate": 0.0026374695863746957, "loss": 0.9256, "step": 287 }, { "epoch": 0.6300246103363413, "grad_norm": 0.011297028511762619, "learning_rate": 0.002635036496350365, "loss": 0.7896, "step": 288 }, { "epoch": 0.632212195788898, "grad_norm": 0.01018528826534748, "learning_rate": 0.0026326034063260343, "loss": 0.8198, "step": 289 }, { "epoch": 0.6343997812414548, "grad_norm": 0.015003956854343414, "learning_rate": 0.002630170316301703, "loss": 0.7424, "step": 290 }, { "epoch": 0.6365873666940115, "grad_norm": 0.007440235000103712, "learning_rate": 0.0026277372262773725, "loss": 0.6904, "step": 291 }, { "epoch": 0.6387749521465682, "grad_norm": 0.014310602098703384, "learning_rate": 0.0026253041362530416, "loss": 0.7179, "step": 292 }, { "epoch": 0.6409625375991249, "grad_norm": 0.008294426836073399, "learning_rate": 0.0026228710462287102, "loss": 0.827, "step": 293 }, { "epoch": 0.6431501230516817, "grad_norm": 0.006840107962489128, "learning_rate": 0.0026204379562043798, "loss": 0.6749, "step": 294 }, { "epoch": 0.6453377085042384, "grad_norm": 0.008538591675460339, "learning_rate": 0.002618004866180049, "loss": 0.7467, "step": 295 }, { "epoch": 0.6475252939567951, "grad_norm": 0.007157974410802126, "learning_rate": 0.0026155717761557175, "loss": 0.7233, "step": 296 }, { "epoch": 0.649712879409352, "grad_norm": 0.030327659100294113, "learning_rate": 0.002613138686131387, "loss": 0.6642, "step": 297 }, { "epoch": 0.6519004648619087, "grad_norm": 0.012880248948931694, "learning_rate": 0.002610705596107056, "loss": 0.9694, "step": 298 }, { "epoch": 0.6540880503144654, "grad_norm": 0.014233557507395744, "learning_rate": 0.0026082725060827252, "loss": 0.7686, "step": 299 }, { "epoch": 0.6562756357670222, "grad_norm": 0.008432603441178799, "learning_rate": 0.0026058394160583943, "loss": 0.9355, "step": 300 }, { "epoch": 0.6584632212195789, "grad_norm": 0.009492720477283001, "learning_rate": 0.0026034063260340634, "loss": 0.7637, "step": 301 }, { "epoch": 0.6606508066721356, "grad_norm": 0.008224152028560638, "learning_rate": 0.0026009732360097325, "loss": 0.7609, "step": 302 }, { "epoch": 0.6628383921246924, "grad_norm": 0.011647099629044533, "learning_rate": 0.0025985401459854016, "loss": 0.6565, "step": 303 }, { "epoch": 0.6650259775772491, "grad_norm": 0.0120640117675066, "learning_rate": 0.0025961070559610707, "loss": 0.6751, "step": 304 }, { "epoch": 0.6672135630298058, "grad_norm": 0.014007077552378178, "learning_rate": 0.0025936739659367398, "loss": 0.8132, "step": 305 }, { "epoch": 0.6694011484823625, "grad_norm": 0.014167044311761856, "learning_rate": 0.002591240875912409, "loss": 0.8102, "step": 306 }, { "epoch": 0.6715887339349194, "grad_norm": 0.016142327338457108, "learning_rate": 0.002588807785888078, "loss": 0.8004, "step": 307 }, { "epoch": 0.6737763193874761, "grad_norm": 0.007279639132320881, "learning_rate": 0.002586374695863747, "loss": 0.732, "step": 308 }, { "epoch": 0.6759639048400328, "grad_norm": 0.011619196273386478, "learning_rate": 0.002583941605839416, "loss": 0.603, "step": 309 }, { "epoch": 0.6781514902925896, "grad_norm": 0.011564897373318672, "learning_rate": 0.0025815085158150852, "loss": 0.9163, "step": 310 }, { "epoch": 0.6803390757451463, "grad_norm": 0.010117938742041588, "learning_rate": 0.0025790754257907543, "loss": 0.8683, "step": 311 }, { "epoch": 0.682526661197703, "grad_norm": 0.017769185826182365, "learning_rate": 0.0025766423357664234, "loss": 0.6244, "step": 312 }, { "epoch": 0.6847142466502598, "grad_norm": 0.012199788354337215, "learning_rate": 0.0025742092457420925, "loss": 0.7076, "step": 313 }, { "epoch": 0.6869018321028165, "grad_norm": 0.008083075284957886, "learning_rate": 0.0025717761557177616, "loss": 0.8658, "step": 314 }, { "epoch": 0.6890894175553732, "grad_norm": 0.01086794026196003, "learning_rate": 0.0025693430656934307, "loss": 0.6941, "step": 315 }, { "epoch": 0.69127700300793, "grad_norm": 0.010161925107240677, "learning_rate": 0.0025669099756691, "loss": 0.6715, "step": 316 }, { "epoch": 0.6934645884604868, "grad_norm": 0.008891239762306213, "learning_rate": 0.002564476885644769, "loss": 0.8093, "step": 317 }, { "epoch": 0.6956521739130435, "grad_norm": 0.018787039443850517, "learning_rate": 0.002562043795620438, "loss": 0.8482, "step": 318 }, { "epoch": 0.6978397593656002, "grad_norm": 0.02541973814368248, "learning_rate": 0.002559610705596107, "loss": 0.76, "step": 319 }, { "epoch": 0.700027344818157, "grad_norm": 0.011948470957577229, "learning_rate": 0.002557177615571776, "loss": 0.7625, "step": 320 }, { "epoch": 0.7022149302707137, "grad_norm": 0.009559310041368008, "learning_rate": 0.0025547445255474453, "loss": 0.7354, "step": 321 }, { "epoch": 0.7044025157232704, "grad_norm": 0.008267502300441265, "learning_rate": 0.0025523114355231144, "loss": 0.7065, "step": 322 }, { "epoch": 0.7065901011758272, "grad_norm": 0.010692731477320194, "learning_rate": 0.0025498783454987834, "loss": 0.983, "step": 323 }, { "epoch": 0.7087776866283839, "grad_norm": 0.0124723045155406, "learning_rate": 0.0025474452554744525, "loss": 0.6154, "step": 324 }, { "epoch": 0.7109652720809406, "grad_norm": 0.015448692254722118, "learning_rate": 0.0025450121654501216, "loss": 0.6129, "step": 325 }, { "epoch": 0.7131528575334974, "grad_norm": 0.013601388782262802, "learning_rate": 0.0025425790754257907, "loss": 0.7214, "step": 326 }, { "epoch": 0.7153404429860541, "grad_norm": 0.012070258148014545, "learning_rate": 0.00254014598540146, "loss": 0.7077, "step": 327 }, { "epoch": 0.7175280284386109, "grad_norm": 0.05267300084233284, "learning_rate": 0.0025377128953771293, "loss": 0.7714, "step": 328 }, { "epoch": 0.7197156138911677, "grad_norm": 0.012087949551641941, "learning_rate": 0.002535279805352798, "loss": 0.9047, "step": 329 }, { "epoch": 0.7219031993437244, "grad_norm": 0.01940520666539669, "learning_rate": 0.002532846715328467, "loss": 0.7804, "step": 330 }, { "epoch": 0.7240907847962811, "grad_norm": 0.011884646490216255, "learning_rate": 0.0025304136253041366, "loss": 0.6859, "step": 331 }, { "epoch": 0.7262783702488378, "grad_norm": 0.02514353021979332, "learning_rate": 0.0025279805352798053, "loss": 0.7764, "step": 332 }, { "epoch": 0.7284659557013946, "grad_norm": 0.015074629336595535, "learning_rate": 0.0025255474452554744, "loss": 0.6756, "step": 333 }, { "epoch": 0.7306535411539513, "grad_norm": 0.036420077085494995, "learning_rate": 0.002523114355231144, "loss": 0.7407, "step": 334 }, { "epoch": 0.732841126606508, "grad_norm": 0.015621097758412361, "learning_rate": 0.0025206812652068126, "loss": 0.8072, "step": 335 }, { "epoch": 0.7350287120590648, "grad_norm": 0.010994632728397846, "learning_rate": 0.0025182481751824816, "loss": 0.9436, "step": 336 }, { "epoch": 0.7372162975116215, "grad_norm": 0.017064619809389114, "learning_rate": 0.002515815085158151, "loss": 0.9386, "step": 337 }, { "epoch": 0.7394038829641782, "grad_norm": 0.023198846727609634, "learning_rate": 0.00251338199513382, "loss": 0.7892, "step": 338 }, { "epoch": 0.7415914684167351, "grad_norm": 0.005636582616716623, "learning_rate": 0.002510948905109489, "loss": 0.8005, "step": 339 }, { "epoch": 0.7437790538692918, "grad_norm": 0.008022590540349483, "learning_rate": 0.0025085158150851584, "loss": 0.9142, "step": 340 }, { "epoch": 0.7459666393218485, "grad_norm": 0.013106726109981537, "learning_rate": 0.0025060827250608275, "loss": 0.6845, "step": 341 }, { "epoch": 0.7481542247744053, "grad_norm": 0.015878600999712944, "learning_rate": 0.002503649635036496, "loss": 0.8528, "step": 342 }, { "epoch": 0.750341810226962, "grad_norm": 0.013783195056021214, "learning_rate": 0.0025012165450121657, "loss": 0.8487, "step": 343 }, { "epoch": 0.7525293956795187, "grad_norm": 0.05050954222679138, "learning_rate": 0.002498783454987835, "loss": 0.9014, "step": 344 }, { "epoch": 0.7547169811320755, "grad_norm": 0.009747706353664398, "learning_rate": 0.0024963503649635035, "loss": 0.8331, "step": 345 }, { "epoch": 0.7569045665846322, "grad_norm": 0.27641791105270386, "learning_rate": 0.0024939172749391726, "loss": 0.8328, "step": 346 }, { "epoch": 0.7590921520371889, "grad_norm": 0.022615063935518265, "learning_rate": 0.002491484184914842, "loss": 1.025, "step": 347 }, { "epoch": 0.7612797374897456, "grad_norm": 0.018037477508187294, "learning_rate": 0.0024890510948905108, "loss": 0.8058, "step": 348 }, { "epoch": 0.7634673229423025, "grad_norm": 0.03229966387152672, "learning_rate": 0.00248661800486618, "loss": 0.8224, "step": 349 }, { "epoch": 0.7656549083948592, "grad_norm": 0.03468572720885277, "learning_rate": 0.0024841849148418494, "loss": 0.6558, "step": 350 }, { "epoch": 0.7678424938474159, "grad_norm": 0.04352645203471184, "learning_rate": 0.0024817518248175185, "loss": 0.7869, "step": 351 }, { "epoch": 0.7700300792999727, "grad_norm": 0.0520501509308815, "learning_rate": 0.002479318734793187, "loss": 0.8318, "step": 352 }, { "epoch": 0.7722176647525294, "grad_norm": 0.025180073454976082, "learning_rate": 0.0024768856447688566, "loss": 0.8454, "step": 353 }, { "epoch": 0.7744052502050861, "grad_norm": 0.013843162916600704, "learning_rate": 0.0024744525547445257, "loss": 0.979, "step": 354 }, { "epoch": 0.7765928356576429, "grad_norm": 0.026960408315062523, "learning_rate": 0.0024720194647201944, "loss": 0.7692, "step": 355 }, { "epoch": 0.7787804211101996, "grad_norm": 0.02509387582540512, "learning_rate": 0.002469586374695864, "loss": 0.7471, "step": 356 }, { "epoch": 0.7809680065627563, "grad_norm": 0.014011479914188385, "learning_rate": 0.002467153284671533, "loss": 0.7752, "step": 357 }, { "epoch": 0.7831555920153132, "grad_norm": 0.01862008310854435, "learning_rate": 0.0024647201946472017, "loss": 0.9891, "step": 358 }, { "epoch": 0.7853431774678699, "grad_norm": 0.01249686349183321, "learning_rate": 0.002462287104622871, "loss": 0.9046, "step": 359 }, { "epoch": 0.7875307629204266, "grad_norm": 0.018710242584347725, "learning_rate": 0.0024598540145985403, "loss": 0.7926, "step": 360 }, { "epoch": 0.7897183483729833, "grad_norm": 0.015550883486866951, "learning_rate": 0.0024574209245742094, "loss": 0.9209, "step": 361 }, { "epoch": 0.7919059338255401, "grad_norm": 0.011178571730852127, "learning_rate": 0.0024549878345498785, "loss": 0.7962, "step": 362 }, { "epoch": 0.7940935192780968, "grad_norm": 0.017678866162896156, "learning_rate": 0.0024525547445255476, "loss": 0.9532, "step": 363 }, { "epoch": 0.7962811047306535, "grad_norm": 0.021445617079734802, "learning_rate": 0.0024501216545012167, "loss": 0.8302, "step": 364 }, { "epoch": 0.7984686901832103, "grad_norm": 0.015537573955953121, "learning_rate": 0.0024476885644768858, "loss": 0.7665, "step": 365 }, { "epoch": 0.800656275635767, "grad_norm": 0.015302474610507488, "learning_rate": 0.002445255474452555, "loss": 0.7161, "step": 366 }, { "epoch": 0.8028438610883237, "grad_norm": 0.013649791479110718, "learning_rate": 0.002442822384428224, "loss": 0.6766, "step": 367 }, { "epoch": 0.8050314465408805, "grad_norm": 0.01138269528746605, "learning_rate": 0.002440389294403893, "loss": 0.7797, "step": 368 }, { "epoch": 0.8072190319934373, "grad_norm": 0.014025691896677017, "learning_rate": 0.002437956204379562, "loss": 0.779, "step": 369 }, { "epoch": 0.809406617445994, "grad_norm": 0.011000445112586021, "learning_rate": 0.002435523114355231, "loss": 0.8064, "step": 370 }, { "epoch": 0.8115942028985508, "grad_norm": 0.010309292934834957, "learning_rate": 0.0024330900243309003, "loss": 0.7252, "step": 371 }, { "epoch": 0.8137817883511075, "grad_norm": 0.007664249278604984, "learning_rate": 0.0024306569343065694, "loss": 0.7081, "step": 372 }, { "epoch": 0.8159693738036642, "grad_norm": 0.015154222957789898, "learning_rate": 0.0024282238442822385, "loss": 0.7869, "step": 373 }, { "epoch": 0.8181569592562209, "grad_norm": 0.01371028833091259, "learning_rate": 0.0024257907542579076, "loss": 0.7423, "step": 374 }, { "epoch": 0.8203445447087777, "grad_norm": 0.012794865295290947, "learning_rate": 0.0024233576642335767, "loss": 0.9341, "step": 375 }, { "epoch": 0.8225321301613344, "grad_norm": 0.011340939439833164, "learning_rate": 0.0024209245742092458, "loss": 1.0406, "step": 376 }, { "epoch": 0.8247197156138911, "grad_norm": 0.013491635210812092, "learning_rate": 0.002418491484184915, "loss": 0.763, "step": 377 }, { "epoch": 0.8269073010664479, "grad_norm": 0.008016029372811317, "learning_rate": 0.002416058394160584, "loss": 0.7132, "step": 378 }, { "epoch": 0.8290948865190046, "grad_norm": 0.011460046283900738, "learning_rate": 0.002413625304136253, "loss": 0.6306, "step": 379 }, { "epoch": 0.8312824719715614, "grad_norm": 0.0110190873965621, "learning_rate": 0.002411192214111922, "loss": 0.6944, "step": 380 }, { "epoch": 0.8334700574241182, "grad_norm": 0.008347691036760807, "learning_rate": 0.0024087591240875912, "loss": 0.8926, "step": 381 }, { "epoch": 0.8356576428766749, "grad_norm": 0.007940311916172504, "learning_rate": 0.0024063260340632603, "loss": 0.8666, "step": 382 }, { "epoch": 0.8378452283292316, "grad_norm": 0.011534546501934528, "learning_rate": 0.0024038929440389294, "loss": 0.9077, "step": 383 }, { "epoch": 0.8400328137817884, "grad_norm": 0.010218126699328423, "learning_rate": 0.0024014598540145985, "loss": 0.8393, "step": 384 }, { "epoch": 0.8422203992343451, "grad_norm": 0.01117737777531147, "learning_rate": 0.0023990267639902676, "loss": 0.8401, "step": 385 }, { "epoch": 0.8444079846869018, "grad_norm": 0.01495604682713747, "learning_rate": 0.0023965936739659367, "loss": 0.6524, "step": 386 }, { "epoch": 0.8465955701394585, "grad_norm": 0.01132154744118452, "learning_rate": 0.002394160583941606, "loss": 0.6973, "step": 387 }, { "epoch": 0.8487831555920153, "grad_norm": 0.016704557463526726, "learning_rate": 0.002391727493917275, "loss": 0.8638, "step": 388 }, { "epoch": 0.850970741044572, "grad_norm": 0.03163198381662369, "learning_rate": 0.002389294403892944, "loss": 0.6569, "step": 389 }, { "epoch": 0.8531583264971287, "grad_norm": 0.009892611764371395, "learning_rate": 0.002386861313868613, "loss": 0.8507, "step": 390 }, { "epoch": 0.8553459119496856, "grad_norm": 0.009704566560685635, "learning_rate": 0.002384428223844282, "loss": 0.7567, "step": 391 }, { "epoch": 0.8575334974022423, "grad_norm": 0.011233623139560223, "learning_rate": 0.0023819951338199512, "loss": 0.9072, "step": 392 }, { "epoch": 0.859721082854799, "grad_norm": 0.017818894237279892, "learning_rate": 0.0023795620437956208, "loss": 0.6716, "step": 393 }, { "epoch": 0.8619086683073558, "grad_norm": 0.009800358675420284, "learning_rate": 0.0023771289537712894, "loss": 0.6331, "step": 394 }, { "epoch": 0.8640962537599125, "grad_norm": 0.00855625793337822, "learning_rate": 0.0023746958637469585, "loss": 0.8208, "step": 395 }, { "epoch": 0.8662838392124692, "grad_norm": 0.007912772707641125, "learning_rate": 0.002372262773722628, "loss": 0.6897, "step": 396 }, { "epoch": 0.868471424665026, "grad_norm": 0.015991948544979095, "learning_rate": 0.0023698296836982967, "loss": 0.5838, "step": 397 }, { "epoch": 0.8706590101175827, "grad_norm": 0.013330014422535896, "learning_rate": 0.002367396593673966, "loss": 0.7765, "step": 398 }, { "epoch": 0.8728465955701394, "grad_norm": 0.0108262337744236, "learning_rate": 0.0023649635036496353, "loss": 0.8259, "step": 399 }, { "epoch": 0.8750341810226961, "grad_norm": 0.01277016382664442, "learning_rate": 0.002362530413625304, "loss": 0.5084, "step": 400 }, { "epoch": 0.877221766475253, "grad_norm": 0.00825558416545391, "learning_rate": 0.002360097323600973, "loss": 0.8388, "step": 401 }, { "epoch": 0.8794093519278097, "grad_norm": 0.008703862316906452, "learning_rate": 0.0023576642335766426, "loss": 0.889, "step": 402 }, { "epoch": 0.8815969373803664, "grad_norm": 0.009978721849620342, "learning_rate": 0.0023552311435523117, "loss": 0.7724, "step": 403 }, { "epoch": 0.8837845228329232, "grad_norm": 0.009193633683025837, "learning_rate": 0.0023527980535279804, "loss": 0.9257, "step": 404 }, { "epoch": 0.8859721082854799, "grad_norm": 0.009905806742608547, "learning_rate": 0.00235036496350365, "loss": 0.9046, "step": 405 }, { "epoch": 0.8881596937380366, "grad_norm": 0.0108295027166605, "learning_rate": 0.002347931873479319, "loss": 0.6427, "step": 406 }, { "epoch": 0.8903472791905934, "grad_norm": 0.010898306965827942, "learning_rate": 0.0023454987834549876, "loss": 0.6888, "step": 407 }, { "epoch": 0.8925348646431501, "grad_norm": 0.013794617727398872, "learning_rate": 0.002343065693430657, "loss": 0.8544, "step": 408 }, { "epoch": 0.8947224500957068, "grad_norm": 0.014423336833715439, "learning_rate": 0.0023406326034063262, "loss": 0.7525, "step": 409 }, { "epoch": 0.8969100355482637, "grad_norm": 0.010249799117445946, "learning_rate": 0.002338199513381995, "loss": 0.7588, "step": 410 }, { "epoch": 0.8990976210008204, "grad_norm": 0.014359788969159126, "learning_rate": 0.0023357664233576644, "loss": 0.8303, "step": 411 }, { "epoch": 0.9012852064533771, "grad_norm": 0.007848945446312428, "learning_rate": 0.0023333333333333335, "loss": 0.7478, "step": 412 }, { "epoch": 0.9034727919059339, "grad_norm": 0.010217231698334217, "learning_rate": 0.0023309002433090026, "loss": 0.8758, "step": 413 }, { "epoch": 0.9056603773584906, "grad_norm": 0.008166585117578506, "learning_rate": 0.0023284671532846717, "loss": 0.8669, "step": 414 }, { "epoch": 0.9078479628110473, "grad_norm": 0.08122234046459198, "learning_rate": 0.002326034063260341, "loss": 0.8672, "step": 415 }, { "epoch": 0.910035548263604, "grad_norm": 0.026630746200680733, "learning_rate": 0.00232360097323601, "loss": 0.8429, "step": 416 }, { "epoch": 0.9122231337161608, "grad_norm": 0.011199391447007656, "learning_rate": 0.002321167883211679, "loss": 0.7394, "step": 417 }, { "epoch": 0.9144107191687175, "grad_norm": 0.034359946846961975, "learning_rate": 0.002318734793187348, "loss": 0.757, "step": 418 }, { "epoch": 0.9165983046212742, "grad_norm": 0.007310883607715368, "learning_rate": 0.002316301703163017, "loss": 0.8614, "step": 419 }, { "epoch": 0.918785890073831, "grad_norm": 0.017180046066641808, "learning_rate": 0.002313868613138686, "loss": 0.7018, "step": 420 }, { "epoch": 0.9209734755263878, "grad_norm": 0.010772480629384518, "learning_rate": 0.0023114355231143554, "loss": 1.0247, "step": 421 }, { "epoch": 0.9231610609789445, "grad_norm": 0.013757293112576008, "learning_rate": 0.0023090024330900244, "loss": 0.7243, "step": 422 }, { "epoch": 0.9253486464315013, "grad_norm": 0.010658146813511848, "learning_rate": 0.0023065693430656935, "loss": 0.8289, "step": 423 }, { "epoch": 0.927536231884058, "grad_norm": 0.013902239501476288, "learning_rate": 0.0023041362530413626, "loss": 0.7706, "step": 424 }, { "epoch": 0.9297238173366147, "grad_norm": 0.011173736304044724, "learning_rate": 0.0023017031630170317, "loss": 0.8055, "step": 425 }, { "epoch": 0.9319114027891715, "grad_norm": 0.011386138387024403, "learning_rate": 0.002299270072992701, "loss": 0.6273, "step": 426 }, { "epoch": 0.9340989882417282, "grad_norm": 0.008862471207976341, "learning_rate": 0.00229683698296837, "loss": 0.7032, "step": 427 }, { "epoch": 0.9362865736942849, "grad_norm": 0.02106628008186817, "learning_rate": 0.002294403892944039, "loss": 0.7835, "step": 428 }, { "epoch": 0.9384741591468416, "grad_norm": 0.010091581381857395, "learning_rate": 0.002291970802919708, "loss": 0.6805, "step": 429 }, { "epoch": 0.9406617445993984, "grad_norm": 0.012447184883058071, "learning_rate": 0.002289537712895377, "loss": 0.7323, "step": 430 }, { "epoch": 0.9428493300519551, "grad_norm": 0.015980314463377, "learning_rate": 0.0022871046228710463, "loss": 0.8842, "step": 431 }, { "epoch": 0.9450369155045119, "grad_norm": 0.007705094758421183, "learning_rate": 0.0022846715328467154, "loss": 0.8907, "step": 432 }, { "epoch": 0.9472245009570687, "grad_norm": 0.00878717191517353, "learning_rate": 0.0022822384428223845, "loss": 0.7455, "step": 433 }, { "epoch": 0.9494120864096254, "grad_norm": 0.026101326569914818, "learning_rate": 0.0022798053527980536, "loss": 0.6827, "step": 434 }, { "epoch": 0.9515996718621821, "grad_norm": 0.008718657307326794, "learning_rate": 0.0022773722627737226, "loss": 0.9253, "step": 435 }, { "epoch": 0.9537872573147389, "grad_norm": 0.009151890873908997, "learning_rate": 0.0022749391727493917, "loss": 0.8735, "step": 436 }, { "epoch": 0.9559748427672956, "grad_norm": 0.012189007364213467, "learning_rate": 0.002272506082725061, "loss": 0.94, "step": 437 }, { "epoch": 0.9581624282198523, "grad_norm": 0.00890439935028553, "learning_rate": 0.00227007299270073, "loss": 0.7572, "step": 438 }, { "epoch": 0.9603500136724091, "grad_norm": 0.013200386427342892, "learning_rate": 0.002267639902676399, "loss": 0.7361, "step": 439 }, { "epoch": 0.9625375991249658, "grad_norm": 0.011736634187400341, "learning_rate": 0.002265206812652068, "loss": 0.6326, "step": 440 }, { "epoch": 0.9647251845775225, "grad_norm": 0.006781425327062607, "learning_rate": 0.002262773722627737, "loss": 0.7254, "step": 441 }, { "epoch": 0.9669127700300792, "grad_norm": 0.008296315558254719, "learning_rate": 0.0022603406326034063, "loss": 0.6898, "step": 442 }, { "epoch": 0.9691003554826361, "grad_norm": 0.008293522521853447, "learning_rate": 0.0022579075425790754, "loss": 0.7953, "step": 443 }, { "epoch": 0.9712879409351928, "grad_norm": 0.00848364643752575, "learning_rate": 0.0022554744525547445, "loss": 0.8203, "step": 444 }, { "epoch": 0.9734755263877495, "grad_norm": 0.012193895876407623, "learning_rate": 0.002253041362530414, "loss": 0.6794, "step": 445 }, { "epoch": 0.9756631118403063, "grad_norm": 0.018784867599606514, "learning_rate": 0.0022506082725060827, "loss": 0.5793, "step": 446 }, { "epoch": 0.977850697292863, "grad_norm": 0.008517356589436531, "learning_rate": 0.0022481751824817518, "loss": 0.4866, "step": 447 }, { "epoch": 0.9800382827454197, "grad_norm": 0.017300793901085854, "learning_rate": 0.0022457420924574213, "loss": 0.8304, "step": 448 }, { "epoch": 0.9822258681979765, "grad_norm": 0.010441828519105911, "learning_rate": 0.00224330900243309, "loss": 0.9823, "step": 449 }, { "epoch": 0.9844134536505332, "grad_norm": 0.013992452062666416, "learning_rate": 0.002240875912408759, "loss": 0.7828, "step": 450 }, { "epoch": 0.9866010391030899, "grad_norm": 0.006943755783140659, "learning_rate": 0.0022384428223844286, "loss": 0.6205, "step": 451 }, { "epoch": 0.9887886245556468, "grad_norm": 0.0063702561892569065, "learning_rate": 0.0022360097323600972, "loss": 1.0355, "step": 452 }, { "epoch": 0.9909762100082035, "grad_norm": 0.007510766386985779, "learning_rate": 0.0022335766423357663, "loss": 0.7581, "step": 453 }, { "epoch": 0.9931637954607602, "grad_norm": 0.010165141895413399, "learning_rate": 0.002231143552311436, "loss": 0.8831, "step": 454 }, { "epoch": 0.9953513809133169, "grad_norm": 0.012972669675946236, "learning_rate": 0.002228710462287105, "loss": 0.6523, "step": 455 }, { "epoch": 0.9975389663658737, "grad_norm": 0.007454239297658205, "learning_rate": 0.0022262773722627736, "loss": 0.8721, "step": 456 }, { "epoch": 0.9997265518184304, "grad_norm": 0.007078221533447504, "learning_rate": 0.0022238442822384427, "loss": 0.6737, "step": 457 }, { "epoch": 1.0019141372709872, "grad_norm": 0.021942665800452232, "learning_rate": 0.002221411192214112, "loss": 0.8231, "step": 458 }, { "epoch": 1.0041017227235438, "grad_norm": 0.019108066335320473, "learning_rate": 0.002218978102189781, "loss": 0.6809, "step": 459 }, { "epoch": 1.0062893081761006, "grad_norm": 0.013495873659849167, "learning_rate": 0.00221654501216545, "loss": 0.7663, "step": 460 }, { "epoch": 1.0084768936286574, "grad_norm": 0.009844646789133549, "learning_rate": 0.0022141119221411195, "loss": 0.8189, "step": 461 }, { "epoch": 1.010664479081214, "grad_norm": 0.008135687559843063, "learning_rate": 0.002211678832116788, "loss": 0.7935, "step": 462 }, { "epoch": 1.0128520645337709, "grad_norm": 0.01022945623844862, "learning_rate": 0.0022092457420924572, "loss": 0.7855, "step": 463 }, { "epoch": 1.0150396499863277, "grad_norm": 0.011145783588290215, "learning_rate": 0.0022068126520681268, "loss": 0.9334, "step": 464 }, { "epoch": 1.0172272354388843, "grad_norm": 0.014914394356310368, "learning_rate": 0.002204379562043796, "loss": 0.8769, "step": 465 }, { "epoch": 1.019414820891441, "grad_norm": 0.010317330248653889, "learning_rate": 0.0022019464720194645, "loss": 0.9083, "step": 466 }, { "epoch": 1.021602406343998, "grad_norm": 0.012516210786998272, "learning_rate": 0.002199513381995134, "loss": 0.7169, "step": 467 }, { "epoch": 1.0237899917965545, "grad_norm": 0.015528671443462372, "learning_rate": 0.002197080291970803, "loss": 0.6738, "step": 468 }, { "epoch": 1.0259775772491113, "grad_norm": 0.007066753227263689, "learning_rate": 0.002194647201946472, "loss": 0.5918, "step": 469 }, { "epoch": 1.0281651627016681, "grad_norm": 0.007939637638628483, "learning_rate": 0.0021922141119221413, "loss": 0.6588, "step": 470 }, { "epoch": 1.0303527481542247, "grad_norm": 0.007144363131374121, "learning_rate": 0.0021897810218978104, "loss": 0.4427, "step": 471 }, { "epoch": 1.0325403336067815, "grad_norm": 0.007886086590588093, "learning_rate": 0.002187347931873479, "loss": 0.7392, "step": 472 }, { "epoch": 1.0347279190593381, "grad_norm": 0.007826312445104122, "learning_rate": 0.0021849148418491486, "loss": 0.743, "step": 473 }, { "epoch": 1.036915504511895, "grad_norm": 0.007945370860397816, "learning_rate": 0.0021824817518248177, "loss": 0.6567, "step": 474 }, { "epoch": 1.0391030899644518, "grad_norm": 0.009234143421053886, "learning_rate": 0.0021800486618004863, "loss": 0.8079, "step": 475 }, { "epoch": 1.0412906754170084, "grad_norm": 0.011828969232738018, "learning_rate": 0.002177615571776156, "loss": 0.7132, "step": 476 }, { "epoch": 1.0434782608695652, "grad_norm": 0.008681892417371273, "learning_rate": 0.002175182481751825, "loss": 0.8417, "step": 477 }, { "epoch": 1.045665846322122, "grad_norm": 0.008761374279856682, "learning_rate": 0.002172749391727494, "loss": 0.7446, "step": 478 }, { "epoch": 1.0478534317746786, "grad_norm": 0.014171335846185684, "learning_rate": 0.002170316301703163, "loss": 0.739, "step": 479 }, { "epoch": 1.0500410172272354, "grad_norm": 0.011624401435256004, "learning_rate": 0.0021678832116788322, "loss": 0.8935, "step": 480 }, { "epoch": 1.0522286026797922, "grad_norm": 0.019760416820645332, "learning_rate": 0.0021654501216545013, "loss": 0.9159, "step": 481 }, { "epoch": 1.0544161881323488, "grad_norm": 0.0076353419572114944, "learning_rate": 0.0021630170316301704, "loss": 0.8153, "step": 482 }, { "epoch": 1.0566037735849056, "grad_norm": 0.009698878973722458, "learning_rate": 0.0021605839416058395, "loss": 0.8043, "step": 483 }, { "epoch": 1.0587913590374625, "grad_norm": 0.007674135267734528, "learning_rate": 0.0021581508515815086, "loss": 0.6816, "step": 484 }, { "epoch": 1.060978944490019, "grad_norm": 0.01642732322216034, "learning_rate": 0.0021557177615571777, "loss": 0.9525, "step": 485 }, { "epoch": 1.0631665299425759, "grad_norm": 0.016669275239109993, "learning_rate": 0.002153284671532847, "loss": 0.5482, "step": 486 }, { "epoch": 1.0653541153951327, "grad_norm": 0.012565388344228268, "learning_rate": 0.002150851581508516, "loss": 0.6211, "step": 487 }, { "epoch": 1.0675417008476893, "grad_norm": 0.01363010797649622, "learning_rate": 0.002148418491484185, "loss": 0.5152, "step": 488 }, { "epoch": 1.069729286300246, "grad_norm": 0.020599598065018654, "learning_rate": 0.002145985401459854, "loss": 0.8035, "step": 489 }, { "epoch": 1.071916871752803, "grad_norm": 0.013294585980474949, "learning_rate": 0.002143552311435523, "loss": 0.8999, "step": 490 }, { "epoch": 1.0741044572053595, "grad_norm": 0.038667161017656326, "learning_rate": 0.0021411192214111923, "loss": 0.7514, "step": 491 }, { "epoch": 1.0762920426579163, "grad_norm": 0.010547326877713203, "learning_rate": 0.0021386861313868613, "loss": 0.6819, "step": 492 }, { "epoch": 1.0784796281104732, "grad_norm": 0.009484006091952324, "learning_rate": 0.0021362530413625304, "loss": 0.6253, "step": 493 }, { "epoch": 1.0806672135630297, "grad_norm": 0.009657086804509163, "learning_rate": 0.0021338199513381995, "loss": 0.7112, "step": 494 }, { "epoch": 1.0828547990155866, "grad_norm": 0.01714419014751911, "learning_rate": 0.0021313868613138686, "loss": 0.9098, "step": 495 }, { "epoch": 1.0850423844681434, "grad_norm": 0.01343261357396841, "learning_rate": 0.0021289537712895377, "loss": 0.7902, "step": 496 }, { "epoch": 1.0872299699207, "grad_norm": 0.00883649941533804, "learning_rate": 0.002126520681265207, "loss": 0.9971, "step": 497 }, { "epoch": 1.0894175553732568, "grad_norm": 0.00613701157271862, "learning_rate": 0.002124087591240876, "loss": 0.7527, "step": 498 }, { "epoch": 1.0916051408258136, "grad_norm": 0.009846502915024757, "learning_rate": 0.002121654501216545, "loss": 0.7402, "step": 499 }, { "epoch": 1.0937927262783702, "grad_norm": 0.010731893591582775, "learning_rate": 0.002119221411192214, "loss": 0.7848, "step": 500 }, { "epoch": 1.095980311730927, "grad_norm": 0.011895066127181053, "learning_rate": 0.002116788321167883, "loss": 0.7164, "step": 501 }, { "epoch": 1.0981678971834836, "grad_norm": 0.007519803941249847, "learning_rate": 0.0021143552311435523, "loss": 0.9606, "step": 502 }, { "epoch": 1.1003554826360404, "grad_norm": 0.009692378342151642, "learning_rate": 0.0021119221411192214, "loss": 0.7633, "step": 503 }, { "epoch": 1.1025430680885973, "grad_norm": 0.011364142410457134, "learning_rate": 0.0021094890510948905, "loss": 0.6945, "step": 504 }, { "epoch": 1.1047306535411539, "grad_norm": 0.007994066923856735, "learning_rate": 0.0021070559610705595, "loss": 0.6423, "step": 505 }, { "epoch": 1.1069182389937107, "grad_norm": 0.02612650953233242, "learning_rate": 0.0021046228710462286, "loss": 0.8676, "step": 506 }, { "epoch": 1.1091058244462675, "grad_norm": 0.007825646549463272, "learning_rate": 0.002102189781021898, "loss": 0.5687, "step": 507 }, { "epoch": 1.111293409898824, "grad_norm": 0.008077848702669144, "learning_rate": 0.002099756690997567, "loss": 0.7509, "step": 508 }, { "epoch": 1.113480995351381, "grad_norm": 0.009620738215744495, "learning_rate": 0.002097323600973236, "loss": 0.5996, "step": 509 }, { "epoch": 1.1156685808039377, "grad_norm": 0.0255615022033453, "learning_rate": 0.0020948905109489054, "loss": 0.6696, "step": 510 }, { "epoch": 1.1178561662564943, "grad_norm": 0.010550931096076965, "learning_rate": 0.002092457420924574, "loss": 0.7019, "step": 511 }, { "epoch": 1.1200437517090511, "grad_norm": 0.028004566207528114, "learning_rate": 0.002090024330900243, "loss": 0.8809, "step": 512 }, { "epoch": 1.122231337161608, "grad_norm": 0.013075259514153004, "learning_rate": 0.0020875912408759127, "loss": 0.6108, "step": 513 }, { "epoch": 1.1244189226141645, "grad_norm": 0.015426448546350002, "learning_rate": 0.0020851581508515814, "loss": 0.7146, "step": 514 }, { "epoch": 1.1266065080667214, "grad_norm": 0.007735779043287039, "learning_rate": 0.0020827250608272505, "loss": 0.8517, "step": 515 }, { "epoch": 1.1287940935192782, "grad_norm": 0.012412245385348797, "learning_rate": 0.00208029197080292, "loss": 0.6694, "step": 516 }, { "epoch": 1.1309816789718348, "grad_norm": 0.009669258259236813, "learning_rate": 0.0020778588807785887, "loss": 0.612, "step": 517 }, { "epoch": 1.1331692644243916, "grad_norm": 0.010346516966819763, "learning_rate": 0.0020754257907542577, "loss": 0.7956, "step": 518 }, { "epoch": 1.1353568498769484, "grad_norm": 0.008683484978973866, "learning_rate": 0.0020729927007299273, "loss": 0.7012, "step": 519 }, { "epoch": 1.137544435329505, "grad_norm": 0.009093291126191616, "learning_rate": 0.0020705596107055964, "loss": 0.6406, "step": 520 }, { "epoch": 1.1397320207820618, "grad_norm": 0.019143717363476753, "learning_rate": 0.002068126520681265, "loss": 0.6632, "step": 521 }, { "epoch": 1.1419196062346186, "grad_norm": 0.008810199797153473, "learning_rate": 0.0020656934306569345, "loss": 0.6248, "step": 522 }, { "epoch": 1.1441071916871752, "grad_norm": 0.009826627559959888, "learning_rate": 0.0020632603406326036, "loss": 0.7367, "step": 523 }, { "epoch": 1.146294777139732, "grad_norm": 0.007178613916039467, "learning_rate": 0.0020608272506082723, "loss": 0.6688, "step": 524 }, { "epoch": 1.1484823625922886, "grad_norm": 0.00853504054248333, "learning_rate": 0.002058394160583942, "loss": 0.6802, "step": 525 }, { "epoch": 1.1506699480448455, "grad_norm": 0.011418921872973442, "learning_rate": 0.002055961070559611, "loss": 0.5832, "step": 526 }, { "epoch": 1.1528575334974023, "grad_norm": 0.015032613649964333, "learning_rate": 0.0020535279805352796, "loss": 0.6841, "step": 527 }, { "epoch": 1.155045118949959, "grad_norm": 0.008302520960569382, "learning_rate": 0.002051094890510949, "loss": 0.7869, "step": 528 }, { "epoch": 1.1572327044025157, "grad_norm": 0.006403745152056217, "learning_rate": 0.002048661800486618, "loss": 0.7054, "step": 529 }, { "epoch": 1.1594202898550725, "grad_norm": 0.00577664515003562, "learning_rate": 0.0020462287104622873, "loss": 0.8063, "step": 530 }, { "epoch": 1.161607875307629, "grad_norm": 0.011647713370621204, "learning_rate": 0.002043795620437956, "loss": 0.7921, "step": 531 }, { "epoch": 1.163795460760186, "grad_norm": 0.011479120701551437, "learning_rate": 0.0020413625304136255, "loss": 0.9256, "step": 532 }, { "epoch": 1.1659830462127427, "grad_norm": 0.007622700184583664, "learning_rate": 0.0020389294403892946, "loss": 0.722, "step": 533 }, { "epoch": 1.1681706316652993, "grad_norm": 0.0064216419123113155, "learning_rate": 0.0020364963503649632, "loss": 0.6979, "step": 534 }, { "epoch": 1.1703582171178561, "grad_norm": 0.007917587645351887, "learning_rate": 0.0020340632603406327, "loss": 0.8049, "step": 535 }, { "epoch": 1.172545802570413, "grad_norm": 0.0061738938093185425, "learning_rate": 0.002031630170316302, "loss": 0.7057, "step": 536 }, { "epoch": 1.1747333880229696, "grad_norm": 0.0060928682796657085, "learning_rate": 0.0020291970802919705, "loss": 0.8, "step": 537 }, { "epoch": 1.1769209734755264, "grad_norm": 0.00664818799123168, "learning_rate": 0.00202676399026764, "loss": 0.7944, "step": 538 }, { "epoch": 1.1791085589280832, "grad_norm": 0.027486886829137802, "learning_rate": 0.002024330900243309, "loss": 0.8446, "step": 539 }, { "epoch": 1.1812961443806398, "grad_norm": 0.01736626587808132, "learning_rate": 0.002021897810218978, "loss": 0.8303, "step": 540 }, { "epoch": 1.1834837298331966, "grad_norm": 0.0084115294739604, "learning_rate": 0.0020194647201946473, "loss": 0.7323, "step": 541 }, { "epoch": 1.1856713152857534, "grad_norm": 0.01464123371988535, "learning_rate": 0.0020170316301703164, "loss": 0.8395, "step": 542 }, { "epoch": 1.18785890073831, "grad_norm": 0.007480619940906763, "learning_rate": 0.0020145985401459855, "loss": 0.7309, "step": 543 }, { "epoch": 1.1900464861908668, "grad_norm": 0.014315255917608738, "learning_rate": 0.0020121654501216546, "loss": 0.6468, "step": 544 }, { "epoch": 1.1922340716434237, "grad_norm": 0.009927434846758842, "learning_rate": 0.0020097323600973237, "loss": 0.7544, "step": 545 }, { "epoch": 1.1944216570959802, "grad_norm": 0.019481701776385307, "learning_rate": 0.0020072992700729928, "loss": 0.8124, "step": 546 }, { "epoch": 1.196609242548537, "grad_norm": 0.007046518847346306, "learning_rate": 0.002004866180048662, "loss": 0.6582, "step": 547 }, { "epoch": 1.1987968280010939, "grad_norm": 0.012643888592720032, "learning_rate": 0.002002433090024331, "loss": 0.8098, "step": 548 }, { "epoch": 1.2009844134536505, "grad_norm": 0.008585029281675816, "learning_rate": 0.002, "loss": 0.7206, "step": 549 }, { "epoch": 1.2031719989062073, "grad_norm": 0.014269394800066948, "learning_rate": 0.001997566909975669, "loss": 0.8426, "step": 550 }, { "epoch": 1.2053595843587641, "grad_norm": 0.006986747495830059, "learning_rate": 0.0019951338199513382, "loss": 0.7793, "step": 551 }, { "epoch": 1.2075471698113207, "grad_norm": 0.014269756153225899, "learning_rate": 0.0019927007299270073, "loss": 0.7668, "step": 552 }, { "epoch": 1.2097347552638775, "grad_norm": 0.009506807662546635, "learning_rate": 0.0019902676399026764, "loss": 0.771, "step": 553 }, { "epoch": 1.2119223407164341, "grad_norm": 0.008203186094760895, "learning_rate": 0.0019878345498783455, "loss": 0.8037, "step": 554 }, { "epoch": 1.214109926168991, "grad_norm": 0.01714324578642845, "learning_rate": 0.0019854014598540146, "loss": 0.66, "step": 555 }, { "epoch": 1.2162975116215478, "grad_norm": 0.01466370839625597, "learning_rate": 0.0019829683698296837, "loss": 0.8761, "step": 556 }, { "epoch": 1.2184850970741046, "grad_norm": 0.049504704773426056, "learning_rate": 0.0019805352798053528, "loss": 0.7717, "step": 557 }, { "epoch": 1.2206726825266612, "grad_norm": 0.010891391895711422, "learning_rate": 0.001978102189781022, "loss": 0.7754, "step": 558 }, { "epoch": 1.222860267979218, "grad_norm": 0.007297700271010399, "learning_rate": 0.001975669099756691, "loss": 0.882, "step": 559 }, { "epoch": 1.2250478534317746, "grad_norm": 0.010113504715263844, "learning_rate": 0.00197323600973236, "loss": 0.7514, "step": 560 }, { "epoch": 1.2272354388843314, "grad_norm": 0.0076246317476034164, "learning_rate": 0.001970802919708029, "loss": 0.9311, "step": 561 }, { "epoch": 1.2294230243368882, "grad_norm": 0.010274101980030537, "learning_rate": 0.0019683698296836987, "loss": 0.9348, "step": 562 }, { "epoch": 1.2316106097894448, "grad_norm": 0.007466154173016548, "learning_rate": 0.0019659367396593673, "loss": 0.6847, "step": 563 }, { "epoch": 1.2337981952420016, "grad_norm": 0.012906615622341633, "learning_rate": 0.0019635036496350364, "loss": 0.9068, "step": 564 }, { "epoch": 1.2359857806945584, "grad_norm": 0.008850296027958393, "learning_rate": 0.001961070559610706, "loss": 0.9032, "step": 565 }, { "epoch": 1.238173366147115, "grad_norm": 0.009153778664767742, "learning_rate": 0.0019586374695863746, "loss": 0.7872, "step": 566 }, { "epoch": 1.2403609515996719, "grad_norm": 0.014177209697663784, "learning_rate": 0.0019562043795620437, "loss": 0.7902, "step": 567 }, { "epoch": 1.2425485370522287, "grad_norm": 0.008819716051220894, "learning_rate": 0.001953771289537713, "loss": 0.5116, "step": 568 }, { "epoch": 1.2447361225047853, "grad_norm": 0.012600511312484741, "learning_rate": 0.001951338199513382, "loss": 0.8224, "step": 569 }, { "epoch": 1.246923707957342, "grad_norm": 0.012330558151006699, "learning_rate": 0.001948905109489051, "loss": 0.6959, "step": 570 }, { "epoch": 1.249111293409899, "grad_norm": 0.013719186186790466, "learning_rate": 0.00194647201946472, "loss": 0.8555, "step": 571 }, { "epoch": 1.2512988788624555, "grad_norm": 0.019239958375692368, "learning_rate": 0.0019440389294403894, "loss": 0.8459, "step": 572 }, { "epoch": 1.2534864643150123, "grad_norm": 0.00825503934174776, "learning_rate": 0.0019416058394160585, "loss": 0.6807, "step": 573 }, { "epoch": 1.2556740497675691, "grad_norm": 0.00811754260212183, "learning_rate": 0.0019391727493917273, "loss": 0.661, "step": 574 }, { "epoch": 1.2578616352201257, "grad_norm": 0.009656975045800209, "learning_rate": 0.0019367396593673967, "loss": 0.693, "step": 575 }, { "epoch": 1.2600492206726825, "grad_norm": 0.01010841503739357, "learning_rate": 0.0019343065693430658, "loss": 0.7331, "step": 576 }, { "epoch": 1.2622368061252391, "grad_norm": 0.01344444788992405, "learning_rate": 0.0019318734793187346, "loss": 0.89, "step": 577 }, { "epoch": 1.264424391577796, "grad_norm": 0.009256028570234776, "learning_rate": 0.001929440389294404, "loss": 0.7227, "step": 578 }, { "epoch": 1.2666119770303528, "grad_norm": 0.009699441492557526, "learning_rate": 0.001927007299270073, "loss": 0.6758, "step": 579 }, { "epoch": 1.2687995624829096, "grad_norm": 0.013547690585255623, "learning_rate": 0.001924574209245742, "loss": 0.8159, "step": 580 }, { "epoch": 1.2709871479354662, "grad_norm": 0.011569716967642307, "learning_rate": 0.0019221411192214114, "loss": 0.7126, "step": 581 }, { "epoch": 1.273174733388023, "grad_norm": 0.009194127283990383, "learning_rate": 0.0019197080291970803, "loss": 0.8327, "step": 582 }, { "epoch": 1.2753623188405796, "grad_norm": 0.01622292585670948, "learning_rate": 0.0019172749391727494, "loss": 0.8118, "step": 583 }, { "epoch": 1.2775499042931364, "grad_norm": 0.016841020435094833, "learning_rate": 0.0019148418491484187, "loss": 0.8746, "step": 584 }, { "epoch": 1.2797374897456932, "grad_norm": 0.011160912923514843, "learning_rate": 0.0019124087591240876, "loss": 0.7846, "step": 585 }, { "epoch": 1.28192507519825, "grad_norm": 0.013098710216581821, "learning_rate": 0.0019099756690997567, "loss": 0.666, "step": 586 }, { "epoch": 1.2841126606508066, "grad_norm": 0.008245709352195263, "learning_rate": 0.001907542579075426, "loss": 0.7799, "step": 587 }, { "epoch": 1.2863002461033635, "grad_norm": 0.005503001157194376, "learning_rate": 0.0019051094890510949, "loss": 0.605, "step": 588 }, { "epoch": 1.28848783155592, "grad_norm": 0.014160554856061935, "learning_rate": 0.001902676399026764, "loss": 0.7715, "step": 589 }, { "epoch": 1.2906754170084769, "grad_norm": 0.06220156326889992, "learning_rate": 0.0019002433090024333, "loss": 1.0173, "step": 590 }, { "epoch": 1.2928630024610337, "grad_norm": 0.023459481075406075, "learning_rate": 0.0018978102189781021, "loss": 0.7195, "step": 591 }, { "epoch": 1.2950505879135905, "grad_norm": 0.02028430998325348, "learning_rate": 0.0018953771289537712, "loss": 0.8889, "step": 592 }, { "epoch": 1.297238173366147, "grad_norm": 0.007861199788749218, "learning_rate": 0.0018929440389294405, "loss": 0.8249, "step": 593 }, { "epoch": 1.299425758818704, "grad_norm": 0.008794757537543774, "learning_rate": 0.0018905109489051096, "loss": 0.8978, "step": 594 }, { "epoch": 1.3016133442712605, "grad_norm": 0.027899743989109993, "learning_rate": 0.0018880778588807785, "loss": 0.8259, "step": 595 }, { "epoch": 1.3038009297238173, "grad_norm": 0.006755333859473467, "learning_rate": 0.0018856447688564478, "loss": 0.8913, "step": 596 }, { "epoch": 1.3059885151763742, "grad_norm": 0.016409730538725853, "learning_rate": 0.001883211678832117, "loss": 0.7902, "step": 597 }, { "epoch": 1.3081761006289307, "grad_norm": 0.012431084178388119, "learning_rate": 0.0018807785888077858, "loss": 0.5474, "step": 598 }, { "epoch": 1.3103636860814876, "grad_norm": 0.0099630793556571, "learning_rate": 0.001878345498783455, "loss": 0.7595, "step": 599 }, { "epoch": 1.3125512715340442, "grad_norm": 0.027248527854681015, "learning_rate": 0.0018759124087591242, "loss": 1.0273, "step": 600 }, { "epoch": 1.314738856986601, "grad_norm": 0.008029641583561897, "learning_rate": 0.001873479318734793, "loss": 0.6951, "step": 601 }, { "epoch": 1.3169264424391578, "grad_norm": 0.011218305677175522, "learning_rate": 0.0018710462287104626, "loss": 0.9217, "step": 602 }, { "epoch": 1.3191140278917146, "grad_norm": 0.024159464985132217, "learning_rate": 0.0018686131386861315, "loss": 0.7839, "step": 603 }, { "epoch": 1.3213016133442712, "grad_norm": 0.01127669122070074, "learning_rate": 0.0018661800486618006, "loss": 0.6711, "step": 604 }, { "epoch": 1.323489198796828, "grad_norm": 0.014322164468467236, "learning_rate": 0.0018637469586374699, "loss": 0.8935, "step": 605 }, { "epoch": 1.3256767842493846, "grad_norm": 0.010018724948167801, "learning_rate": 0.0018613138686131387, "loss": 0.7622, "step": 606 }, { "epoch": 1.3278643697019414, "grad_norm": 0.02816806361079216, "learning_rate": 0.0018588807785888078, "loss": 0.8948, "step": 607 }, { "epoch": 1.3300519551544983, "grad_norm": 0.011105911806225777, "learning_rate": 0.0018564476885644767, "loss": 0.754, "step": 608 }, { "epoch": 1.332239540607055, "grad_norm": 0.007195697631686926, "learning_rate": 0.001854014598540146, "loss": 0.6923, "step": 609 }, { "epoch": 1.3344271260596117, "grad_norm": 0.010149553418159485, "learning_rate": 0.001851581508515815, "loss": 0.8129, "step": 610 }, { "epoch": 1.3366147115121685, "grad_norm": 0.006798075046390295, "learning_rate": 0.001849148418491484, "loss": 0.5858, "step": 611 }, { "epoch": 1.338802296964725, "grad_norm": 0.006904991343617439, "learning_rate": 0.0018467153284671533, "loss": 0.7058, "step": 612 }, { "epoch": 1.340989882417282, "grad_norm": 0.019244657829403877, "learning_rate": 0.0018442822384428224, "loss": 0.7452, "step": 613 }, { "epoch": 1.3431774678698387, "grad_norm": 0.10027986764907837, "learning_rate": 0.0018418491484184915, "loss": 0.7935, "step": 614 }, { "epoch": 1.3453650533223955, "grad_norm": 0.028616629540920258, "learning_rate": 0.0018394160583941608, "loss": 0.7798, "step": 615 }, { "epoch": 1.3475526387749521, "grad_norm": 0.02287200279533863, "learning_rate": 0.0018369829683698297, "loss": 0.7231, "step": 616 }, { "epoch": 1.349740224227509, "grad_norm": 0.029162835329771042, "learning_rate": 0.0018345498783454988, "loss": 0.7196, "step": 617 }, { "epoch": 1.3519278096800655, "grad_norm": 0.00748335849493742, "learning_rate": 0.001832116788321168, "loss": 0.6841, "step": 618 }, { "epoch": 1.3541153951326224, "grad_norm": 0.012842601165175438, "learning_rate": 0.001829683698296837, "loss": 0.8114, "step": 619 }, { "epoch": 1.3563029805851792, "grad_norm": 0.01425047405064106, "learning_rate": 0.001827250608272506, "loss": 0.713, "step": 620 }, { "epoch": 1.3584905660377358, "grad_norm": 0.011411231942474842, "learning_rate": 0.0018248175182481753, "loss": 0.8576, "step": 621 }, { "epoch": 1.3606781514902926, "grad_norm": 0.02541513741016388, "learning_rate": 0.0018223844282238442, "loss": 0.7529, "step": 622 }, { "epoch": 1.3628657369428494, "grad_norm": 0.009776429273188114, "learning_rate": 0.0018199513381995133, "loss": 0.6062, "step": 623 }, { "epoch": 1.365053322395406, "grad_norm": 0.01603938452899456, "learning_rate": 0.0018175182481751826, "loss": 1.3558, "step": 624 }, { "epoch": 1.3672409078479628, "grad_norm": 0.01858574151992798, "learning_rate": 0.0018150851581508517, "loss": 0.7067, "step": 625 }, { "epoch": 1.3694284933005196, "grad_norm": 0.014604609459638596, "learning_rate": 0.0018126520681265206, "loss": 0.65, "step": 626 }, { "epoch": 1.3716160787530762, "grad_norm": 0.01383352093398571, "learning_rate": 0.00181021897810219, "loss": 0.724, "step": 627 }, { "epoch": 1.373803664205633, "grad_norm": 0.007166001014411449, "learning_rate": 0.001807785888077859, "loss": 0.7063, "step": 628 }, { "epoch": 1.3759912496581896, "grad_norm": 0.01364620216190815, "learning_rate": 0.0018053527980535279, "loss": 0.942, "step": 629 }, { "epoch": 1.3781788351107465, "grad_norm": 0.013178148306906223, "learning_rate": 0.0018029197080291972, "loss": 0.7134, "step": 630 }, { "epoch": 1.3803664205633033, "grad_norm": 0.016469091176986694, "learning_rate": 0.0018004866180048663, "loss": 0.8652, "step": 631 }, { "epoch": 1.38255400601586, "grad_norm": 0.008818808011710644, "learning_rate": 0.0017980535279805351, "loss": 0.7157, "step": 632 }, { "epoch": 1.3847415914684167, "grad_norm": 0.006165484432131052, "learning_rate": 0.0017956204379562047, "loss": 0.8267, "step": 633 }, { "epoch": 1.3869291769209735, "grad_norm": 0.017317302525043488, "learning_rate": 0.0017931873479318735, "loss": 0.7661, "step": 634 }, { "epoch": 1.38911676237353, "grad_norm": 0.01045684702694416, "learning_rate": 0.0017907542579075426, "loss": 0.797, "step": 635 }, { "epoch": 1.391304347826087, "grad_norm": 0.004696684889495373, "learning_rate": 0.001788321167883212, "loss": 0.793, "step": 636 }, { "epoch": 1.3934919332786437, "grad_norm": 0.01570739410817623, "learning_rate": 0.0017858880778588808, "loss": 0.9052, "step": 637 }, { "epoch": 1.3956795187312006, "grad_norm": 0.006558465771377087, "learning_rate": 0.00178345498783455, "loss": 0.7475, "step": 638 }, { "epoch": 1.3978671041837571, "grad_norm": 0.008167284540832043, "learning_rate": 0.0017810218978102192, "loss": 0.7801, "step": 639 }, { "epoch": 1.400054689636314, "grad_norm": 0.007898733019828796, "learning_rate": 0.001778588807785888, "loss": 0.7694, "step": 640 }, { "epoch": 1.4022422750888706, "grad_norm": 0.011702708899974823, "learning_rate": 0.0017761557177615572, "loss": 0.7104, "step": 641 }, { "epoch": 1.4044298605414274, "grad_norm": 0.01823602244257927, "learning_rate": 0.0017737226277372265, "loss": 0.837, "step": 642 }, { "epoch": 1.4066174459939842, "grad_norm": 0.019088082015514374, "learning_rate": 0.0017712895377128954, "loss": 0.8105, "step": 643 }, { "epoch": 1.408805031446541, "grad_norm": 0.008738362230360508, "learning_rate": 0.0017688564476885645, "loss": 0.9423, "step": 644 }, { "epoch": 1.4109926168990976, "grad_norm": 0.010799618437886238, "learning_rate": 0.0017664233576642336, "loss": 0.7173, "step": 645 }, { "epoch": 1.4131802023516544, "grad_norm": 0.007114489562809467, "learning_rate": 0.0017639902676399029, "loss": 0.7322, "step": 646 }, { "epoch": 1.415367787804211, "grad_norm": 0.021334782242774963, "learning_rate": 0.0017615571776155717, "loss": 0.7808, "step": 647 }, { "epoch": 1.4175553732567678, "grad_norm": 0.06464671343564987, "learning_rate": 0.0017591240875912408, "loss": 0.8948, "step": 648 }, { "epoch": 1.4197429587093247, "grad_norm": 0.016822345554828644, "learning_rate": 0.0017566909975669101, "loss": 0.7481, "step": 649 }, { "epoch": 1.4219305441618812, "grad_norm": 0.01005722675472498, "learning_rate": 0.001754257907542579, "loss": 0.8027, "step": 650 }, { "epoch": 1.424118129614438, "grad_norm": 0.01469690166413784, "learning_rate": 0.0017518248175182481, "loss": 0.7487, "step": 651 }, { "epoch": 1.4263057150669949, "grad_norm": 0.013352830894291401, "learning_rate": 0.0017493917274939174, "loss": 0.9439, "step": 652 }, { "epoch": 1.4284933005195515, "grad_norm": 0.01574932225048542, "learning_rate": 0.0017469586374695863, "loss": 0.7926, "step": 653 }, { "epoch": 1.4306808859721083, "grad_norm": 0.012712597846984863, "learning_rate": 0.0017445255474452554, "loss": 0.8369, "step": 654 }, { "epoch": 1.4328684714246651, "grad_norm": 0.018248263746500015, "learning_rate": 0.0017420924574209247, "loss": 0.6585, "step": 655 }, { "epoch": 1.4350560568772217, "grad_norm": 0.0181551706045866, "learning_rate": 0.0017396593673965938, "loss": 0.8487, "step": 656 }, { "epoch": 1.4372436423297785, "grad_norm": 0.009059487842023373, "learning_rate": 0.0017372262773722627, "loss": 0.8897, "step": 657 }, { "epoch": 1.4394312277823351, "grad_norm": 0.007483980618417263, "learning_rate": 0.001734793187347932, "loss": 0.6673, "step": 658 }, { "epoch": 1.441618813234892, "grad_norm": 0.007589507382363081, "learning_rate": 0.001732360097323601, "loss": 0.7013, "step": 659 }, { "epoch": 1.4438063986874488, "grad_norm": 0.011493782512843609, "learning_rate": 0.00172992700729927, "loss": 0.5457, "step": 660 }, { "epoch": 1.4459939841400056, "grad_norm": 0.027656735852360725, "learning_rate": 0.0017274939172749392, "loss": 0.7251, "step": 661 }, { "epoch": 1.4481815695925622, "grad_norm": 0.022569406777620316, "learning_rate": 0.0017250608272506083, "loss": 0.7104, "step": 662 }, { "epoch": 1.450369155045119, "grad_norm": 0.028735000640153885, "learning_rate": 0.0017226277372262772, "loss": 0.8682, "step": 663 }, { "epoch": 1.4525567404976756, "grad_norm": 0.012052370235323906, "learning_rate": 0.0017201946472019465, "loss": 0.7508, "step": 664 }, { "epoch": 1.4547443259502324, "grad_norm": 0.008707467466592789, "learning_rate": 0.0017177615571776156, "loss": 0.83, "step": 665 }, { "epoch": 1.4569319114027892, "grad_norm": 0.01061397884041071, "learning_rate": 0.0017153284671532847, "loss": 0.9431, "step": 666 }, { "epoch": 1.459119496855346, "grad_norm": 0.011903772130608559, "learning_rate": 0.001712895377128954, "loss": 0.723, "step": 667 }, { "epoch": 1.4613070823079026, "grad_norm": 0.03922785073518753, "learning_rate": 0.001710462287104623, "loss": 0.6581, "step": 668 }, { "epoch": 1.4634946677604594, "grad_norm": 0.014414667151868343, "learning_rate": 0.001708029197080292, "loss": 0.8511, "step": 669 }, { "epoch": 1.465682253213016, "grad_norm": 0.010338617488741875, "learning_rate": 0.0017055961070559613, "loss": 0.7162, "step": 670 }, { "epoch": 1.4678698386655729, "grad_norm": 0.011176107451319695, "learning_rate": 0.0017031630170316302, "loss": 0.8674, "step": 671 }, { "epoch": 1.4700574241181297, "grad_norm": 0.014365148730576038, "learning_rate": 0.0017007299270072993, "loss": 0.7739, "step": 672 }, { "epoch": 1.4722450095706865, "grad_norm": 0.019749363884329796, "learning_rate": 0.0016982968369829686, "loss": 0.7571, "step": 673 }, { "epoch": 1.474432595023243, "grad_norm": 0.011761876754462719, "learning_rate": 0.0016958637469586374, "loss": 0.7208, "step": 674 }, { "epoch": 1.4766201804758, "grad_norm": 0.025715123862028122, "learning_rate": 0.0016934306569343065, "loss": 0.7554, "step": 675 }, { "epoch": 1.4788077659283565, "grad_norm": 0.028069710358977318, "learning_rate": 0.0016909975669099759, "loss": 0.6652, "step": 676 }, { "epoch": 1.4809953513809133, "grad_norm": 0.02627987042069435, "learning_rate": 0.001688564476885645, "loss": 0.7924, "step": 677 }, { "epoch": 1.4831829368334701, "grad_norm": 0.005099075846374035, "learning_rate": 0.0016861313868613138, "loss": 0.75, "step": 678 }, { "epoch": 1.4853705222860267, "grad_norm": 0.007156622130423784, "learning_rate": 0.0016836982968369831, "loss": 0.8034, "step": 679 }, { "epoch": 1.4875581077385835, "grad_norm": 0.008162274025380611, "learning_rate": 0.0016812652068126522, "loss": 0.6174, "step": 680 }, { "epoch": 1.4897456931911401, "grad_norm": 0.01390012539923191, "learning_rate": 0.001678832116788321, "loss": 0.7813, "step": 681 }, { "epoch": 1.491933278643697, "grad_norm": 0.03663848340511322, "learning_rate": 0.0016763990267639902, "loss": 0.6028, "step": 682 }, { "epoch": 1.4941208640962538, "grad_norm": 0.01389587577432394, "learning_rate": 0.0016739659367396595, "loss": 0.9186, "step": 683 }, { "epoch": 1.4963084495488106, "grad_norm": 0.007214284967631102, "learning_rate": 0.0016715328467153284, "loss": 1.0112, "step": 684 }, { "epoch": 1.4984960350013672, "grad_norm": 0.01086746621876955, "learning_rate": 0.0016690997566909975, "loss": 0.7628, "step": 685 }, { "epoch": 1.500683620453924, "grad_norm": 0.006750196684151888, "learning_rate": 0.0016666666666666668, "loss": 0.8025, "step": 686 }, { "epoch": 1.5028712059064806, "grad_norm": 0.012172271497547626, "learning_rate": 0.0016642335766423359, "loss": 0.7559, "step": 687 }, { "epoch": 1.5050587913590374, "grad_norm": 0.03923722356557846, "learning_rate": 0.0016618004866180047, "loss": 0.8227, "step": 688 }, { "epoch": 1.5072463768115942, "grad_norm": 0.020949123427271843, "learning_rate": 0.001659367396593674, "loss": 0.7272, "step": 689 }, { "epoch": 1.509433962264151, "grad_norm": 0.012365633621811867, "learning_rate": 0.0016569343065693431, "loss": 0.9127, "step": 690 }, { "epoch": 1.5116215477167076, "grad_norm": 0.012725708074867725, "learning_rate": 0.001654501216545012, "loss": 0.7576, "step": 691 }, { "epoch": 1.5138091331692645, "grad_norm": 0.014691759832203388, "learning_rate": 0.0016520681265206813, "loss": 0.6951, "step": 692 }, { "epoch": 1.515996718621821, "grad_norm": 0.009719770401716232, "learning_rate": 0.0016496350364963504, "loss": 0.6947, "step": 693 }, { "epoch": 1.5181843040743779, "grad_norm": 0.0074682896956801414, "learning_rate": 0.0016472019464720193, "loss": 0.8467, "step": 694 }, { "epoch": 1.5203718895269347, "grad_norm": 0.011303418315947056, "learning_rate": 0.0016447688564476886, "loss": 0.7453, "step": 695 }, { "epoch": 1.5225594749794915, "grad_norm": 0.009616104885935783, "learning_rate": 0.0016423357664233577, "loss": 0.8284, "step": 696 }, { "epoch": 1.524747060432048, "grad_norm": 0.004562855698168278, "learning_rate": 0.0016399026763990268, "loss": 0.776, "step": 697 }, { "epoch": 1.5269346458846047, "grad_norm": 0.0057913740165531635, "learning_rate": 0.001637469586374696, "loss": 0.5635, "step": 698 }, { "epoch": 1.5291222313371615, "grad_norm": 0.011465840972959995, "learning_rate": 0.001635036496350365, "loss": 0.7466, "step": 699 }, { "epoch": 1.5313098167897183, "grad_norm": 0.009356693364679813, "learning_rate": 0.001632603406326034, "loss": 0.7555, "step": 700 }, { "epoch": 1.5334974022422752, "grad_norm": 0.01132314745336771, "learning_rate": 0.0016301703163017034, "loss": 0.6987, "step": 701 }, { "epoch": 1.535684987694832, "grad_norm": 0.011162355542182922, "learning_rate": 0.0016277372262773723, "loss": 0.7895, "step": 702 }, { "epoch": 1.5378725731473886, "grad_norm": 0.008752882480621338, "learning_rate": 0.0016253041362530413, "loss": 0.7829, "step": 703 }, { "epoch": 1.5400601585999452, "grad_norm": 0.0067902375012636185, "learning_rate": 0.0016228710462287107, "loss": 0.7541, "step": 704 }, { "epoch": 1.542247744052502, "grad_norm": 0.010398069396615028, "learning_rate": 0.0016204379562043795, "loss": 0.84, "step": 705 }, { "epoch": 1.5444353295050588, "grad_norm": 0.006489087361842394, "learning_rate": 0.0016180048661800486, "loss": 0.7745, "step": 706 }, { "epoch": 1.5466229149576156, "grad_norm": 0.00789352972060442, "learning_rate": 0.001615571776155718, "loss": 0.7006, "step": 707 }, { "epoch": 1.5488105004101724, "grad_norm": 0.005906807258725166, "learning_rate": 0.001613138686131387, "loss": 0.826, "step": 708 }, { "epoch": 1.550998085862729, "grad_norm": 0.006026630289852619, "learning_rate": 0.001610705596107056, "loss": 0.6783, "step": 709 }, { "epoch": 1.5531856713152856, "grad_norm": 0.010388746857643127, "learning_rate": 0.0016082725060827252, "loss": 0.8531, "step": 710 }, { "epoch": 1.5553732567678424, "grad_norm": 0.01053705345839262, "learning_rate": 0.0016058394160583943, "loss": 0.7257, "step": 711 }, { "epoch": 1.5575608422203993, "grad_norm": 0.006276300642639399, "learning_rate": 0.0016034063260340632, "loss": 0.7996, "step": 712 }, { "epoch": 1.559748427672956, "grad_norm": 0.006276302970945835, "learning_rate": 0.0016009732360097325, "loss": 0.8443, "step": 713 }, { "epoch": 1.5619360131255127, "grad_norm": 0.008509790524840355, "learning_rate": 0.0015985401459854016, "loss": 0.7289, "step": 714 }, { "epoch": 1.5641235985780695, "grad_norm": 0.01978105679154396, "learning_rate": 0.0015961070559610705, "loss": 0.846, "step": 715 }, { "epoch": 1.566311184030626, "grad_norm": 0.012076129205524921, "learning_rate": 0.0015936739659367398, "loss": 0.7292, "step": 716 }, { "epoch": 1.568498769483183, "grad_norm": 0.01716456562280655, "learning_rate": 0.0015912408759124089, "loss": 0.7655, "step": 717 }, { "epoch": 1.5706863549357397, "grad_norm": 0.016601664945483208, "learning_rate": 0.001588807785888078, "loss": 0.7277, "step": 718 }, { "epoch": 1.5728739403882965, "grad_norm": 0.010958652012050152, "learning_rate": 0.0015863746958637468, "loss": 0.7392, "step": 719 }, { "epoch": 1.5750615258408531, "grad_norm": 0.007287964224815369, "learning_rate": 0.0015839416058394161, "loss": 0.822, "step": 720 }, { "epoch": 1.57724911129341, "grad_norm": 0.010577067732810974, "learning_rate": 0.0015815085158150852, "loss": 0.732, "step": 721 }, { "epoch": 1.5794366967459665, "grad_norm": 0.007742591667920351, "learning_rate": 0.001579075425790754, "loss": 0.8312, "step": 722 }, { "epoch": 1.5816242821985234, "grad_norm": 0.009659879840910435, "learning_rate": 0.0015766423357664234, "loss": 0.8213, "step": 723 }, { "epoch": 1.5838118676510802, "grad_norm": 0.015149835497140884, "learning_rate": 0.0015742092457420925, "loss": 0.6992, "step": 724 }, { "epoch": 1.585999453103637, "grad_norm": 0.007888193242251873, "learning_rate": 0.0015717761557177614, "loss": 0.8853, "step": 725 }, { "epoch": 1.5881870385561936, "grad_norm": 0.011876450851559639, "learning_rate": 0.0015693430656934307, "loss": 0.7645, "step": 726 }, { "epoch": 1.5903746240087502, "grad_norm": 0.015837261453270912, "learning_rate": 0.0015669099756690998, "loss": 0.8061, "step": 727 }, { "epoch": 1.592562209461307, "grad_norm": 0.006944081746041775, "learning_rate": 0.0015644768856447687, "loss": 0.6043, "step": 728 }, { "epoch": 1.5947497949138638, "grad_norm": 0.01456182450056076, "learning_rate": 0.0015620437956204382, "loss": 0.9343, "step": 729 }, { "epoch": 1.5969373803664206, "grad_norm": 0.007655070163309574, "learning_rate": 0.001559610705596107, "loss": 0.727, "step": 730 }, { "epoch": 1.5991249658189775, "grad_norm": 0.014365557581186295, "learning_rate": 0.0015571776155717761, "loss": 0.6884, "step": 731 }, { "epoch": 1.601312551271534, "grad_norm": 0.013196627609431744, "learning_rate": 0.0015547445255474455, "loss": 0.6522, "step": 732 }, { "epoch": 1.6035001367240906, "grad_norm": 0.0069740209728479385, "learning_rate": 0.0015523114355231143, "loss": 0.812, "step": 733 }, { "epoch": 1.6056877221766475, "grad_norm": 0.018947165459394455, "learning_rate": 0.0015498783454987834, "loss": 0.7464, "step": 734 }, { "epoch": 1.6078753076292043, "grad_norm": 0.02975570783019066, "learning_rate": 0.0015474452554744527, "loss": 1.0338, "step": 735 }, { "epoch": 1.610062893081761, "grad_norm": 0.01144670695066452, "learning_rate": 0.0015450121654501216, "loss": 0.9582, "step": 736 }, { "epoch": 1.612250478534318, "grad_norm": 0.08359838277101517, "learning_rate": 0.0015425790754257907, "loss": 0.6188, "step": 737 }, { "epoch": 1.6144380639868745, "grad_norm": 0.005582269746810198, "learning_rate": 0.00154014598540146, "loss": 0.6557, "step": 738 }, { "epoch": 1.616625649439431, "grad_norm": 0.008966202847659588, "learning_rate": 0.001537712895377129, "loss": 0.6564, "step": 739 }, { "epoch": 1.618813234891988, "grad_norm": 0.011794374324381351, "learning_rate": 0.001535279805352798, "loss": 0.8051, "step": 740 }, { "epoch": 1.6210008203445447, "grad_norm": 0.00766439875587821, "learning_rate": 0.0015328467153284673, "loss": 0.8145, "step": 741 }, { "epoch": 1.6231884057971016, "grad_norm": 0.014379739761352539, "learning_rate": 0.0015304136253041364, "loss": 0.8658, "step": 742 }, { "epoch": 1.6253759912496581, "grad_norm": 0.01025471929460764, "learning_rate": 0.0015279805352798053, "loss": 0.6969, "step": 743 }, { "epoch": 1.627563576702215, "grad_norm": 0.012737879529595375, "learning_rate": 0.0015255474452554746, "loss": 0.9006, "step": 744 }, { "epoch": 1.6297511621547716, "grad_norm": 0.0110158147290349, "learning_rate": 0.0015231143552311437, "loss": 0.7326, "step": 745 }, { "epoch": 1.6319387476073284, "grad_norm": 0.011220619082450867, "learning_rate": 0.0015206812652068125, "loss": 0.8275, "step": 746 }, { "epoch": 1.6341263330598852, "grad_norm": 0.00941223930567503, "learning_rate": 0.0015182481751824818, "loss": 0.8187, "step": 747 }, { "epoch": 1.636313918512442, "grad_norm": 0.004144694656133652, "learning_rate": 0.001515815085158151, "loss": 0.7248, "step": 748 }, { "epoch": 1.6385015039649986, "grad_norm": 0.013639383018016815, "learning_rate": 0.0015133819951338198, "loss": 0.7966, "step": 749 }, { "epoch": 1.6406890894175554, "grad_norm": 0.006385320797562599, "learning_rate": 0.0015109489051094893, "loss": 0.6772, "step": 750 }, { "epoch": 1.642876674870112, "grad_norm": 0.011585132218897343, "learning_rate": 0.0015085158150851582, "loss": 0.6696, "step": 751 }, { "epoch": 1.6450642603226688, "grad_norm": 0.023672277107834816, "learning_rate": 0.0015060827250608273, "loss": 0.6978, "step": 752 }, { "epoch": 1.6472518457752257, "grad_norm": 0.014683379791676998, "learning_rate": 0.0015036496350364966, "loss": 0.5735, "step": 753 }, { "epoch": 1.6494394312277825, "grad_norm": 0.010881925001740456, "learning_rate": 0.0015012165450121655, "loss": 0.6773, "step": 754 }, { "epoch": 1.651627016680339, "grad_norm": 0.009006233885884285, "learning_rate": 0.0014987834549878346, "loss": 0.7773, "step": 755 }, { "epoch": 1.6538146021328957, "grad_norm": 0.01426916103810072, "learning_rate": 0.0014963503649635037, "loss": 0.7436, "step": 756 }, { "epoch": 1.6560021875854525, "grad_norm": 0.005649265833199024, "learning_rate": 0.0014939172749391728, "loss": 0.7041, "step": 757 }, { "epoch": 1.6581897730380093, "grad_norm": 0.008767529390752316, "learning_rate": 0.0014914841849148419, "loss": 0.6701, "step": 758 }, { "epoch": 1.6603773584905661, "grad_norm": 0.007580756675451994, "learning_rate": 0.001489051094890511, "loss": 0.6593, "step": 759 }, { "epoch": 1.662564943943123, "grad_norm": 0.010842681862413883, "learning_rate": 0.0014866180048661803, "loss": 0.9414, "step": 760 }, { "epoch": 1.6647525293956795, "grad_norm": 0.008890979923307896, "learning_rate": 0.0014841849148418491, "loss": 0.8333, "step": 761 }, { "epoch": 1.6669401148482361, "grad_norm": 0.00815370213240385, "learning_rate": 0.0014817518248175182, "loss": 0.8596, "step": 762 }, { "epoch": 1.669127700300793, "grad_norm": 0.007434117142111063, "learning_rate": 0.0014793187347931875, "loss": 0.631, "step": 763 }, { "epoch": 1.6713152857533498, "grad_norm": 0.007965626195073128, "learning_rate": 0.0014768856447688564, "loss": 0.7377, "step": 764 }, { "epoch": 1.6735028712059066, "grad_norm": 0.014369670301675797, "learning_rate": 0.0014744525547445257, "loss": 0.6907, "step": 765 }, { "epoch": 1.6756904566584632, "grad_norm": 0.013002739287912846, "learning_rate": 0.0014720194647201946, "loss": 0.8491, "step": 766 }, { "epoch": 1.67787804211102, "grad_norm": 0.008742110803723335, "learning_rate": 0.0014695863746958637, "loss": 1.0319, "step": 767 }, { "epoch": 1.6800656275635766, "grad_norm": 0.01362073328346014, "learning_rate": 0.001467153284671533, "loss": 0.596, "step": 768 }, { "epoch": 1.6822532130161334, "grad_norm": 0.007842877879738808, "learning_rate": 0.0014647201946472019, "loss": 0.848, "step": 769 }, { "epoch": 1.6844407984686902, "grad_norm": 0.007685767021030188, "learning_rate": 0.001462287104622871, "loss": 0.6811, "step": 770 }, { "epoch": 1.686628383921247, "grad_norm": 0.07299596816301346, "learning_rate": 0.0014598540145985403, "loss": 0.7739, "step": 771 }, { "epoch": 1.6888159693738036, "grad_norm": 0.02475287765264511, "learning_rate": 0.0014574209245742091, "loss": 0.8412, "step": 772 }, { "epoch": 1.6910035548263604, "grad_norm": 0.02310485951602459, "learning_rate": 0.0014549878345498785, "loss": 0.7707, "step": 773 }, { "epoch": 1.693191140278917, "grad_norm": 0.006614830810576677, "learning_rate": 0.0014525547445255475, "loss": 0.9116, "step": 774 }, { "epoch": 1.6953787257314739, "grad_norm": 0.017114151269197464, "learning_rate": 0.0014501216545012164, "loss": 0.7767, "step": 775 }, { "epoch": 1.6975663111840307, "grad_norm": 0.007972135208547115, "learning_rate": 0.0014476885644768857, "loss": 0.8053, "step": 776 }, { "epoch": 1.6997538966365875, "grad_norm": 0.013452711515128613, "learning_rate": 0.0014452554744525548, "loss": 0.633, "step": 777 }, { "epoch": 1.701941482089144, "grad_norm": 0.01562053058296442, "learning_rate": 0.001442822384428224, "loss": 0.8312, "step": 778 }, { "epoch": 1.7041290675417007, "grad_norm": 0.006510770879685879, "learning_rate": 0.001440389294403893, "loss": 0.7721, "step": 779 }, { "epoch": 1.7063166529942575, "grad_norm": 0.011892448179423809, "learning_rate": 0.001437956204379562, "loss": 0.6629, "step": 780 }, { "epoch": 1.7085042384468143, "grad_norm": 0.005237538833171129, "learning_rate": 0.0014355231143552312, "loss": 0.5767, "step": 781 }, { "epoch": 1.7106918238993711, "grad_norm": 0.020627424120903015, "learning_rate": 0.0014330900243309003, "loss": 0.8974, "step": 782 }, { "epoch": 1.712879409351928, "grad_norm": 0.012742357328534126, "learning_rate": 0.0014306569343065694, "loss": 0.5843, "step": 783 }, { "epoch": 1.7150669948044845, "grad_norm": 0.011114447377622128, "learning_rate": 0.0014282238442822385, "loss": 0.9336, "step": 784 }, { "epoch": 1.7172545802570411, "grad_norm": 0.01212508138269186, "learning_rate": 0.0014257907542579076, "loss": 0.853, "step": 785 }, { "epoch": 1.719442165709598, "grad_norm": 0.006842518225312233, "learning_rate": 0.0014233576642335767, "loss": 0.8329, "step": 786 }, { "epoch": 1.7216297511621548, "grad_norm": 0.008684076368808746, "learning_rate": 0.0014209245742092457, "loss": 0.8503, "step": 787 }, { "epoch": 1.7238173366147116, "grad_norm": 0.009845465421676636, "learning_rate": 0.0014184914841849148, "loss": 0.9911, "step": 788 }, { "epoch": 1.7260049220672684, "grad_norm": 0.007301978301256895, "learning_rate": 0.001416058394160584, "loss": 0.6684, "step": 789 }, { "epoch": 1.728192507519825, "grad_norm": 0.010263817384839058, "learning_rate": 0.001413625304136253, "loss": 0.6852, "step": 790 }, { "epoch": 1.7303800929723816, "grad_norm": 0.012078475207090378, "learning_rate": 0.0014111922141119221, "loss": 0.6509, "step": 791 }, { "epoch": 1.7325676784249384, "grad_norm": 0.012108572758734226, "learning_rate": 0.0014087591240875912, "loss": 0.7183, "step": 792 }, { "epoch": 1.7347552638774952, "grad_norm": 0.011477826163172722, "learning_rate": 0.0014063260340632603, "loss": 0.8856, "step": 793 }, { "epoch": 1.736942849330052, "grad_norm": 0.007066864520311356, "learning_rate": 0.0014038929440389296, "loss": 0.6114, "step": 794 }, { "epoch": 1.7391304347826086, "grad_norm": 0.011538154445588589, "learning_rate": 0.0014014598540145985, "loss": 0.6716, "step": 795 }, { "epoch": 1.7413180202351655, "grad_norm": 0.008611057884991169, "learning_rate": 0.0013990267639902676, "loss": 0.9979, "step": 796 }, { "epoch": 1.743505605687722, "grad_norm": 0.013740317896008492, "learning_rate": 0.0013965936739659369, "loss": 0.8166, "step": 797 }, { "epoch": 1.7456931911402789, "grad_norm": 0.008636080659925938, "learning_rate": 0.0013941605839416058, "loss": 0.8138, "step": 798 }, { "epoch": 1.7478807765928357, "grad_norm": 0.008637238293886185, "learning_rate": 0.001391727493917275, "loss": 0.9225, "step": 799 }, { "epoch": 1.7500683620453925, "grad_norm": 0.022517461329698563, "learning_rate": 0.0013892944038929442, "loss": 0.735, "step": 800 }, { "epoch": 1.752255947497949, "grad_norm": 0.005302282981574535, "learning_rate": 0.001386861313868613, "loss": 0.657, "step": 801 }, { "epoch": 1.754443532950506, "grad_norm": 0.04943990707397461, "learning_rate": 0.0013844282238442824, "loss": 0.623, "step": 802 }, { "epoch": 1.7566311184030625, "grad_norm": 0.011758695356547832, "learning_rate": 0.0013819951338199512, "loss": 0.8038, "step": 803 }, { "epoch": 1.7588187038556193, "grad_norm": 0.009712104685604572, "learning_rate": 0.0013795620437956205, "loss": 0.7268, "step": 804 }, { "epoch": 1.7610062893081762, "grad_norm": 0.007741864304989576, "learning_rate": 0.0013771289537712896, "loss": 0.7049, "step": 805 }, { "epoch": 1.763193874760733, "grad_norm": 0.010713865980505943, "learning_rate": 0.0013746958637469585, "loss": 0.6425, "step": 806 }, { "epoch": 1.7653814602132896, "grad_norm": 0.006576141808182001, "learning_rate": 0.0013722627737226278, "loss": 0.7601, "step": 807 }, { "epoch": 1.7675690456658462, "grad_norm": 0.007796050515025854, "learning_rate": 0.001369829683698297, "loss": 0.659, "step": 808 }, { "epoch": 1.769756631118403, "grad_norm": 0.01460753008723259, "learning_rate": 0.001367396593673966, "loss": 0.769, "step": 809 }, { "epoch": 1.7719442165709598, "grad_norm": 0.010747969150543213, "learning_rate": 0.001364963503649635, "loss": 0.8531, "step": 810 }, { "epoch": 1.7741318020235166, "grad_norm": 0.011500733904540539, "learning_rate": 0.0013625304136253042, "loss": 0.7294, "step": 811 }, { "epoch": 1.7763193874760734, "grad_norm": 0.013433235697448254, "learning_rate": 0.0013600973236009733, "loss": 0.6442, "step": 812 }, { "epoch": 1.77850697292863, "grad_norm": 0.019317343831062317, "learning_rate": 0.0013576642335766424, "loss": 0.6254, "step": 813 }, { "epoch": 1.7806945583811866, "grad_norm": 0.020062780007719994, "learning_rate": 0.0013552311435523115, "loss": 0.6957, "step": 814 }, { "epoch": 1.7828821438337434, "grad_norm": 0.00756926229223609, "learning_rate": 0.0013527980535279806, "loss": 0.7532, "step": 815 }, { "epoch": 1.7850697292863003, "grad_norm": 0.0089380769059062, "learning_rate": 0.0013503649635036496, "loss": 0.6534, "step": 816 }, { "epoch": 1.787257314738857, "grad_norm": 0.006980338133871555, "learning_rate": 0.0013479318734793187, "loss": 0.7314, "step": 817 }, { "epoch": 1.789444900191414, "grad_norm": 0.0074529629200696945, "learning_rate": 0.0013454987834549878, "loss": 0.8291, "step": 818 }, { "epoch": 1.7916324856439705, "grad_norm": 0.02699979580938816, "learning_rate": 0.001343065693430657, "loss": 0.7249, "step": 819 }, { "epoch": 1.793820071096527, "grad_norm": 0.008204830810427666, "learning_rate": 0.001340632603406326, "loss": 0.7446, "step": 820 }, { "epoch": 1.796007656549084, "grad_norm": 0.006959575694054365, "learning_rate": 0.001338199513381995, "loss": 0.6694, "step": 821 }, { "epoch": 1.7981952420016407, "grad_norm": 0.006019539665430784, "learning_rate": 0.0013357664233576642, "loss": 0.7947, "step": 822 }, { "epoch": 1.8003828274541975, "grad_norm": 0.007515772711485624, "learning_rate": 0.0013333333333333333, "loss": 0.6259, "step": 823 }, { "epoch": 1.8025704129067541, "grad_norm": 0.0231679268181324, "learning_rate": 0.0013309002433090024, "loss": 0.5702, "step": 824 }, { "epoch": 1.804757998359311, "grad_norm": 0.009831500239670277, "learning_rate": 0.0013284671532846717, "loss": 0.7197, "step": 825 }, { "epoch": 1.8069455838118675, "grad_norm": 0.011389415711164474, "learning_rate": 0.0013260340632603406, "loss": 0.8466, "step": 826 }, { "epoch": 1.8091331692644244, "grad_norm": 0.010654733516275883, "learning_rate": 0.0013236009732360097, "loss": 0.7456, "step": 827 }, { "epoch": 1.8113207547169812, "grad_norm": 0.010770871303975582, "learning_rate": 0.001321167883211679, "loss": 0.6827, "step": 828 }, { "epoch": 1.813508340169538, "grad_norm": 0.00828484632074833, "learning_rate": 0.0013187347931873478, "loss": 0.6794, "step": 829 }, { "epoch": 1.8156959256220946, "grad_norm": 0.00973398145288229, "learning_rate": 0.0013163017031630172, "loss": 0.7354, "step": 830 }, { "epoch": 1.8178835110746514, "grad_norm": 0.00983220711350441, "learning_rate": 0.0013138686131386862, "loss": 0.8531, "step": 831 }, { "epoch": 1.820071096527208, "grad_norm": 0.02620159089565277, "learning_rate": 0.0013114355231143551, "loss": 0.7631, "step": 832 }, { "epoch": 1.8222586819797648, "grad_norm": 0.057880647480487823, "learning_rate": 0.0013090024330900244, "loss": 0.9336, "step": 833 }, { "epoch": 1.8244462674323216, "grad_norm": 0.011240589432418346, "learning_rate": 0.0013065693430656935, "loss": 0.5887, "step": 834 }, { "epoch": 1.8266338528848785, "grad_norm": 0.012356660328805447, "learning_rate": 0.0013041362530413626, "loss": 0.702, "step": 835 }, { "epoch": 1.828821438337435, "grad_norm": 0.006840168032795191, "learning_rate": 0.0013017031630170317, "loss": 0.756, "step": 836 }, { "epoch": 1.8310090237899916, "grad_norm": 0.005550102796405554, "learning_rate": 0.0012992700729927008, "loss": 0.7161, "step": 837 }, { "epoch": 1.8331966092425485, "grad_norm": 0.0120685501024127, "learning_rate": 0.0012968369829683699, "loss": 0.9234, "step": 838 }, { "epoch": 1.8353841946951053, "grad_norm": 0.008514792658388615, "learning_rate": 0.001294403892944039, "loss": 0.5988, "step": 839 }, { "epoch": 1.837571780147662, "grad_norm": 0.019344119355082512, "learning_rate": 0.001291970802919708, "loss": 0.8419, "step": 840 }, { "epoch": 1.839759365600219, "grad_norm": 0.01257373858243227, "learning_rate": 0.0012895377128953772, "loss": 0.6785, "step": 841 }, { "epoch": 1.8419469510527755, "grad_norm": 0.022899962961673737, "learning_rate": 0.0012871046228710463, "loss": 0.6617, "step": 842 }, { "epoch": 1.844134536505332, "grad_norm": 0.012275392189621925, "learning_rate": 0.0012846715328467154, "loss": 0.8096, "step": 843 }, { "epoch": 1.846322121957889, "grad_norm": 0.01191315334290266, "learning_rate": 0.0012822384428223844, "loss": 0.7757, "step": 844 }, { "epoch": 1.8485097074104457, "grad_norm": 0.012164206244051456, "learning_rate": 0.0012798053527980535, "loss": 0.7284, "step": 845 }, { "epoch": 1.8506972928630026, "grad_norm": 0.007747825235128403, "learning_rate": 0.0012773722627737226, "loss": 0.673, "step": 846 }, { "epoch": 1.8528848783155591, "grad_norm": 0.01633123680949211, "learning_rate": 0.0012749391727493917, "loss": 0.6006, "step": 847 }, { "epoch": 1.855072463768116, "grad_norm": 0.008600953966379166, "learning_rate": 0.0012725060827250608, "loss": 0.7354, "step": 848 }, { "epoch": 1.8572600492206726, "grad_norm": 0.008487503044307232, "learning_rate": 0.00127007299270073, "loss": 0.689, "step": 849 }, { "epoch": 1.8594476346732294, "grad_norm": 0.01615467295050621, "learning_rate": 0.001267639902676399, "loss": 0.7461, "step": 850 }, { "epoch": 1.8616352201257862, "grad_norm": 0.008541187271475792, "learning_rate": 0.0012652068126520683, "loss": 0.6958, "step": 851 }, { "epoch": 1.863822805578343, "grad_norm": 0.01053849421441555, "learning_rate": 0.0012627737226277372, "loss": 0.6786, "step": 852 }, { "epoch": 1.8660103910308996, "grad_norm": 0.008857163600623608, "learning_rate": 0.0012603406326034063, "loss": 0.6645, "step": 853 }, { "epoch": 1.8681979764834564, "grad_norm": 0.006793574895709753, "learning_rate": 0.0012579075425790756, "loss": 0.6311, "step": 854 }, { "epoch": 1.870385561936013, "grad_norm": 0.01936703361570835, "learning_rate": 0.0012554744525547445, "loss": 0.9318, "step": 855 }, { "epoch": 1.8725731473885698, "grad_norm": 0.009839971549808979, "learning_rate": 0.0012530413625304138, "loss": 0.7309, "step": 856 }, { "epoch": 1.8747607328411267, "grad_norm": 0.010399356484413147, "learning_rate": 0.0012506082725060829, "loss": 0.8351, "step": 857 }, { "epoch": 1.8769483182936835, "grad_norm": 0.014294488355517387, "learning_rate": 0.0012481751824817517, "loss": 0.6187, "step": 858 }, { "epoch": 1.87913590374624, "grad_norm": 0.011614672839641571, "learning_rate": 0.001245742092457421, "loss": 0.9295, "step": 859 }, { "epoch": 1.8813234891987969, "grad_norm": 0.015355818904936314, "learning_rate": 0.00124330900243309, "loss": 0.5266, "step": 860 }, { "epoch": 1.8835110746513535, "grad_norm": 0.011674858629703522, "learning_rate": 0.0012408759124087592, "loss": 0.6467, "step": 861 }, { "epoch": 1.8856986601039103, "grad_norm": 0.013345809653401375, "learning_rate": 0.0012384428223844283, "loss": 0.8166, "step": 862 }, { "epoch": 1.8878862455564671, "grad_norm": 0.009595265612006187, "learning_rate": 0.0012360097323600972, "loss": 0.7704, "step": 863 }, { "epoch": 1.890073831009024, "grad_norm": 0.01896647922694683, "learning_rate": 0.0012335766423357665, "loss": 0.7815, "step": 864 }, { "epoch": 1.8922614164615805, "grad_norm": 0.017639558762311935, "learning_rate": 0.0012311435523114356, "loss": 0.7979, "step": 865 }, { "epoch": 1.8944490019141371, "grad_norm": 0.022902049124240875, "learning_rate": 0.0012287104622871047, "loss": 0.8904, "step": 866 }, { "epoch": 1.896636587366694, "grad_norm": 0.0124649154022336, "learning_rate": 0.0012262773722627738, "loss": 0.7693, "step": 867 }, { "epoch": 1.8988241728192508, "grad_norm": 0.007474742829799652, "learning_rate": 0.0012238442822384429, "loss": 0.6641, "step": 868 }, { "epoch": 1.9010117582718076, "grad_norm": 0.008987569250166416, "learning_rate": 0.001221411192214112, "loss": 0.6378, "step": 869 }, { "epoch": 1.9031993437243644, "grad_norm": 0.009300309233367443, "learning_rate": 0.001218978102189781, "loss": 0.7426, "step": 870 }, { "epoch": 1.905386929176921, "grad_norm": 0.01408142875880003, "learning_rate": 0.0012165450121654502, "loss": 0.7824, "step": 871 }, { "epoch": 1.9075745146294776, "grad_norm": 0.00678917346522212, "learning_rate": 0.0012141119221411192, "loss": 0.7978, "step": 872 }, { "epoch": 1.9097621000820344, "grad_norm": 0.010661943815648556, "learning_rate": 0.0012116788321167883, "loss": 0.6591, "step": 873 }, { "epoch": 1.9119496855345912, "grad_norm": 0.009882554411888123, "learning_rate": 0.0012092457420924574, "loss": 0.7443, "step": 874 }, { "epoch": 1.914137270987148, "grad_norm": 0.12100229412317276, "learning_rate": 0.0012068126520681265, "loss": 0.8035, "step": 875 }, { "epoch": 1.9163248564397046, "grad_norm": 0.01500593964010477, "learning_rate": 0.0012043795620437956, "loss": 0.8671, "step": 876 }, { "epoch": 1.9185124418922614, "grad_norm": 0.01351536437869072, "learning_rate": 0.0012019464720194647, "loss": 0.824, "step": 877 }, { "epoch": 1.920700027344818, "grad_norm": 0.02334493212401867, "learning_rate": 0.0011995133819951338, "loss": 0.7728, "step": 878 }, { "epoch": 1.9228876127973749, "grad_norm": 0.04414600878953934, "learning_rate": 0.001197080291970803, "loss": 0.7811, "step": 879 }, { "epoch": 1.9250751982499317, "grad_norm": 0.03064621239900589, "learning_rate": 0.001194647201946472, "loss": 0.8812, "step": 880 }, { "epoch": 1.9272627837024885, "grad_norm": 0.010438323952257633, "learning_rate": 0.001192214111922141, "loss": 0.8027, "step": 881 }, { "epoch": 1.929450369155045, "grad_norm": 0.016364533454179764, "learning_rate": 0.0011897810218978104, "loss": 0.6239, "step": 882 }, { "epoch": 1.931637954607602, "grad_norm": 0.02069861628115177, "learning_rate": 0.0011873479318734793, "loss": 0.8137, "step": 883 }, { "epoch": 1.9338255400601585, "grad_norm": 0.017191501334309578, "learning_rate": 0.0011849148418491484, "loss": 0.8052, "step": 884 }, { "epoch": 1.9360131255127153, "grad_norm": 0.014077574014663696, "learning_rate": 0.0011824817518248177, "loss": 0.8584, "step": 885 }, { "epoch": 1.9382007109652721, "grad_norm": 0.009209788404405117, "learning_rate": 0.0011800486618004865, "loss": 0.6426, "step": 886 }, { "epoch": 1.940388296417829, "grad_norm": 0.026021014899015427, "learning_rate": 0.0011776155717761558, "loss": 0.7457, "step": 887 }, { "epoch": 1.9425758818703855, "grad_norm": 0.024019265547394753, "learning_rate": 0.001175182481751825, "loss": 0.869, "step": 888 }, { "epoch": 1.9447634673229421, "grad_norm": 0.020230406895279884, "learning_rate": 0.0011727493917274938, "loss": 0.8532, "step": 889 }, { "epoch": 1.946951052775499, "grad_norm": 0.018076736479997635, "learning_rate": 0.0011703163017031631, "loss": 0.7276, "step": 890 }, { "epoch": 1.9491386382280558, "grad_norm": 0.019679049029946327, "learning_rate": 0.0011678832116788322, "loss": 0.7214, "step": 891 }, { "epoch": 1.9513262236806126, "grad_norm": 0.010772393085062504, "learning_rate": 0.0011654501216545013, "loss": 0.6786, "step": 892 }, { "epoch": 1.9535138091331694, "grad_norm": 0.010874917730689049, "learning_rate": 0.0011630170316301704, "loss": 0.7272, "step": 893 }, { "epoch": 1.955701394585726, "grad_norm": 0.00815314520150423, "learning_rate": 0.0011605839416058395, "loss": 0.8908, "step": 894 }, { "epoch": 1.9578889800382826, "grad_norm": 0.008539310656487942, "learning_rate": 0.0011581508515815086, "loss": 0.6394, "step": 895 }, { "epoch": 1.9600765654908394, "grad_norm": 0.039017412811517715, "learning_rate": 0.0011557177615571777, "loss": 0.6505, "step": 896 }, { "epoch": 1.9622641509433962, "grad_norm": 0.009175320155918598, "learning_rate": 0.0011532846715328468, "loss": 0.975, "step": 897 }, { "epoch": 1.964451736395953, "grad_norm": 0.014542749151587486, "learning_rate": 0.0011508515815085159, "loss": 0.7222, "step": 898 }, { "epoch": 1.9666393218485099, "grad_norm": 0.01856316812336445, "learning_rate": 0.001148418491484185, "loss": 0.7575, "step": 899 }, { "epoch": 1.9688269073010665, "grad_norm": 0.007601718418300152, "learning_rate": 0.001145985401459854, "loss": 0.7233, "step": 900 }, { "epoch": 1.971014492753623, "grad_norm": 0.034239862114191055, "learning_rate": 0.0011435523114355231, "loss": 0.6989, "step": 901 }, { "epoch": 1.9732020782061799, "grad_norm": 0.00851233210414648, "learning_rate": 0.0011411192214111922, "loss": 0.8321, "step": 902 }, { "epoch": 1.9753896636587367, "grad_norm": 0.009412054903805256, "learning_rate": 0.0011386861313868613, "loss": 0.7139, "step": 903 }, { "epoch": 1.9775772491112935, "grad_norm": 0.012049161829054356, "learning_rate": 0.0011362530413625304, "loss": 0.6989, "step": 904 }, { "epoch": 1.97976483456385, "grad_norm": 0.010931652970612049, "learning_rate": 0.0011338199513381995, "loss": 0.8747, "step": 905 }, { "epoch": 1.981952420016407, "grad_norm": 0.015494965016841888, "learning_rate": 0.0011313868613138686, "loss": 0.8644, "step": 906 }, { "epoch": 1.9841400054689635, "grad_norm": 0.012480970472097397, "learning_rate": 0.0011289537712895377, "loss": 0.907, "step": 907 }, { "epoch": 1.9863275909215203, "grad_norm": 0.01492912694811821, "learning_rate": 0.001126520681265207, "loss": 0.7421, "step": 908 }, { "epoch": 1.9885151763740772, "grad_norm": 0.012027468532323837, "learning_rate": 0.0011240875912408759, "loss": 0.9274, "step": 909 }, { "epoch": 1.990702761826634, "grad_norm": 0.014835814945399761, "learning_rate": 0.001121654501216545, "loss": 0.8337, "step": 910 }, { "epoch": 1.9928903472791906, "grad_norm": 0.008667545393109322, "learning_rate": 0.0011192214111922143, "loss": 0.6117, "step": 911 }, { "epoch": 1.9950779327317474, "grad_norm": 0.01624200865626335, "learning_rate": 0.0011167883211678832, "loss": 0.8712, "step": 912 }, { "epoch": 1.997265518184304, "grad_norm": 0.008188914507627487, "learning_rate": 0.0011143552311435525, "loss": 0.8495, "step": 913 }, { "epoch": 1.9994531036368608, "grad_norm": 0.013718970119953156, "learning_rate": 0.0011119221411192213, "loss": 0.8417, "step": 914 }, { "epoch": 2.0016406890894176, "grad_norm": 0.00691073015332222, "learning_rate": 0.0011094890510948904, "loss": 0.7033, "step": 915 }, { "epoch": 2.0038282745419744, "grad_norm": 0.017321942374110222, "learning_rate": 0.0011070559610705597, "loss": 0.7979, "step": 916 }, { "epoch": 2.0060158599945312, "grad_norm": 0.007781198713928461, "learning_rate": 0.0011046228710462286, "loss": 0.6795, "step": 917 }, { "epoch": 2.0082034454470876, "grad_norm": 0.007755633443593979, "learning_rate": 0.001102189781021898, "loss": 0.6363, "step": 918 }, { "epoch": 2.0103910308996444, "grad_norm": 0.015355097129940987, "learning_rate": 0.001099756690997567, "loss": 0.7684, "step": 919 }, { "epoch": 2.0125786163522013, "grad_norm": 0.009972341358661652, "learning_rate": 0.001097323600973236, "loss": 0.7659, "step": 920 }, { "epoch": 2.014766201804758, "grad_norm": 0.00998846534639597, "learning_rate": 0.0010948905109489052, "loss": 0.918, "step": 921 }, { "epoch": 2.016953787257315, "grad_norm": 0.007050537038594484, "learning_rate": 0.0010924574209245743, "loss": 0.7083, "step": 922 }, { "epoch": 2.0191413727098713, "grad_norm": 0.008426625281572342, "learning_rate": 0.0010900243309002432, "loss": 0.7962, "step": 923 }, { "epoch": 2.021328958162428, "grad_norm": 0.009424027986824512, "learning_rate": 0.0010875912408759125, "loss": 0.7369, "step": 924 }, { "epoch": 2.023516543614985, "grad_norm": 0.012517026625573635, "learning_rate": 0.0010851581508515816, "loss": 0.8281, "step": 925 }, { "epoch": 2.0257041290675417, "grad_norm": 0.016427017748355865, "learning_rate": 0.0010827250608272507, "loss": 0.7808, "step": 926 }, { "epoch": 2.0278917145200985, "grad_norm": 0.011162400245666504, "learning_rate": 0.0010802919708029198, "loss": 0.8512, "step": 927 }, { "epoch": 2.0300792999726553, "grad_norm": 0.025822371244430542, "learning_rate": 0.0010778588807785888, "loss": 0.6347, "step": 928 }, { "epoch": 2.0322668854252117, "grad_norm": 0.008243129588663578, "learning_rate": 0.001075425790754258, "loss": 0.7126, "step": 929 }, { "epoch": 2.0344544708777685, "grad_norm": 0.01245404314249754, "learning_rate": 0.001072992700729927, "loss": 0.6111, "step": 930 }, { "epoch": 2.0366420563303254, "grad_norm": 0.006443020887672901, "learning_rate": 0.0010705596107055961, "loss": 0.6287, "step": 931 }, { "epoch": 2.038829641782882, "grad_norm": 0.01358412578701973, "learning_rate": 0.0010681265206812652, "loss": 1.0563, "step": 932 }, { "epoch": 2.041017227235439, "grad_norm": 0.010836120694875717, "learning_rate": 0.0010656934306569343, "loss": 0.7046, "step": 933 }, { "epoch": 2.043204812687996, "grad_norm": 0.012488581240177155, "learning_rate": 0.0010632603406326034, "loss": 0.8661, "step": 934 }, { "epoch": 2.045392398140552, "grad_norm": 0.009522946551442146, "learning_rate": 0.0010608272506082725, "loss": 0.6687, "step": 935 }, { "epoch": 2.047579983593109, "grad_norm": 0.03695467486977577, "learning_rate": 0.0010583941605839416, "loss": 0.7727, "step": 936 }, { "epoch": 2.049767569045666, "grad_norm": 0.04616512730717659, "learning_rate": 0.0010559610705596107, "loss": 0.7193, "step": 937 }, { "epoch": 2.0519551544982226, "grad_norm": 0.010503578931093216, "learning_rate": 0.0010535279805352798, "loss": 0.6701, "step": 938 }, { "epoch": 2.0541427399507795, "grad_norm": 0.008623762056231499, "learning_rate": 0.001051094890510949, "loss": 0.7161, "step": 939 }, { "epoch": 2.0563303254033363, "grad_norm": 0.007583661004900932, "learning_rate": 0.001048661800486618, "loss": 0.7402, "step": 940 }, { "epoch": 2.0585179108558926, "grad_norm": 0.008966002613306046, "learning_rate": 0.001046228710462287, "loss": 0.7016, "step": 941 }, { "epoch": 2.0607054963084495, "grad_norm": 0.0104443971067667, "learning_rate": 0.0010437956204379564, "loss": 0.7877, "step": 942 }, { "epoch": 2.0628930817610063, "grad_norm": 0.011073727160692215, "learning_rate": 0.0010413625304136252, "loss": 0.8216, "step": 943 }, { "epoch": 2.065080667213563, "grad_norm": 0.006104661151766777, "learning_rate": 0.0010389294403892943, "loss": 0.7218, "step": 944 }, { "epoch": 2.06726825266612, "grad_norm": 0.006152690388262272, "learning_rate": 0.0010364963503649636, "loss": 0.6807, "step": 945 }, { "epoch": 2.0694558381186763, "grad_norm": 0.01146136224269867, "learning_rate": 0.0010340632603406325, "loss": 0.8706, "step": 946 }, { "epoch": 2.071643423571233, "grad_norm": 0.008924251422286034, "learning_rate": 0.0010316301703163018, "loss": 0.7596, "step": 947 }, { "epoch": 2.07383100902379, "grad_norm": 0.01587800122797489, "learning_rate": 0.001029197080291971, "loss": 0.8315, "step": 948 }, { "epoch": 2.0760185944763467, "grad_norm": 0.007868033833801746, "learning_rate": 0.0010267639902676398, "loss": 0.8498, "step": 949 }, { "epoch": 2.0782061799289036, "grad_norm": 0.009655119851231575, "learning_rate": 0.001024330900243309, "loss": 0.909, "step": 950 }, { "epoch": 2.0803937653814604, "grad_norm": 0.014302834868431091, "learning_rate": 0.001021897810218978, "loss": 0.8934, "step": 951 }, { "epoch": 2.0825813508340167, "grad_norm": 0.008887048810720444, "learning_rate": 0.0010194647201946473, "loss": 0.62, "step": 952 }, { "epoch": 2.0847689362865736, "grad_norm": 0.016339240595698357, "learning_rate": 0.0010170316301703164, "loss": 0.7503, "step": 953 }, { "epoch": 2.0869565217391304, "grad_norm": 0.013042870908975601, "learning_rate": 0.0010145985401459853, "loss": 0.7425, "step": 954 }, { "epoch": 2.089144107191687, "grad_norm": 0.009357294999063015, "learning_rate": 0.0010121654501216546, "loss": 0.7565, "step": 955 }, { "epoch": 2.091331692644244, "grad_norm": 0.008100231178104877, "learning_rate": 0.0010097323600973237, "loss": 0.659, "step": 956 }, { "epoch": 2.093519278096801, "grad_norm": 0.008745480328798294, "learning_rate": 0.0010072992700729927, "loss": 0.6722, "step": 957 }, { "epoch": 2.095706863549357, "grad_norm": 0.02181909792125225, "learning_rate": 0.0010048661800486618, "loss": 0.7497, "step": 958 }, { "epoch": 2.097894449001914, "grad_norm": 0.005593753885477781, "learning_rate": 0.001002433090024331, "loss": 0.6413, "step": 959 }, { "epoch": 2.100082034454471, "grad_norm": 0.0110318623483181, "learning_rate": 0.001, "loss": 0.7437, "step": 960 }, { "epoch": 2.1022696199070277, "grad_norm": 0.07487611472606659, "learning_rate": 0.0009975669099756691, "loss": 0.8967, "step": 961 }, { "epoch": 2.1044572053595845, "grad_norm": 0.011572844348847866, "learning_rate": 0.0009951338199513382, "loss": 0.7016, "step": 962 }, { "epoch": 2.1066447908121413, "grad_norm": 0.0219709649682045, "learning_rate": 0.0009927007299270073, "loss": 0.7582, "step": 963 }, { "epoch": 2.1088323762646977, "grad_norm": 0.014250703155994415, "learning_rate": 0.0009902676399026764, "loss": 0.6485, "step": 964 }, { "epoch": 2.1110199617172545, "grad_norm": 0.010836089961230755, "learning_rate": 0.0009878345498783455, "loss": 0.7457, "step": 965 }, { "epoch": 2.1132075471698113, "grad_norm": 0.010538347065448761, "learning_rate": 0.0009854014598540146, "loss": 0.7283, "step": 966 }, { "epoch": 2.115395132622368, "grad_norm": 0.011399851180613041, "learning_rate": 0.0009829683698296837, "loss": 0.6896, "step": 967 }, { "epoch": 2.117582718074925, "grad_norm": 0.027435095980763435, "learning_rate": 0.000980535279805353, "loss": 0.9376, "step": 968 }, { "epoch": 2.1197703035274817, "grad_norm": 0.00705757224932313, "learning_rate": 0.0009781021897810219, "loss": 0.7243, "step": 969 }, { "epoch": 2.121957888980038, "grad_norm": 0.0098995016887784, "learning_rate": 0.000975669099756691, "loss": 0.7931, "step": 970 }, { "epoch": 2.124145474432595, "grad_norm": 0.011125714518129826, "learning_rate": 0.00097323600973236, "loss": 0.6044, "step": 971 }, { "epoch": 2.1263330598851518, "grad_norm": 0.009387229569256306, "learning_rate": 0.0009708029197080292, "loss": 0.7187, "step": 972 }, { "epoch": 2.1285206453377086, "grad_norm": 0.01129234954714775, "learning_rate": 0.0009683698296836983, "loss": 0.8324, "step": 973 }, { "epoch": 2.1307082307902654, "grad_norm": 0.011272157542407513, "learning_rate": 0.0009659367396593673, "loss": 0.7128, "step": 974 }, { "epoch": 2.132895816242822, "grad_norm": 0.010409243404865265, "learning_rate": 0.0009635036496350365, "loss": 0.7535, "step": 975 }, { "epoch": 2.1350834016953786, "grad_norm": 0.00857408158481121, "learning_rate": 0.0009610705596107057, "loss": 0.8129, "step": 976 }, { "epoch": 2.1372709871479354, "grad_norm": 0.014548208564519882, "learning_rate": 0.0009586374695863747, "loss": 0.676, "step": 977 }, { "epoch": 2.139458572600492, "grad_norm": 0.016449380666017532, "learning_rate": 0.0009562043795620438, "loss": 0.7384, "step": 978 }, { "epoch": 2.141646158053049, "grad_norm": 0.007109857629984617, "learning_rate": 0.000953771289537713, "loss": 0.6808, "step": 979 }, { "epoch": 2.143833743505606, "grad_norm": 0.009979904629290104, "learning_rate": 0.000951338199513382, "loss": 0.6907, "step": 980 }, { "epoch": 2.146021328958162, "grad_norm": 0.008424636907875538, "learning_rate": 0.0009489051094890511, "loss": 0.7423, "step": 981 }, { "epoch": 2.148208914410719, "grad_norm": 0.01054910384118557, "learning_rate": 0.0009464720194647203, "loss": 0.6611, "step": 982 }, { "epoch": 2.150396499863276, "grad_norm": 0.0084614809602499, "learning_rate": 0.0009440389294403893, "loss": 0.7548, "step": 983 }, { "epoch": 2.1525840853158327, "grad_norm": 0.008796039037406445, "learning_rate": 0.0009416058394160585, "loss": 0.9042, "step": 984 }, { "epoch": 2.1547716707683895, "grad_norm": 0.011639994569122791, "learning_rate": 0.0009391727493917275, "loss": 0.6474, "step": 985 }, { "epoch": 2.1569592562209463, "grad_norm": 0.011916186660528183, "learning_rate": 0.0009367396593673965, "loss": 0.7848, "step": 986 }, { "epoch": 2.1591468416735027, "grad_norm": 0.01620625890791416, "learning_rate": 0.0009343065693430657, "loss": 0.7924, "step": 987 }, { "epoch": 2.1613344271260595, "grad_norm": 0.008310189470648766, "learning_rate": 0.0009318734793187349, "loss": 0.8015, "step": 988 }, { "epoch": 2.1635220125786163, "grad_norm": 0.008162159472703934, "learning_rate": 0.0009294403892944039, "loss": 0.8261, "step": 989 }, { "epoch": 2.165709598031173, "grad_norm": 0.009289762936532497, "learning_rate": 0.000927007299270073, "loss": 0.8676, "step": 990 }, { "epoch": 2.16789718348373, "grad_norm": 0.007392804138362408, "learning_rate": 0.000924574209245742, "loss": 0.6025, "step": 991 }, { "epoch": 2.1700847689362868, "grad_norm": 0.008378117345273495, "learning_rate": 0.0009221411192214112, "loss": 0.5951, "step": 992 }, { "epoch": 2.172272354388843, "grad_norm": 0.037044674158096313, "learning_rate": 0.0009197080291970804, "loss": 0.7454, "step": 993 }, { "epoch": 2.1744599398414, "grad_norm": 0.01427681464701891, "learning_rate": 0.0009172749391727494, "loss": 0.5663, "step": 994 }, { "epoch": 2.1766475252939568, "grad_norm": 0.010998294688761234, "learning_rate": 0.0009148418491484185, "loss": 0.9058, "step": 995 }, { "epoch": 2.1788351107465136, "grad_norm": 0.007977189496159554, "learning_rate": 0.0009124087591240877, "loss": 0.664, "step": 996 }, { "epoch": 2.1810226961990704, "grad_norm": 0.008938194252550602, "learning_rate": 0.0009099756690997567, "loss": 0.7787, "step": 997 }, { "epoch": 2.1832102816516272, "grad_norm": 0.014179794117808342, "learning_rate": 0.0009075425790754259, "loss": 0.6453, "step": 998 }, { "epoch": 2.1853978671041836, "grad_norm": 0.01838630810379982, "learning_rate": 0.000905109489051095, "loss": 0.7138, "step": 999 }, { "epoch": 2.1875854525567404, "grad_norm": 0.027501361444592476, "learning_rate": 0.0009026763990267639, "loss": 0.7204, "step": 1000 }, { "epoch": 2.1897730380092972, "grad_norm": 0.007381811738014221, "learning_rate": 0.0009002433090024331, "loss": 0.8955, "step": 1001 }, { "epoch": 2.191960623461854, "grad_norm": 0.07506415992975235, "learning_rate": 0.0008978102189781023, "loss": 0.802, "step": 1002 }, { "epoch": 2.194148208914411, "grad_norm": 0.028858385980129242, "learning_rate": 0.0008953771289537713, "loss": 0.7682, "step": 1003 }, { "epoch": 2.1963357943669672, "grad_norm": 0.013214879669249058, "learning_rate": 0.0008929440389294404, "loss": 0.7162, "step": 1004 }, { "epoch": 2.198523379819524, "grad_norm": 0.007629261817783117, "learning_rate": 0.0008905109489051096, "loss": 0.7283, "step": 1005 }, { "epoch": 2.200710965272081, "grad_norm": 0.007726036943495274, "learning_rate": 0.0008880778588807786, "loss": 0.8558, "step": 1006 }, { "epoch": 2.2028985507246377, "grad_norm": 0.008436914533376694, "learning_rate": 0.0008856447688564477, "loss": 0.7377, "step": 1007 }, { "epoch": 2.2050861361771945, "grad_norm": 0.02465754747390747, "learning_rate": 0.0008832116788321168, "loss": 0.5909, "step": 1008 }, { "epoch": 2.2072737216297513, "grad_norm": 0.007964403368532658, "learning_rate": 0.0008807785888077859, "loss": 0.9931, "step": 1009 }, { "epoch": 2.2094613070823077, "grad_norm": 0.008428809233009815, "learning_rate": 0.0008783454987834551, "loss": 0.8308, "step": 1010 }, { "epoch": 2.2116488925348645, "grad_norm": 0.005988140590488911, "learning_rate": 0.0008759124087591241, "loss": 0.6528, "step": 1011 }, { "epoch": 2.2138364779874213, "grad_norm": 0.009502807632088661, "learning_rate": 0.0008734793187347931, "loss": 0.7241, "step": 1012 }, { "epoch": 2.216024063439978, "grad_norm": 0.01181811187416315, "learning_rate": 0.0008710462287104623, "loss": 0.5897, "step": 1013 }, { "epoch": 2.218211648892535, "grad_norm": 0.013522054068744183, "learning_rate": 0.0008686131386861313, "loss": 0.7664, "step": 1014 }, { "epoch": 2.220399234345092, "grad_norm": 0.008381453342735767, "learning_rate": 0.0008661800486618005, "loss": 0.7758, "step": 1015 }, { "epoch": 2.222586819797648, "grad_norm": 0.011634815484285355, "learning_rate": 0.0008637469586374696, "loss": 0.7362, "step": 1016 }, { "epoch": 2.224774405250205, "grad_norm": 0.008570423349738121, "learning_rate": 0.0008613138686131386, "loss": 0.8869, "step": 1017 }, { "epoch": 2.226961990702762, "grad_norm": 0.01613277941942215, "learning_rate": 0.0008588807785888078, "loss": 0.8074, "step": 1018 }, { "epoch": 2.2291495761553186, "grad_norm": 0.0062742773443460464, "learning_rate": 0.000856447688564477, "loss": 0.7695, "step": 1019 }, { "epoch": 2.2313371616078754, "grad_norm": 0.011958430521190166, "learning_rate": 0.000854014598540146, "loss": 0.9689, "step": 1020 }, { "epoch": 2.2335247470604322, "grad_norm": 0.010232674889266491, "learning_rate": 0.0008515815085158151, "loss": 0.7289, "step": 1021 }, { "epoch": 2.2357123325129886, "grad_norm": 0.010546423494815826, "learning_rate": 0.0008491484184914843, "loss": 0.7882, "step": 1022 }, { "epoch": 2.2378999179655454, "grad_norm": 0.006704252678900957, "learning_rate": 0.0008467153284671533, "loss": 0.7245, "step": 1023 }, { "epoch": 2.2400875034181023, "grad_norm": 0.00856088288128376, "learning_rate": 0.0008442822384428225, "loss": 0.8478, "step": 1024 }, { "epoch": 2.242275088870659, "grad_norm": 0.011011838912963867, "learning_rate": 0.0008418491484184916, "loss": 0.8878, "step": 1025 }, { "epoch": 2.244462674323216, "grad_norm": 0.008859807625412941, "learning_rate": 0.0008394160583941605, "loss": 1.0637, "step": 1026 }, { "epoch": 2.2466502597757723, "grad_norm": 0.019353823736310005, "learning_rate": 0.0008369829683698297, "loss": 0.6664, "step": 1027 }, { "epoch": 2.248837845228329, "grad_norm": 0.007266916800290346, "learning_rate": 0.0008345498783454987, "loss": 0.7924, "step": 1028 }, { "epoch": 2.251025430680886, "grad_norm": 0.00936873722821474, "learning_rate": 0.0008321167883211679, "loss": 0.7045, "step": 1029 }, { "epoch": 2.2532130161334427, "grad_norm": 0.007908246479928493, "learning_rate": 0.000829683698296837, "loss": 0.9256, "step": 1030 }, { "epoch": 2.2554006015859995, "grad_norm": 0.024966659024357796, "learning_rate": 0.000827250608272506, "loss": 0.7243, "step": 1031 }, { "epoch": 2.2575881870385563, "grad_norm": 0.009444604627788067, "learning_rate": 0.0008248175182481752, "loss": 0.7369, "step": 1032 }, { "epoch": 2.259775772491113, "grad_norm": 0.009447803720831871, "learning_rate": 0.0008223844282238443, "loss": 0.7721, "step": 1033 }, { "epoch": 2.2619633579436695, "grad_norm": 0.008546645753085613, "learning_rate": 0.0008199513381995134, "loss": 0.8094, "step": 1034 }, { "epoch": 2.2641509433962264, "grad_norm": 0.006809299346059561, "learning_rate": 0.0008175182481751825, "loss": 0.7907, "step": 1035 }, { "epoch": 2.266338528848783, "grad_norm": 0.013527573086321354, "learning_rate": 0.0008150851581508517, "loss": 0.6692, "step": 1036 }, { "epoch": 2.26852611430134, "grad_norm": 0.007041016593575478, "learning_rate": 0.0008126520681265207, "loss": 0.7474, "step": 1037 }, { "epoch": 2.270713699753897, "grad_norm": 0.006707175634801388, "learning_rate": 0.0008102189781021898, "loss": 0.8134, "step": 1038 }, { "epoch": 2.272901285206453, "grad_norm": 0.030407702550292015, "learning_rate": 0.000807785888077859, "loss": 0.7734, "step": 1039 }, { "epoch": 2.27508887065901, "grad_norm": 0.011364832520484924, "learning_rate": 0.000805352798053528, "loss": 0.6188, "step": 1040 }, { "epoch": 2.277276456111567, "grad_norm": 0.009676680900156498, "learning_rate": 0.0008029197080291971, "loss": 0.8262, "step": 1041 }, { "epoch": 2.2794640415641236, "grad_norm": 0.012146366760134697, "learning_rate": 0.0008004866180048662, "loss": 0.7543, "step": 1042 }, { "epoch": 2.2816516270166805, "grad_norm": 0.021344035863876343, "learning_rate": 0.0007980535279805352, "loss": 0.8434, "step": 1043 }, { "epoch": 2.2838392124692373, "grad_norm": 0.019379200413823128, "learning_rate": 0.0007956204379562044, "loss": 0.6678, "step": 1044 }, { "epoch": 2.2860267979217936, "grad_norm": 0.012972463853657246, "learning_rate": 0.0007931873479318734, "loss": 0.7363, "step": 1045 }, { "epoch": 2.2882143833743505, "grad_norm": 0.005540755111724138, "learning_rate": 0.0007907542579075426, "loss": 0.7702, "step": 1046 }, { "epoch": 2.2904019688269073, "grad_norm": 0.01054232195019722, "learning_rate": 0.0007883211678832117, "loss": 0.8086, "step": 1047 }, { "epoch": 2.292589554279464, "grad_norm": 0.006333992816507816, "learning_rate": 0.0007858880778588807, "loss": 0.8547, "step": 1048 }, { "epoch": 2.294777139732021, "grad_norm": 0.007503498811274767, "learning_rate": 0.0007834549878345499, "loss": 0.9384, "step": 1049 }, { "epoch": 2.2969647251845773, "grad_norm": 0.009519786573946476, "learning_rate": 0.0007810218978102191, "loss": 0.7457, "step": 1050 }, { "epoch": 2.299152310637134, "grad_norm": 0.009697610512375832, "learning_rate": 0.0007785888077858881, "loss": 0.6572, "step": 1051 }, { "epoch": 2.301339896089691, "grad_norm": 0.01142230723053217, "learning_rate": 0.0007761557177615572, "loss": 0.7003, "step": 1052 }, { "epoch": 2.3035274815422477, "grad_norm": 0.014880196191370487, "learning_rate": 0.0007737226277372264, "loss": 0.9522, "step": 1053 }, { "epoch": 2.3057150669948046, "grad_norm": 0.03530775010585785, "learning_rate": 0.0007712895377128953, "loss": 0.8303, "step": 1054 }, { "epoch": 2.3079026524473614, "grad_norm": 0.008375970646739006, "learning_rate": 0.0007688564476885646, "loss": 0.9399, "step": 1055 }, { "epoch": 2.310090237899918, "grad_norm": 0.011312820017337799, "learning_rate": 0.0007664233576642336, "loss": 0.6918, "step": 1056 }, { "epoch": 2.3122778233524746, "grad_norm": 0.00965717900544405, "learning_rate": 0.0007639902676399026, "loss": 0.6898, "step": 1057 }, { "epoch": 2.3144654088050314, "grad_norm": 0.046056658029556274, "learning_rate": 0.0007615571776155718, "loss": 0.7655, "step": 1058 }, { "epoch": 2.316652994257588, "grad_norm": 0.006473752204328775, "learning_rate": 0.0007591240875912409, "loss": 0.7825, "step": 1059 }, { "epoch": 2.318840579710145, "grad_norm": 0.012731518596410751, "learning_rate": 0.0007566909975669099, "loss": 0.7138, "step": 1060 }, { "epoch": 2.321028165162702, "grad_norm": 0.01815684884786606, "learning_rate": 0.0007542579075425791, "loss": 0.7992, "step": 1061 }, { "epoch": 2.323215750615258, "grad_norm": 0.012457008473575115, "learning_rate": 0.0007518248175182483, "loss": 0.7565, "step": 1062 }, { "epoch": 2.325403336067815, "grad_norm": 0.011130121536552906, "learning_rate": 0.0007493917274939173, "loss": 0.6585, "step": 1063 }, { "epoch": 2.327590921520372, "grad_norm": 0.009390764869749546, "learning_rate": 0.0007469586374695864, "loss": 0.5921, "step": 1064 }, { "epoch": 2.3297785069729287, "grad_norm": 0.006265114061534405, "learning_rate": 0.0007445255474452555, "loss": 0.862, "step": 1065 }, { "epoch": 2.3319660924254855, "grad_norm": 0.014493511989712715, "learning_rate": 0.0007420924574209246, "loss": 0.6529, "step": 1066 }, { "epoch": 2.3341536778780423, "grad_norm": 0.01009755115956068, "learning_rate": 0.0007396593673965938, "loss": 1.0077, "step": 1067 }, { "epoch": 2.3363412633305987, "grad_norm": 0.022166702896356583, "learning_rate": 0.0007372262773722629, "loss": 0.9121, "step": 1068 }, { "epoch": 2.3385288487831555, "grad_norm": 0.028010999783873558, "learning_rate": 0.0007347931873479318, "loss": 0.6663, "step": 1069 }, { "epoch": 2.3407164342357123, "grad_norm": 0.012431381270289421, "learning_rate": 0.0007323600973236009, "loss": 0.7579, "step": 1070 }, { "epoch": 2.342904019688269, "grad_norm": 0.0932813212275505, "learning_rate": 0.0007299270072992701, "loss": 0.5542, "step": 1071 }, { "epoch": 2.345091605140826, "grad_norm": 0.011022589169442654, "learning_rate": 0.0007274939172749392, "loss": 0.7093, "step": 1072 }, { "epoch": 2.3472791905933827, "grad_norm": 0.008994583040475845, "learning_rate": 0.0007250608272506082, "loss": 0.7466, "step": 1073 }, { "epoch": 2.349466776045939, "grad_norm": 0.01782486028969288, "learning_rate": 0.0007226277372262774, "loss": 0.6847, "step": 1074 }, { "epoch": 2.351654361498496, "grad_norm": 0.011398195289075375, "learning_rate": 0.0007201946472019465, "loss": 0.687, "step": 1075 }, { "epoch": 2.3538419469510528, "grad_norm": 0.023858705535531044, "learning_rate": 0.0007177615571776156, "loss": 0.6984, "step": 1076 }, { "epoch": 2.3560295324036096, "grad_norm": 0.008185802958905697, "learning_rate": 0.0007153284671532847, "loss": 0.8747, "step": 1077 }, { "epoch": 2.3582171178561664, "grad_norm": 0.018106609582901, "learning_rate": 0.0007128953771289538, "loss": 0.6591, "step": 1078 }, { "epoch": 2.360404703308723, "grad_norm": 0.013991002924740314, "learning_rate": 0.0007104622871046229, "loss": 0.818, "step": 1079 }, { "epoch": 2.3625922887612796, "grad_norm": 0.007820016704499722, "learning_rate": 0.000708029197080292, "loss": 0.9661, "step": 1080 }, { "epoch": 2.3647798742138364, "grad_norm": 0.020563364028930664, "learning_rate": 0.0007055961070559611, "loss": 0.896, "step": 1081 }, { "epoch": 2.366967459666393, "grad_norm": 0.01632773131132126, "learning_rate": 0.0007031630170316302, "loss": 0.8516, "step": 1082 }, { "epoch": 2.36915504511895, "grad_norm": 0.012202097102999687, "learning_rate": 0.0007007299270072992, "loss": 0.921, "step": 1083 }, { "epoch": 2.371342630571507, "grad_norm": 0.009598075412213802, "learning_rate": 0.0006982968369829684, "loss": 0.677, "step": 1084 }, { "epoch": 2.373530216024063, "grad_norm": 0.010769539512693882, "learning_rate": 0.0006958637469586375, "loss": 0.7964, "step": 1085 }, { "epoch": 2.37571780147662, "grad_norm": 0.011242173612117767, "learning_rate": 0.0006934306569343065, "loss": 0.6444, "step": 1086 }, { "epoch": 2.377905386929177, "grad_norm": 0.009250817820429802, "learning_rate": 0.0006909975669099756, "loss": 0.7456, "step": 1087 }, { "epoch": 2.3800929723817337, "grad_norm": 0.008871940895915031, "learning_rate": 0.0006885644768856448, "loss": 0.7497, "step": 1088 }, { "epoch": 2.3822805578342905, "grad_norm": 0.014774895273149014, "learning_rate": 0.0006861313868613139, "loss": 0.8508, "step": 1089 }, { "epoch": 2.3844681432868473, "grad_norm": 0.008470469154417515, "learning_rate": 0.000683698296836983, "loss": 0.6278, "step": 1090 }, { "epoch": 2.386655728739404, "grad_norm": 0.02862645871937275, "learning_rate": 0.0006812652068126521, "loss": 0.7235, "step": 1091 }, { "epoch": 2.3888433141919605, "grad_norm": 0.010565055534243584, "learning_rate": 0.0006788321167883212, "loss": 0.7064, "step": 1092 }, { "epoch": 2.3910308996445173, "grad_norm": 0.00996407214552164, "learning_rate": 0.0006763990267639903, "loss": 0.747, "step": 1093 }, { "epoch": 2.393218485097074, "grad_norm": 0.008201108314096928, "learning_rate": 0.0006739659367396594, "loss": 0.8917, "step": 1094 }, { "epoch": 2.395406070549631, "grad_norm": 0.007856379263103008, "learning_rate": 0.0006715328467153285, "loss": 0.8106, "step": 1095 }, { "epoch": 2.3975936560021878, "grad_norm": 0.01899876445531845, "learning_rate": 0.0006690997566909976, "loss": 0.9151, "step": 1096 }, { "epoch": 2.399781241454744, "grad_norm": 0.0086012938991189, "learning_rate": 0.0006666666666666666, "loss": 0.872, "step": 1097 }, { "epoch": 2.401968826907301, "grad_norm": 0.007030507083982229, "learning_rate": 0.0006642335766423358, "loss": 0.6529, "step": 1098 }, { "epoch": 2.4041564123598578, "grad_norm": 0.01876233145594597, "learning_rate": 0.0006618004866180048, "loss": 0.8421, "step": 1099 }, { "epoch": 2.4063439978124146, "grad_norm": 0.033474959433078766, "learning_rate": 0.0006593673965936739, "loss": 0.6956, "step": 1100 }, { "epoch": 2.4085315832649714, "grad_norm": 0.018535858020186424, "learning_rate": 0.0006569343065693431, "loss": 0.7232, "step": 1101 }, { "epoch": 2.4107191687175282, "grad_norm": 0.010383503511548042, "learning_rate": 0.0006545012165450122, "loss": 0.5804, "step": 1102 }, { "epoch": 2.4129067541700846, "grad_norm": 0.0077387490309774876, "learning_rate": 0.0006520681265206813, "loss": 0.828, "step": 1103 }, { "epoch": 2.4150943396226414, "grad_norm": 0.011656009592115879, "learning_rate": 0.0006496350364963504, "loss": 0.9106, "step": 1104 }, { "epoch": 2.4172819250751982, "grad_norm": 0.005996339488774538, "learning_rate": 0.0006472019464720195, "loss": 0.6921, "step": 1105 }, { "epoch": 2.419469510527755, "grad_norm": 0.022230584174394608, "learning_rate": 0.0006447688564476886, "loss": 0.9711, "step": 1106 }, { "epoch": 2.421657095980312, "grad_norm": 0.031066155061125755, "learning_rate": 0.0006423357664233577, "loss": 0.8718, "step": 1107 }, { "epoch": 2.4238446814328682, "grad_norm": 0.011762702837586403, "learning_rate": 0.0006399026763990268, "loss": 0.818, "step": 1108 }, { "epoch": 2.426032266885425, "grad_norm": 0.009383924305438995, "learning_rate": 0.0006374695863746959, "loss": 0.5913, "step": 1109 }, { "epoch": 2.428219852337982, "grad_norm": 0.012824693694710732, "learning_rate": 0.000635036496350365, "loss": 0.7115, "step": 1110 }, { "epoch": 2.4304074377905387, "grad_norm": 0.007453750818967819, "learning_rate": 0.0006326034063260342, "loss": 0.7374, "step": 1111 }, { "epoch": 2.4325950232430955, "grad_norm": 0.007933787070214748, "learning_rate": 0.0006301703163017031, "loss": 0.7921, "step": 1112 }, { "epoch": 2.4347826086956523, "grad_norm": 0.01717616245150566, "learning_rate": 0.0006277372262773722, "loss": 0.9326, "step": 1113 }, { "epoch": 2.436970194148209, "grad_norm": 0.009397076442837715, "learning_rate": 0.0006253041362530414, "loss": 0.6388, "step": 1114 }, { "epoch": 2.4391577796007655, "grad_norm": 0.008330175653100014, "learning_rate": 0.0006228710462287105, "loss": 0.5517, "step": 1115 }, { "epoch": 2.4413453650533223, "grad_norm": 0.013194689527153969, "learning_rate": 0.0006204379562043796, "loss": 0.8779, "step": 1116 }, { "epoch": 2.443532950505879, "grad_norm": 0.012824257835745811, "learning_rate": 0.0006180048661800486, "loss": 0.7731, "step": 1117 }, { "epoch": 2.445720535958436, "grad_norm": 0.011488651856780052, "learning_rate": 0.0006155717761557178, "loss": 0.7806, "step": 1118 }, { "epoch": 2.447908121410993, "grad_norm": 0.006684242747724056, "learning_rate": 0.0006131386861313869, "loss": 1.0212, "step": 1119 }, { "epoch": 2.450095706863549, "grad_norm": 0.010995331220328808, "learning_rate": 0.000610705596107056, "loss": 0.8499, "step": 1120 }, { "epoch": 2.452283292316106, "grad_norm": 0.016977710649371147, "learning_rate": 0.0006082725060827251, "loss": 0.7029, "step": 1121 }, { "epoch": 2.454470877768663, "grad_norm": 0.008742439560592175, "learning_rate": 0.0006058394160583942, "loss": 0.6834, "step": 1122 }, { "epoch": 2.4566584632212196, "grad_norm": 0.006410808768123388, "learning_rate": 0.0006034063260340633, "loss": 0.8371, "step": 1123 }, { "epoch": 2.4588460486737764, "grad_norm": 0.008776198141276836, "learning_rate": 0.0006009732360097324, "loss": 0.7001, "step": 1124 }, { "epoch": 2.4610336341263332, "grad_norm": 0.007712388876825571, "learning_rate": 0.0005985401459854014, "loss": 0.5664, "step": 1125 }, { "epoch": 2.4632212195788896, "grad_norm": 0.011250052601099014, "learning_rate": 0.0005961070559610705, "loss": 0.8572, "step": 1126 }, { "epoch": 2.4654088050314464, "grad_norm": 0.010831180959939957, "learning_rate": 0.0005936739659367396, "loss": 0.6984, "step": 1127 }, { "epoch": 2.4675963904840033, "grad_norm": 0.025114471092820168, "learning_rate": 0.0005912408759124088, "loss": 0.7401, "step": 1128 }, { "epoch": 2.46978397593656, "grad_norm": 0.006640868727117777, "learning_rate": 0.0005888077858880779, "loss": 0.5887, "step": 1129 }, { "epoch": 2.471971561389117, "grad_norm": 0.0060841697268188, "learning_rate": 0.0005863746958637469, "loss": 0.7121, "step": 1130 }, { "epoch": 2.4741591468416733, "grad_norm": 0.012216274626553059, "learning_rate": 0.0005839416058394161, "loss": 0.8174, "step": 1131 }, { "epoch": 2.47634673229423, "grad_norm": 0.009857951663434505, "learning_rate": 0.0005815085158150852, "loss": 0.7229, "step": 1132 }, { "epoch": 2.478534317746787, "grad_norm": 0.010938407853245735, "learning_rate": 0.0005790754257907543, "loss": 0.5738, "step": 1133 }, { "epoch": 2.4807219031993437, "grad_norm": 0.026813512668013573, "learning_rate": 0.0005766423357664234, "loss": 0.8543, "step": 1134 }, { "epoch": 2.4829094886519005, "grad_norm": 0.01071678102016449, "learning_rate": 0.0005742092457420925, "loss": 0.9774, "step": 1135 }, { "epoch": 2.4850970741044573, "grad_norm": 0.009592295624315739, "learning_rate": 0.0005717761557177616, "loss": 0.9619, "step": 1136 }, { "epoch": 2.487284659557014, "grad_norm": 0.005114677362143993, "learning_rate": 0.0005693430656934307, "loss": 0.8033, "step": 1137 }, { "epoch": 2.4894722450095705, "grad_norm": 0.012539639137685299, "learning_rate": 0.0005669099756690998, "loss": 0.8993, "step": 1138 }, { "epoch": 2.4916598304621274, "grad_norm": 0.026053965091705322, "learning_rate": 0.0005644768856447688, "loss": 0.6817, "step": 1139 }, { "epoch": 2.493847415914684, "grad_norm": 0.007609077729284763, "learning_rate": 0.0005620437956204379, "loss": 0.8549, "step": 1140 }, { "epoch": 2.496035001367241, "grad_norm": 0.010698397643864155, "learning_rate": 0.0005596107055961071, "loss": 0.7068, "step": 1141 }, { "epoch": 2.498222586819798, "grad_norm": 0.008611828088760376, "learning_rate": 0.0005571776155717762, "loss": 0.7465, "step": 1142 }, { "epoch": 2.500410172272354, "grad_norm": 0.01089494489133358, "learning_rate": 0.0005547445255474452, "loss": 0.6224, "step": 1143 }, { "epoch": 2.502597757724911, "grad_norm": 0.024782098829746246, "learning_rate": 0.0005523114355231143, "loss": 0.8328, "step": 1144 }, { "epoch": 2.504785343177468, "grad_norm": 0.006382483057677746, "learning_rate": 0.0005498783454987835, "loss": 0.7787, "step": 1145 }, { "epoch": 2.5069729286300246, "grad_norm": 0.016949672251939774, "learning_rate": 0.0005474452554744526, "loss": 0.7046, "step": 1146 }, { "epoch": 2.5091605140825815, "grad_norm": 0.027401480823755264, "learning_rate": 0.0005450121654501216, "loss": 0.6702, "step": 1147 }, { "epoch": 2.5113480995351383, "grad_norm": 0.01999586448073387, "learning_rate": 0.0005425790754257908, "loss": 0.8054, "step": 1148 }, { "epoch": 2.513535684987695, "grad_norm": 0.010145720094442368, "learning_rate": 0.0005401459854014599, "loss": 0.6592, "step": 1149 }, { "epoch": 2.5157232704402515, "grad_norm": 0.018535887822508812, "learning_rate": 0.000537712895377129, "loss": 0.7254, "step": 1150 }, { "epoch": 2.5179108558928083, "grad_norm": 0.009648307226598263, "learning_rate": 0.0005352798053527981, "loss": 0.6838, "step": 1151 }, { "epoch": 2.520098441345365, "grad_norm": 0.016310011968016624, "learning_rate": 0.0005328467153284672, "loss": 0.8777, "step": 1152 }, { "epoch": 2.522286026797922, "grad_norm": 0.010320610366761684, "learning_rate": 0.0005304136253041362, "loss": 0.7651, "step": 1153 }, { "epoch": 2.5244736122504783, "grad_norm": 0.012834092602133751, "learning_rate": 0.0005279805352798053, "loss": 0.7847, "step": 1154 }, { "epoch": 2.526661197703035, "grad_norm": 0.011668582446873188, "learning_rate": 0.0005255474452554745, "loss": 0.7225, "step": 1155 }, { "epoch": 2.528848783155592, "grad_norm": 0.009817942976951599, "learning_rate": 0.0005231143552311435, "loss": 0.6983, "step": 1156 }, { "epoch": 2.5310363686081487, "grad_norm": 0.009282633662223816, "learning_rate": 0.0005206812652068126, "loss": 0.7688, "step": 1157 }, { "epoch": 2.5332239540607056, "grad_norm": 0.007419208530336618, "learning_rate": 0.0005182481751824818, "loss": 0.728, "step": 1158 }, { "epoch": 2.5354115395132624, "grad_norm": 0.029275061562657356, "learning_rate": 0.0005158150851581509, "loss": 0.8293, "step": 1159 }, { "epoch": 2.537599124965819, "grad_norm": 0.01723194308578968, "learning_rate": 0.0005133819951338199, "loss": 0.6128, "step": 1160 }, { "epoch": 2.5397867104183756, "grad_norm": 0.009285934269428253, "learning_rate": 0.000510948905109489, "loss": 0.6788, "step": 1161 }, { "epoch": 2.5419742958709324, "grad_norm": 0.008555158041417599, "learning_rate": 0.0005085158150851582, "loss": 0.6507, "step": 1162 }, { "epoch": 2.544161881323489, "grad_norm": 0.0168358962982893, "learning_rate": 0.0005060827250608273, "loss": 0.942, "step": 1163 }, { "epoch": 2.546349466776046, "grad_norm": 0.0068771797232329845, "learning_rate": 0.0005036496350364964, "loss": 0.7844, "step": 1164 }, { "epoch": 2.548537052228603, "grad_norm": 0.04532065615057945, "learning_rate": 0.0005012165450121655, "loss": 0.8095, "step": 1165 }, { "epoch": 2.550724637681159, "grad_norm": 0.00933657493442297, "learning_rate": 0.0004987834549878346, "loss": 0.8072, "step": 1166 }, { "epoch": 2.552912223133716, "grad_norm": 0.009804673492908478, "learning_rate": 0.0004963503649635036, "loss": 0.8715, "step": 1167 }, { "epoch": 2.555099808586273, "grad_norm": 0.010783910751342773, "learning_rate": 0.0004939172749391727, "loss": 0.7891, "step": 1168 }, { "epoch": 2.5572873940388297, "grad_norm": 0.011784784495830536, "learning_rate": 0.0004914841849148418, "loss": 0.7262, "step": 1169 }, { "epoch": 2.5594749794913865, "grad_norm": 0.007322199642658234, "learning_rate": 0.0004890510948905109, "loss": 0.7809, "step": 1170 }, { "epoch": 2.5616625649439433, "grad_norm": 0.011777276173233986, "learning_rate": 0.00048661800486618, "loss": 0.7791, "step": 1171 }, { "epoch": 2.5638501503965, "grad_norm": 0.015589660964906216, "learning_rate": 0.00048418491484184916, "loss": 0.921, "step": 1172 }, { "epoch": 2.5660377358490565, "grad_norm": 0.010277018882334232, "learning_rate": 0.00048175182481751826, "loss": 0.9368, "step": 1173 }, { "epoch": 2.5682253213016133, "grad_norm": 0.02483278699219227, "learning_rate": 0.00047931873479318735, "loss": 0.7714, "step": 1174 }, { "epoch": 2.57041290675417, "grad_norm": 0.013863074593245983, "learning_rate": 0.0004768856447688565, "loss": 0.6637, "step": 1175 }, { "epoch": 2.572600492206727, "grad_norm": 0.015338894911110401, "learning_rate": 0.00047445255474452553, "loss": 0.7678, "step": 1176 }, { "epoch": 2.5747880776592833, "grad_norm": 0.007364062592387199, "learning_rate": 0.0004720194647201946, "loss": 0.995, "step": 1177 }, { "epoch": 2.57697566311184, "grad_norm": 0.1765730232000351, "learning_rate": 0.00046958637469586377, "loss": 0.7865, "step": 1178 }, { "epoch": 2.579163248564397, "grad_norm": 0.010664415545761585, "learning_rate": 0.00046715328467153287, "loss": 0.5741, "step": 1179 }, { "epoch": 2.5813508340169538, "grad_norm": 0.012521582655608654, "learning_rate": 0.00046472019464720196, "loss": 0.6621, "step": 1180 }, { "epoch": 2.5835384194695106, "grad_norm": 0.03732423484325409, "learning_rate": 0.000462287104622871, "loss": 0.7453, "step": 1181 }, { "epoch": 2.5857260049220674, "grad_norm": 0.013986853882670403, "learning_rate": 0.0004598540145985402, "loss": 0.7057, "step": 1182 }, { "epoch": 2.587913590374624, "grad_norm": 0.013078927993774414, "learning_rate": 0.00045742092457420923, "loss": 0.7167, "step": 1183 }, { "epoch": 2.590101175827181, "grad_norm": 0.006835412234067917, "learning_rate": 0.0004549878345498783, "loss": 0.8064, "step": 1184 }, { "epoch": 2.5922887612797374, "grad_norm": 0.020057901740074158, "learning_rate": 0.0004525547445255475, "loss": 0.7096, "step": 1185 }, { "epoch": 2.594476346732294, "grad_norm": 0.026187503710389137, "learning_rate": 0.00045012165450121657, "loss": 0.9496, "step": 1186 }, { "epoch": 2.596663932184851, "grad_norm": 0.012171875685453415, "learning_rate": 0.00044768856447688566, "loss": 0.7529, "step": 1187 }, { "epoch": 2.598851517637408, "grad_norm": 0.012145042419433594, "learning_rate": 0.0004452554744525548, "loss": 0.8654, "step": 1188 }, { "epoch": 2.601039103089964, "grad_norm": 0.013504109345376492, "learning_rate": 0.00044282238442822384, "loss": 0.6347, "step": 1189 }, { "epoch": 2.603226688542521, "grad_norm": 0.01362569723278284, "learning_rate": 0.00044038929440389293, "loss": 0.661, "step": 1190 }, { "epoch": 2.605414273995078, "grad_norm": 0.013327688910067081, "learning_rate": 0.00043795620437956203, "loss": 0.6851, "step": 1191 }, { "epoch": 2.6076018594476347, "grad_norm": 0.008194427005946636, "learning_rate": 0.0004355231143552312, "loss": 0.8226, "step": 1192 }, { "epoch": 2.6097894449001915, "grad_norm": 0.017937535420060158, "learning_rate": 0.00043309002433090027, "loss": 0.7033, "step": 1193 }, { "epoch": 2.6119770303527483, "grad_norm": 0.005625641439110041, "learning_rate": 0.0004306569343065693, "loss": 0.7106, "step": 1194 }, { "epoch": 2.614164615805305, "grad_norm": 0.01812170445919037, "learning_rate": 0.0004282238442822385, "loss": 0.7344, "step": 1195 }, { "epoch": 2.6163522012578615, "grad_norm": 0.007461361587047577, "learning_rate": 0.00042579075425790754, "loss": 0.835, "step": 1196 }, { "epoch": 2.6185397867104183, "grad_norm": 0.014407969079911709, "learning_rate": 0.00042335766423357664, "loss": 0.7829, "step": 1197 }, { "epoch": 2.620727372162975, "grad_norm": 0.008925898931920528, "learning_rate": 0.0004209245742092458, "loss": 0.6425, "step": 1198 }, { "epoch": 2.622914957615532, "grad_norm": 0.010357217863202095, "learning_rate": 0.0004184914841849149, "loss": 0.894, "step": 1199 }, { "epoch": 2.6251025430680883, "grad_norm": 0.01632748544216156, "learning_rate": 0.00041605839416058397, "loss": 0.6886, "step": 1200 }, { "epoch": 2.627290128520645, "grad_norm": 0.021274514496326447, "learning_rate": 0.000413625304136253, "loss": 0.7503, "step": 1201 }, { "epoch": 2.629477713973202, "grad_norm": 0.021467119455337524, "learning_rate": 0.00041119221411192215, "loss": 0.9202, "step": 1202 }, { "epoch": 2.6316652994257588, "grad_norm": 0.011900427751243114, "learning_rate": 0.00040875912408759124, "loss": 0.7084, "step": 1203 }, { "epoch": 2.6338528848783156, "grad_norm": 0.010819557122886181, "learning_rate": 0.00040632603406326034, "loss": 1.0455, "step": 1204 }, { "epoch": 2.6360404703308724, "grad_norm": 0.012575685046613216, "learning_rate": 0.0004038929440389295, "loss": 0.6894, "step": 1205 }, { "epoch": 2.6382280557834292, "grad_norm": 0.011274064891040325, "learning_rate": 0.0004014598540145986, "loss": 0.8449, "step": 1206 }, { "epoch": 2.640415641235986, "grad_norm": 0.013194631785154343, "learning_rate": 0.0003990267639902676, "loss": 0.8192, "step": 1207 }, { "epoch": 2.6426032266885424, "grad_norm": 0.009542672894895077, "learning_rate": 0.0003965936739659367, "loss": 0.8768, "step": 1208 }, { "epoch": 2.6447908121410992, "grad_norm": 0.016639290377497673, "learning_rate": 0.00039416058394160585, "loss": 0.7371, "step": 1209 }, { "epoch": 2.646978397593656, "grad_norm": 0.02203970216214657, "learning_rate": 0.00039172749391727494, "loss": 0.6598, "step": 1210 }, { "epoch": 2.649165983046213, "grad_norm": 0.027763044461607933, "learning_rate": 0.00038929440389294404, "loss": 0.6819, "step": 1211 }, { "epoch": 2.6513535684987692, "grad_norm": 0.01537309866398573, "learning_rate": 0.0003868613138686132, "loss": 0.8249, "step": 1212 }, { "epoch": 2.653541153951326, "grad_norm": 0.01565646007657051, "learning_rate": 0.0003844282238442823, "loss": 0.569, "step": 1213 }, { "epoch": 2.655728739403883, "grad_norm": 0.01048749778419733, "learning_rate": 0.0003819951338199513, "loss": 0.6359, "step": 1214 }, { "epoch": 2.6579163248564397, "grad_norm": 0.061209116131067276, "learning_rate": 0.00037956204379562046, "loss": 0.7011, "step": 1215 }, { "epoch": 2.6601039103089965, "grad_norm": 0.016036316752433777, "learning_rate": 0.00037712895377128955, "loss": 0.5889, "step": 1216 }, { "epoch": 2.6622914957615533, "grad_norm": 0.014299210160970688, "learning_rate": 0.00037469586374695864, "loss": 0.7685, "step": 1217 }, { "epoch": 2.66447908121411, "grad_norm": 0.010716800577938557, "learning_rate": 0.00037226277372262774, "loss": 0.7795, "step": 1218 }, { "epoch": 2.6666666666666665, "grad_norm": 0.007198740262538195, "learning_rate": 0.0003698296836982969, "loss": 0.8868, "step": 1219 }, { "epoch": 2.6688542521192233, "grad_norm": 0.018458040431141853, "learning_rate": 0.0003673965936739659, "loss": 0.6935, "step": 1220 }, { "epoch": 2.67104183757178, "grad_norm": 0.011869457550346851, "learning_rate": 0.00036496350364963507, "loss": 0.7638, "step": 1221 }, { "epoch": 2.673229423024337, "grad_norm": 0.00896628387272358, "learning_rate": 0.0003625304136253041, "loss": 0.7615, "step": 1222 }, { "epoch": 2.675417008476894, "grad_norm": 0.008536278270184994, "learning_rate": 0.00036009732360097325, "loss": 0.6647, "step": 1223 }, { "epoch": 2.67760459392945, "grad_norm": 0.02423817664384842, "learning_rate": 0.00035766423357664234, "loss": 0.6876, "step": 1224 }, { "epoch": 2.679792179382007, "grad_norm": 0.011117582209408283, "learning_rate": 0.00035523114355231144, "loss": 0.665, "step": 1225 }, { "epoch": 2.681979764834564, "grad_norm": 0.009505179710686207, "learning_rate": 0.00035279805352798053, "loss": 0.6284, "step": 1226 }, { "epoch": 2.6841673502871206, "grad_norm": 0.0063440497033298016, "learning_rate": 0.0003503649635036496, "loss": 0.8279, "step": 1227 }, { "epoch": 2.6863549357396774, "grad_norm": 0.0201023630797863, "learning_rate": 0.00034793187347931877, "loss": 0.8996, "step": 1228 }, { "epoch": 2.6885425211922342, "grad_norm": 0.006452304311096668, "learning_rate": 0.0003454987834549878, "loss": 0.8563, "step": 1229 }, { "epoch": 2.690730106644791, "grad_norm": 0.00840191449970007, "learning_rate": 0.00034306569343065695, "loss": 0.6543, "step": 1230 }, { "epoch": 2.6929176920973474, "grad_norm": 0.011340702883899212, "learning_rate": 0.00034063260340632605, "loss": 0.733, "step": 1231 }, { "epoch": 2.6951052775499043, "grad_norm": 0.01761777698993683, "learning_rate": 0.00033819951338199514, "loss": 0.9136, "step": 1232 }, { "epoch": 2.697292863002461, "grad_norm": 0.012587963603436947, "learning_rate": 0.00033576642335766423, "loss": 0.801, "step": 1233 }, { "epoch": 2.699480448455018, "grad_norm": 0.006971995811909437, "learning_rate": 0.0003333333333333333, "loss": 0.8079, "step": 1234 }, { "epoch": 2.7016680339075743, "grad_norm": 0.00921553373336792, "learning_rate": 0.0003309002433090024, "loss": 0.6801, "step": 1235 }, { "epoch": 2.703855619360131, "grad_norm": 0.012788954190909863, "learning_rate": 0.00032846715328467156, "loss": 0.8119, "step": 1236 }, { "epoch": 2.706043204812688, "grad_norm": 0.01745203509926796, "learning_rate": 0.00032603406326034065, "loss": 0.808, "step": 1237 }, { "epoch": 2.7082307902652447, "grad_norm": 0.010819566436111927, "learning_rate": 0.00032360097323600975, "loss": 0.6882, "step": 1238 }, { "epoch": 2.7104183757178015, "grad_norm": 0.013807238079607487, "learning_rate": 0.00032116788321167884, "loss": 0.5872, "step": 1239 }, { "epoch": 2.7126059611703583, "grad_norm": 0.015879668295383453, "learning_rate": 0.00031873479318734793, "loss": 0.7541, "step": 1240 }, { "epoch": 2.714793546622915, "grad_norm": 0.008229264058172703, "learning_rate": 0.0003163017031630171, "loss": 0.8002, "step": 1241 }, { "epoch": 2.7169811320754715, "grad_norm": 0.011732214130461216, "learning_rate": 0.0003138686131386861, "loss": 0.7049, "step": 1242 }, { "epoch": 2.7191687175280284, "grad_norm": 0.008688759990036488, "learning_rate": 0.00031143552311435526, "loss": 0.9007, "step": 1243 }, { "epoch": 2.721356302980585, "grad_norm": 0.014027293771505356, "learning_rate": 0.0003090024330900243, "loss": 0.6098, "step": 1244 }, { "epoch": 2.723543888433142, "grad_norm": 0.00831068679690361, "learning_rate": 0.00030656934306569345, "loss": 0.7435, "step": 1245 }, { "epoch": 2.725731473885699, "grad_norm": 0.017324576154351234, "learning_rate": 0.00030413625304136254, "loss": 0.7317, "step": 1246 }, { "epoch": 2.727919059338255, "grad_norm": 0.01490398496389389, "learning_rate": 0.00030170316301703163, "loss": 0.7434, "step": 1247 }, { "epoch": 2.730106644790812, "grad_norm": 0.02181348390877247, "learning_rate": 0.0002992700729927007, "loss": 0.7395, "step": 1248 }, { "epoch": 2.732294230243369, "grad_norm": 0.017193686217069626, "learning_rate": 0.0002968369829683698, "loss": 1.0303, "step": 1249 }, { "epoch": 2.7344818156959256, "grad_norm": 0.011623183265328407, "learning_rate": 0.00029440389294403896, "loss": 0.5918, "step": 1250 }, { "epoch": 2.7366694011484825, "grad_norm": 0.007596330717206001, "learning_rate": 0.00029197080291970805, "loss": 0.6441, "step": 1251 }, { "epoch": 2.7388569866010393, "grad_norm": 0.022759029641747475, "learning_rate": 0.00028953771289537715, "loss": 0.6192, "step": 1252 }, { "epoch": 2.741044572053596, "grad_norm": 0.0065732188522815704, "learning_rate": 0.00028710462287104624, "loss": 0.73, "step": 1253 }, { "epoch": 2.7432321575061525, "grad_norm": 0.009496266953647137, "learning_rate": 0.00028467153284671533, "loss": 0.839, "step": 1254 }, { "epoch": 2.7454197429587093, "grad_norm": 0.007220600266009569, "learning_rate": 0.0002822384428223844, "loss": 0.6448, "step": 1255 }, { "epoch": 2.747607328411266, "grad_norm": 0.015215203166007996, "learning_rate": 0.00027980535279805357, "loss": 0.7697, "step": 1256 }, { "epoch": 2.749794913863823, "grad_norm": 0.015471878461539745, "learning_rate": 0.0002773722627737226, "loss": 0.7398, "step": 1257 }, { "epoch": 2.7519824993163793, "grad_norm": 0.009130065329372883, "learning_rate": 0.00027493917274939175, "loss": 0.6993, "step": 1258 }, { "epoch": 2.754170084768936, "grad_norm": 0.007493583485484123, "learning_rate": 0.0002725060827250608, "loss": 0.6525, "step": 1259 }, { "epoch": 2.756357670221493, "grad_norm": 0.018882576376199722, "learning_rate": 0.00027007299270072994, "loss": 0.785, "step": 1260 }, { "epoch": 2.7585452556740497, "grad_norm": 0.010290750302374363, "learning_rate": 0.00026763990267639903, "loss": 0.6355, "step": 1261 }, { "epoch": 2.7607328411266066, "grad_norm": 0.020789271220564842, "learning_rate": 0.0002652068126520681, "loss": 0.6681, "step": 1262 }, { "epoch": 2.7629204265791634, "grad_norm": 0.010807972401380539, "learning_rate": 0.00026277372262773727, "loss": 0.8581, "step": 1263 }, { "epoch": 2.76510801203172, "grad_norm": 0.006756063550710678, "learning_rate": 0.0002603406326034063, "loss": 0.7499, "step": 1264 }, { "epoch": 2.767295597484277, "grad_norm": 0.013115596026182175, "learning_rate": 0.00025790754257907546, "loss": 0.6298, "step": 1265 }, { "epoch": 2.7694831829368334, "grad_norm": 0.010143927298486233, "learning_rate": 0.0002554744525547445, "loss": 0.7911, "step": 1266 }, { "epoch": 2.77167076838939, "grad_norm": 0.011593978852033615, "learning_rate": 0.00025304136253041364, "loss": 0.6558, "step": 1267 }, { "epoch": 2.773858353841947, "grad_norm": 0.011897698044776917, "learning_rate": 0.00025060827250608273, "loss": 0.7177, "step": 1268 }, { "epoch": 2.776045939294504, "grad_norm": 0.011287844739854336, "learning_rate": 0.0002481751824817518, "loss": 0.8625, "step": 1269 }, { "epoch": 2.77823352474706, "grad_norm": 0.017498012632131577, "learning_rate": 0.0002457420924574209, "loss": 0.896, "step": 1270 }, { "epoch": 2.780421110199617, "grad_norm": 0.011069230735301971, "learning_rate": 0.00024330900243309, "loss": 0.6567, "step": 1271 }, { "epoch": 2.782608695652174, "grad_norm": 0.005669731646776199, "learning_rate": 0.00024087591240875913, "loss": 0.7313, "step": 1272 }, { "epoch": 2.7847962811047307, "grad_norm": 0.02650737576186657, "learning_rate": 0.00023844282238442825, "loss": 0.8647, "step": 1273 }, { "epoch": 2.7869838665572875, "grad_norm": 0.010408868081867695, "learning_rate": 0.0002360097323600973, "loss": 0.8034, "step": 1274 }, { "epoch": 2.7891714520098443, "grad_norm": 0.013187460601329803, "learning_rate": 0.00023357664233576643, "loss": 0.8, "step": 1275 }, { "epoch": 2.791359037462401, "grad_norm": 0.009964399971067905, "learning_rate": 0.0002311435523114355, "loss": 0.8949, "step": 1276 }, { "epoch": 2.7935466229149575, "grad_norm": 0.01696036383509636, "learning_rate": 0.00022871046228710462, "loss": 0.678, "step": 1277 }, { "epoch": 2.7957342083675143, "grad_norm": 0.07283343374729156, "learning_rate": 0.00022627737226277374, "loss": 0.7264, "step": 1278 }, { "epoch": 2.797921793820071, "grad_norm": 0.007607647217810154, "learning_rate": 0.00022384428223844283, "loss": 0.8112, "step": 1279 }, { "epoch": 2.800109379272628, "grad_norm": 0.015119451098144054, "learning_rate": 0.00022141119221411192, "loss": 0.6995, "step": 1280 }, { "epoch": 2.8022969647251843, "grad_norm": 0.013507510535418987, "learning_rate": 0.00021897810218978101, "loss": 0.8193, "step": 1281 }, { "epoch": 2.804484550177741, "grad_norm": 0.007651912048459053, "learning_rate": 0.00021654501216545013, "loss": 0.5999, "step": 1282 }, { "epoch": 2.806672135630298, "grad_norm": 0.010115343146026134, "learning_rate": 0.00021411192214111925, "loss": 0.7694, "step": 1283 }, { "epoch": 2.8088597210828548, "grad_norm": 0.011188814416527748, "learning_rate": 0.00021167883211678832, "loss": 0.8099, "step": 1284 }, { "epoch": 2.8110473065354116, "grad_norm": 0.007763843517750502, "learning_rate": 0.00020924574209245744, "loss": 0.7182, "step": 1285 }, { "epoch": 2.8132348919879684, "grad_norm": 0.00900893472135067, "learning_rate": 0.0002068126520681265, "loss": 0.6297, "step": 1286 }, { "epoch": 2.815422477440525, "grad_norm": 0.006093029864132404, "learning_rate": 0.00020437956204379562, "loss": 1.0166, "step": 1287 }, { "epoch": 2.817610062893082, "grad_norm": 0.008186981081962585, "learning_rate": 0.00020194647201946474, "loss": 0.6606, "step": 1288 }, { "epoch": 2.8197976483456384, "grad_norm": 0.011285791173577309, "learning_rate": 0.0001995133819951338, "loss": 0.672, "step": 1289 }, { "epoch": 2.821985233798195, "grad_norm": 0.011607305146753788, "learning_rate": 0.00019708029197080293, "loss": 0.6903, "step": 1290 }, { "epoch": 2.824172819250752, "grad_norm": 0.008523947559297085, "learning_rate": 0.00019464720194647202, "loss": 0.8383, "step": 1291 }, { "epoch": 2.826360404703309, "grad_norm": 0.010200290009379387, "learning_rate": 0.00019221411192214114, "loss": 0.7475, "step": 1292 }, { "epoch": 2.828547990155865, "grad_norm": 0.01312936469912529, "learning_rate": 0.00018978102189781023, "loss": 0.7571, "step": 1293 }, { "epoch": 2.830735575608422, "grad_norm": 0.021754464134573936, "learning_rate": 0.00018734793187347932, "loss": 0.7915, "step": 1294 }, { "epoch": 2.832923161060979, "grad_norm": 0.022569775581359863, "learning_rate": 0.00018491484184914844, "loss": 0.7305, "step": 1295 }, { "epoch": 2.8351107465135357, "grad_norm": 0.009172527119517326, "learning_rate": 0.00018248175182481753, "loss": 0.8616, "step": 1296 }, { "epoch": 2.8372983319660925, "grad_norm": 0.00900851096957922, "learning_rate": 0.00018004866180048663, "loss": 0.8411, "step": 1297 }, { "epoch": 2.8394859174186493, "grad_norm": 0.033786166459321976, "learning_rate": 0.00017761557177615572, "loss": 0.6755, "step": 1298 }, { "epoch": 2.841673502871206, "grad_norm": 0.006091755349189043, "learning_rate": 0.0001751824817518248, "loss": 0.7822, "step": 1299 }, { "epoch": 2.8438610883237625, "grad_norm": 0.011280403472483158, "learning_rate": 0.0001727493917274939, "loss": 0.8669, "step": 1300 }, { "epoch": 2.8460486737763193, "grad_norm": 0.007846282795071602, "learning_rate": 0.00017031630170316302, "loss": 0.752, "step": 1301 }, { "epoch": 2.848236259228876, "grad_norm": 0.008928561583161354, "learning_rate": 0.00016788321167883211, "loss": 0.7062, "step": 1302 }, { "epoch": 2.850423844681433, "grad_norm": 0.0234297476708889, "learning_rate": 0.0001654501216545012, "loss": 0.7319, "step": 1303 }, { "epoch": 2.8526114301339898, "grad_norm": 0.07628759741783142, "learning_rate": 0.00016301703163017033, "loss": 0.8256, "step": 1304 }, { "epoch": 2.854799015586546, "grad_norm": 0.00962966587394476, "learning_rate": 0.00016058394160583942, "loss": 0.825, "step": 1305 }, { "epoch": 2.856986601039103, "grad_norm": 0.008182559162378311, "learning_rate": 0.00015815085158150854, "loss": 0.7628, "step": 1306 }, { "epoch": 2.8591741864916598, "grad_norm": 0.0483902171254158, "learning_rate": 0.00015571776155717763, "loss": 0.8631, "step": 1307 }, { "epoch": 2.8613617719442166, "grad_norm": 0.01323285885155201, "learning_rate": 0.00015328467153284672, "loss": 0.7958, "step": 1308 }, { "epoch": 2.8635493573967734, "grad_norm": 0.009712522849440575, "learning_rate": 0.00015085158150851582, "loss": 0.6506, "step": 1309 }, { "epoch": 2.8657369428493302, "grad_norm": 0.0073866224847733974, "learning_rate": 0.0001484184914841849, "loss": 0.5997, "step": 1310 }, { "epoch": 2.867924528301887, "grad_norm": 0.009534020908176899, "learning_rate": 0.00014598540145985403, "loss": 0.7732, "step": 1311 }, { "epoch": 2.8701121137544434, "grad_norm": 0.008029601536691189, "learning_rate": 0.00014355231143552312, "loss": 0.7837, "step": 1312 }, { "epoch": 2.8722996992070002, "grad_norm": 0.01388575229793787, "learning_rate": 0.0001411192214111922, "loss": 0.6959, "step": 1313 }, { "epoch": 2.874487284659557, "grad_norm": 0.011830773204565048, "learning_rate": 0.0001386861313868613, "loss": 0.7597, "step": 1314 }, { "epoch": 2.876674870112114, "grad_norm": 0.013655097223818302, "learning_rate": 0.0001362530413625304, "loss": 0.6103, "step": 1315 }, { "epoch": 2.8788624555646702, "grad_norm": 0.009793232195079327, "learning_rate": 0.00013381995133819952, "loss": 0.7327, "step": 1316 }, { "epoch": 2.881050041017227, "grad_norm": 0.009699089452624321, "learning_rate": 0.00013138686131386864, "loss": 0.7882, "step": 1317 }, { "epoch": 2.883237626469784, "grad_norm": 0.01353220921009779, "learning_rate": 0.00012895377128953773, "loss": 0.7567, "step": 1318 }, { "epoch": 2.8854252119223407, "grad_norm": 0.012468249537050724, "learning_rate": 0.00012652068126520682, "loss": 0.6502, "step": 1319 }, { "epoch": 2.8876127973748975, "grad_norm": 0.010982934385538101, "learning_rate": 0.0001240875912408759, "loss": 0.6542, "step": 1320 }, { "epoch": 2.8898003828274543, "grad_norm": 0.008489643223583698, "learning_rate": 0.000121654501216545, "loss": 0.7122, "step": 1321 }, { "epoch": 2.891987968280011, "grad_norm": 0.009710462763905525, "learning_rate": 0.00011922141119221412, "loss": 0.8059, "step": 1322 }, { "epoch": 2.8941755537325675, "grad_norm": 0.008519637398421764, "learning_rate": 0.00011678832116788322, "loss": 0.668, "step": 1323 }, { "epoch": 2.8963631391851243, "grad_norm": 0.012375866994261742, "learning_rate": 0.00011435523114355231, "loss": 0.8298, "step": 1324 }, { "epoch": 2.898550724637681, "grad_norm": 0.011852890253067017, "learning_rate": 0.00011192214111922141, "loss": 1.0037, "step": 1325 }, { "epoch": 2.900738310090238, "grad_norm": 0.01731940545141697, "learning_rate": 0.00010948905109489051, "loss": 0.7002, "step": 1326 }, { "epoch": 2.902925895542795, "grad_norm": 0.026805153116583824, "learning_rate": 0.00010705596107055963, "loss": 0.9983, "step": 1327 }, { "epoch": 2.905113480995351, "grad_norm": 0.011630130000412464, "learning_rate": 0.00010462287104622872, "loss": 0.575, "step": 1328 }, { "epoch": 2.907301066447908, "grad_norm": 0.012041180394589901, "learning_rate": 0.00010218978102189781, "loss": 0.6631, "step": 1329 }, { "epoch": 2.909488651900465, "grad_norm": 0.009331166744232178, "learning_rate": 9.97566909975669e-05, "loss": 0.7661, "step": 1330 }, { "epoch": 2.9116762373530216, "grad_norm": 0.010035173036158085, "learning_rate": 9.732360097323601e-05, "loss": 0.7367, "step": 1331 }, { "epoch": 2.9138638228055784, "grad_norm": 0.0184579249471426, "learning_rate": 9.489051094890511e-05, "loss": 0.7267, "step": 1332 }, { "epoch": 2.9160514082581352, "grad_norm": 0.019723238423466682, "learning_rate": 9.245742092457422e-05, "loss": 0.9285, "step": 1333 }, { "epoch": 2.918238993710692, "grad_norm": 0.01119768712669611, "learning_rate": 9.002433090024331e-05, "loss": 0.8886, "step": 1334 }, { "epoch": 2.9204265791632484, "grad_norm": 0.010187883861362934, "learning_rate": 8.75912408759124e-05, "loss": 0.6872, "step": 1335 }, { "epoch": 2.9226141646158053, "grad_norm": 0.006695912219583988, "learning_rate": 8.515815085158151e-05, "loss": 0.6093, "step": 1336 }, { "epoch": 2.924801750068362, "grad_norm": 0.009726252406835556, "learning_rate": 8.27250608272506e-05, "loss": 0.735, "step": 1337 }, { "epoch": 2.926989335520919, "grad_norm": 0.006968527100980282, "learning_rate": 8.029197080291971e-05, "loss": 0.9525, "step": 1338 }, { "epoch": 2.9291769209734753, "grad_norm": 0.019444549456238747, "learning_rate": 7.785888077858882e-05, "loss": 0.7423, "step": 1339 }, { "epoch": 2.931364506426032, "grad_norm": 0.014326276257634163, "learning_rate": 7.542579075425791e-05, "loss": 0.7437, "step": 1340 }, { "epoch": 2.933552091878589, "grad_norm": 0.008168605156242847, "learning_rate": 7.299270072992701e-05, "loss": 0.7014, "step": 1341 }, { "epoch": 2.9357396773311457, "grad_norm": 0.010011604055762291, "learning_rate": 7.05596107055961e-05, "loss": 0.6541, "step": 1342 }, { "epoch": 2.9379272627837025, "grad_norm": 0.013739430345594883, "learning_rate": 6.81265206812652e-05, "loss": 0.7885, "step": 1343 }, { "epoch": 2.9401148482362593, "grad_norm": 0.01414500456303358, "learning_rate": 6.569343065693432e-05, "loss": 0.9111, "step": 1344 }, { "epoch": 2.942302433688816, "grad_norm": 0.010208160616457462, "learning_rate": 6.326034063260341e-05, "loss": 0.6641, "step": 1345 }, { "epoch": 2.944490019141373, "grad_norm": 0.012237477116286755, "learning_rate": 6.08272506082725e-05, "loss": 0.6199, "step": 1346 }, { "epoch": 2.9466776045939294, "grad_norm": 0.008850525133311749, "learning_rate": 5.839416058394161e-05, "loss": 0.8436, "step": 1347 }, { "epoch": 2.948865190046486, "grad_norm": 0.01408157218247652, "learning_rate": 5.596107055961071e-05, "loss": 0.667, "step": 1348 }, { "epoch": 2.951052775499043, "grad_norm": 0.017354557290673256, "learning_rate": 5.352798053527981e-05, "loss": 0.7591, "step": 1349 }, { "epoch": 2.9532403609516, "grad_norm": 0.013411460444331169, "learning_rate": 5.1094890510948905e-05, "loss": 0.8248, "step": 1350 }, { "epoch": 2.955427946404156, "grad_norm": 0.018828334286808968, "learning_rate": 4.8661800486618005e-05, "loss": 0.8297, "step": 1351 }, { "epoch": 2.957615531856713, "grad_norm": 0.012131531722843647, "learning_rate": 4.622871046228711e-05, "loss": 0.8469, "step": 1352 }, { "epoch": 2.95980311730927, "grad_norm": 0.017933214083313942, "learning_rate": 4.37956204379562e-05, "loss": 0.886, "step": 1353 }, { "epoch": 2.9619907027618266, "grad_norm": 0.007120661437511444, "learning_rate": 4.13625304136253e-05, "loss": 0.7975, "step": 1354 }, { "epoch": 2.9641782882143834, "grad_norm": 0.008959448896348476, "learning_rate": 3.892944038929441e-05, "loss": 0.7624, "step": 1355 }, { "epoch": 2.9663658736669403, "grad_norm": 0.00703001581132412, "learning_rate": 3.649635036496351e-05, "loss": 0.9414, "step": 1356 }, { "epoch": 2.968553459119497, "grad_norm": 0.009628667496144772, "learning_rate": 3.40632603406326e-05, "loss": 0.7348, "step": 1357 }, { "epoch": 2.9707410445720535, "grad_norm": 0.010123343206942081, "learning_rate": 3.1630170316301705e-05, "loss": 0.5589, "step": 1358 }, { "epoch": 2.9729286300246103, "grad_norm": 0.012991656549274921, "learning_rate": 2.9197080291970804e-05, "loss": 0.7015, "step": 1359 }, { "epoch": 2.975116215477167, "grad_norm": 0.008844063617289066, "learning_rate": 2.6763990267639907e-05, "loss": 0.7395, "step": 1360 }, { "epoch": 2.977303800929724, "grad_norm": 0.010974117554724216, "learning_rate": 2.4330900243309002e-05, "loss": 0.815, "step": 1361 }, { "epoch": 2.9794913863822803, "grad_norm": 0.011202923953533173, "learning_rate": 2.18978102189781e-05, "loss": 0.7593, "step": 1362 }, { "epoch": 2.981678971834837, "grad_norm": 0.011004596017301083, "learning_rate": 1.9464720194647204e-05, "loss": 0.6727, "step": 1363 }, { "epoch": 2.983866557287394, "grad_norm": 0.009554206393659115, "learning_rate": 1.70316301703163e-05, "loss": 0.8229, "step": 1364 }, { "epoch": 2.9860541427399507, "grad_norm": 0.013814912177622318, "learning_rate": 1.4598540145985402e-05, "loss": 1.0031, "step": 1365 }, { "epoch": 2.9882417281925076, "grad_norm": 0.006289259064942598, "learning_rate": 1.2165450121654501e-05, "loss": 0.6995, "step": 1366 }, { "epoch": 2.9904293136450644, "grad_norm": 0.008405916392803192, "learning_rate": 9.732360097323602e-06, "loss": 0.7135, "step": 1367 }, { "epoch": 2.992616899097621, "grad_norm": 0.012755095958709717, "learning_rate": 7.299270072992701e-06, "loss": 0.8523, "step": 1368 }, { "epoch": 2.994804484550178, "grad_norm": 0.011079053394496441, "learning_rate": 4.866180048661801e-06, "loss": 0.6673, "step": 1369 }, { "epoch": 2.9969920700027344, "grad_norm": 0.011697685346007347, "learning_rate": 2.4330900243309005e-06, "loss": 0.7831, "step": 1370 }, { "epoch": 2.999179655455291, "grad_norm": 0.0072103943675756454, "learning_rate": 0.0, "loss": 0.8479, "step": 1371 }, { "epoch": 2.999179655455291, "step": 1371, "total_flos": 4.3134948379459584e+17, "train_loss": 0.7785058324133541, "train_runtime": 1561.6761, "train_samples_per_second": 14.048, "train_steps_per_second": 0.878 } ], "logging_steps": 1.0, "max_steps": 1371, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.3134948379459584e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }