| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.999179655455291, | |
| "eval_steps": 500, | |
| "global_step": 1371, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0021875854525567405, | |
| "grad_norm": 0.017111310735344887, | |
| "learning_rate": 2.173913043478261e-05, | |
| "loss": 1.059, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.004375170905113481, | |
| "grad_norm": 0.018623707816004753, | |
| "learning_rate": 4.347826086956522e-05, | |
| "loss": 1.315, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.006562756357670222, | |
| "grad_norm": 0.018533790484070778, | |
| "learning_rate": 6.521739130434782e-05, | |
| "loss": 0.9224, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.008750341810226962, | |
| "grad_norm": 0.015920396894216537, | |
| "learning_rate": 8.695652173913044e-05, | |
| "loss": 0.9201, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.010937927262783703, | |
| "grad_norm": 0.01558469608426094, | |
| "learning_rate": 0.00010869565217391305, | |
| "loss": 0.8457, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.013125512715340444, | |
| "grad_norm": 0.023962153121829033, | |
| "learning_rate": 0.00013043478260869564, | |
| "loss": 0.9347, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.015313098167897183, | |
| "grad_norm": 0.029316680505871773, | |
| "learning_rate": 0.00015217391304347827, | |
| "loss": 0.8043, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.017500683620453924, | |
| "grad_norm": 0.028927722945809364, | |
| "learning_rate": 0.00017391304347826088, | |
| "loss": 0.9963, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.019688269073010665, | |
| "grad_norm": 0.025050047785043716, | |
| "learning_rate": 0.0001956521739130435, | |
| "loss": 0.7861, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.021875854525567406, | |
| "grad_norm": 0.04486666992306709, | |
| "learning_rate": 0.0002173913043478261, | |
| "loss": 1.0884, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.024063439978124147, | |
| "grad_norm": 0.035062652081251144, | |
| "learning_rate": 0.00023913043478260867, | |
| "loss": 0.9876, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.026251025430680888, | |
| "grad_norm": 0.033111147582530975, | |
| "learning_rate": 0.0002608695652173913, | |
| "loss": 0.8838, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.028438610883237625, | |
| "grad_norm": 0.04362301528453827, | |
| "learning_rate": 0.00028260869565217394, | |
| "loss": 0.8189, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.030626196335794366, | |
| "grad_norm": 0.04369740933179855, | |
| "learning_rate": 0.00030434782608695655, | |
| "loss": 0.9065, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03281378178835111, | |
| "grad_norm": 0.04280918091535568, | |
| "learning_rate": 0.0003260869565217391, | |
| "loss": 0.8706, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.03500136724090785, | |
| "grad_norm": 0.06369622051715851, | |
| "learning_rate": 0.00034782608695652176, | |
| "loss": 1.0769, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.03718895269346459, | |
| "grad_norm": 0.04528605565428734, | |
| "learning_rate": 0.00036956521739130437, | |
| "loss": 0.9662, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.03937653814602133, | |
| "grad_norm": 0.045731619000434875, | |
| "learning_rate": 0.000391304347826087, | |
| "loss": 0.7727, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.04156412359857807, | |
| "grad_norm": 0.03585459291934967, | |
| "learning_rate": 0.0004130434782608696, | |
| "loss": 0.7813, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.04375170905113481, | |
| "grad_norm": 0.07666835933923721, | |
| "learning_rate": 0.0004347826086956522, | |
| "loss": 1.1307, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04593929450369155, | |
| "grad_norm": 0.02985576167702675, | |
| "learning_rate": 0.0004565217391304348, | |
| "loss": 0.785, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.04812687995624829, | |
| "grad_norm": 0.02983052283525467, | |
| "learning_rate": 0.00047826086956521735, | |
| "loss": 0.8323, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.050314465408805034, | |
| "grad_norm": 0.09363115578889847, | |
| "learning_rate": 0.0005, | |
| "loss": 0.6885, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.052502050861361775, | |
| "grad_norm": 0.47768905758857727, | |
| "learning_rate": 0.0005217391304347826, | |
| "loss": 1.0143, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.05468963631391851, | |
| "grad_norm": 0.1065075695514679, | |
| "learning_rate": 0.0005434782608695652, | |
| "loss": 0.8331, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.05687722176647525, | |
| "grad_norm": 0.023991186171770096, | |
| "learning_rate": 0.0005652173913043479, | |
| "loss": 0.837, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.05906480721903199, | |
| "grad_norm": 0.020501986145973206, | |
| "learning_rate": 0.0005869565217391304, | |
| "loss": 0.7117, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.06125239267158873, | |
| "grad_norm": 0.052776217460632324, | |
| "learning_rate": 0.0006086956521739131, | |
| "loss": 0.9429, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06343997812414548, | |
| "grad_norm": 0.0542248971760273, | |
| "learning_rate": 0.0006304347826086957, | |
| "loss": 0.7759, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.06562756357670221, | |
| "grad_norm": 0.061998218297958374, | |
| "learning_rate": 0.0006521739130434782, | |
| "loss": 0.7904, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06781514902925896, | |
| "grad_norm": 0.03401396796107292, | |
| "learning_rate": 0.0006739130434782609, | |
| "loss": 0.7386, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.0700027344818157, | |
| "grad_norm": 0.027073826640844345, | |
| "learning_rate": 0.0006956521739130435, | |
| "loss": 0.6702, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.07219031993437244, | |
| "grad_norm": 0.0468217171728611, | |
| "learning_rate": 0.0007173913043478261, | |
| "loss": 0.9944, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.07437790538692918, | |
| "grad_norm": 0.1130312904715538, | |
| "learning_rate": 0.0007391304347826087, | |
| "loss": 0.8396, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.07656549083948591, | |
| "grad_norm": 0.05662137269973755, | |
| "learning_rate": 0.0007608695652173914, | |
| "loss": 0.918, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.07875307629204266, | |
| "grad_norm": 0.030792295932769775, | |
| "learning_rate": 0.000782608695652174, | |
| "loss": 1.0882, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0809406617445994, | |
| "grad_norm": 0.02346004731953144, | |
| "learning_rate": 0.0008043478260869566, | |
| "loss": 0.9276, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.08312824719715614, | |
| "grad_norm": 0.0640161782503128, | |
| "learning_rate": 0.0008260869565217392, | |
| "loss": 0.9607, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08531583264971287, | |
| "grad_norm": 0.01127663068473339, | |
| "learning_rate": 0.0008478260869565217, | |
| "loss": 0.7476, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.08750341810226962, | |
| "grad_norm": 0.020388390868902206, | |
| "learning_rate": 0.0008695652173913044, | |
| "loss": 0.9294, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08969100355482636, | |
| "grad_norm": 0.011159627698361874, | |
| "learning_rate": 0.0008913043478260869, | |
| "loss": 0.7403, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.0918785890073831, | |
| "grad_norm": 0.01922360621392727, | |
| "learning_rate": 0.0009130434782608696, | |
| "loss": 0.9901, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.09406617445993984, | |
| "grad_norm": 0.022499792277812958, | |
| "learning_rate": 0.0009347826086956521, | |
| "loss": 0.8491, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.09625375991249659, | |
| "grad_norm": 0.019557347521185875, | |
| "learning_rate": 0.0009565217391304347, | |
| "loss": 0.8855, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.09844134536505332, | |
| "grad_norm": 0.023644007742404938, | |
| "learning_rate": 0.0009782608695652175, | |
| "loss": 0.6828, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.10062893081761007, | |
| "grad_norm": 0.0180030707269907, | |
| "learning_rate": 0.001, | |
| "loss": 1.0325, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.1028165162701668, | |
| "grad_norm": 0.026917221024632454, | |
| "learning_rate": 0.0010217391304347826, | |
| "loss": 1.0596, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.10500410172272355, | |
| "grad_norm": 0.016843197867274284, | |
| "learning_rate": 0.0010434782608695651, | |
| "loss": 0.6183, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.10719168717528028, | |
| "grad_norm": 0.0393221415579319, | |
| "learning_rate": 0.001065217391304348, | |
| "loss": 0.9009, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.10937927262783702, | |
| "grad_norm": 0.025003232061862946, | |
| "learning_rate": 0.0010869565217391304, | |
| "loss": 0.9701, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11156685808039377, | |
| "grad_norm": 0.029358206316828728, | |
| "learning_rate": 0.001108695652173913, | |
| "loss": 0.8351, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.1137544435329505, | |
| "grad_norm": 0.02484138496220112, | |
| "learning_rate": 0.0011304347826086958, | |
| "loss": 0.906, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.11594202898550725, | |
| "grad_norm": 0.012963383458554745, | |
| "learning_rate": 0.0011521739130434783, | |
| "loss": 1.098, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.11812961443806398, | |
| "grad_norm": 0.022173907607793808, | |
| "learning_rate": 0.0011739130434782609, | |
| "loss": 0.9086, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.12031719989062073, | |
| "grad_norm": 0.018844394013285637, | |
| "learning_rate": 0.0011956521739130434, | |
| "loss": 0.8087, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.12250478534317746, | |
| "grad_norm": 0.01081649400293827, | |
| "learning_rate": 0.0012173913043478262, | |
| "loss": 0.6714, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.12469237079573421, | |
| "grad_norm": 0.012590788304805756, | |
| "learning_rate": 0.0012391304347826087, | |
| "loss": 0.8224, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.12687995624829096, | |
| "grad_norm": 0.007173096761107445, | |
| "learning_rate": 0.0012608695652173913, | |
| "loss": 0.9208, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.1290675417008477, | |
| "grad_norm": 0.023659205064177513, | |
| "learning_rate": 0.001282608695652174, | |
| "loss": 0.8385, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.13125512715340443, | |
| "grad_norm": 0.016100220382213593, | |
| "learning_rate": 0.0013043478260869564, | |
| "loss": 0.6562, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13344271260596116, | |
| "grad_norm": 0.01680757850408554, | |
| "learning_rate": 0.0013260869565217392, | |
| "loss": 0.8996, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.13563029805851792, | |
| "grad_norm": 0.02230382151901722, | |
| "learning_rate": 0.0013478260869565217, | |
| "loss": 0.7927, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.13781788351107466, | |
| "grad_norm": 0.03682897984981537, | |
| "learning_rate": 0.0013695652173913043, | |
| "loss": 0.8507, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.1400054689636314, | |
| "grad_norm": 0.02873164229094982, | |
| "learning_rate": 0.001391304347826087, | |
| "loss": 0.7379, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.14219305441618812, | |
| "grad_norm": 0.08291471749544144, | |
| "learning_rate": 0.0014130434782608696, | |
| "loss": 0.9686, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.1443806398687449, | |
| "grad_norm": 0.005648148711770773, | |
| "learning_rate": 0.0014347826086956522, | |
| "loss": 0.9365, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.14656822532130162, | |
| "grad_norm": 0.010281619615852833, | |
| "learning_rate": 0.0014565217391304347, | |
| "loss": 0.7989, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.14875581077385835, | |
| "grad_norm": 0.010221214964985847, | |
| "learning_rate": 0.0014782608695652175, | |
| "loss": 0.7567, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.1509433962264151, | |
| "grad_norm": 0.008735002018511295, | |
| "learning_rate": 0.0015, | |
| "loss": 0.9644, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.15313098167897182, | |
| "grad_norm": 0.021798064932227135, | |
| "learning_rate": 0.0015217391304347828, | |
| "loss": 0.8451, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.15531856713152858, | |
| "grad_norm": 0.011285695247352123, | |
| "learning_rate": 0.0015434782608695651, | |
| "loss": 0.9334, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.15750615258408532, | |
| "grad_norm": 0.005321874748915434, | |
| "learning_rate": 0.001565217391304348, | |
| "loss": 0.9424, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.15969373803664205, | |
| "grad_norm": 0.010020822286605835, | |
| "learning_rate": 0.0015869565217391305, | |
| "loss": 0.7219, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.1618813234891988, | |
| "grad_norm": 0.008384345099329948, | |
| "learning_rate": 0.0016086956521739132, | |
| "loss": 0.8169, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.16406890894175555, | |
| "grad_norm": 0.010866906493902206, | |
| "learning_rate": 0.0016304347826086956, | |
| "loss": 0.7556, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.16625649439431228, | |
| "grad_norm": 0.01588907279074192, | |
| "learning_rate": 0.0016521739130434783, | |
| "loss": 0.766, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.16844407984686902, | |
| "grad_norm": 0.018410617485642433, | |
| "learning_rate": 0.001673913043478261, | |
| "loss": 0.8201, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.17063166529942575, | |
| "grad_norm": 0.01711914874613285, | |
| "learning_rate": 0.0016956521739130434, | |
| "loss": 1.0454, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.1728192507519825, | |
| "grad_norm": 0.040495071560144424, | |
| "learning_rate": 0.001717391304347826, | |
| "loss": 0.9048, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.17500683620453925, | |
| "grad_norm": 0.008844586089253426, | |
| "learning_rate": 0.0017391304347826088, | |
| "loss": 0.9716, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.17719442165709598, | |
| "grad_norm": 0.020504243671894073, | |
| "learning_rate": 0.0017608695652173915, | |
| "loss": 0.706, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.1793820071096527, | |
| "grad_norm": 0.005656155291944742, | |
| "learning_rate": 0.0017826086956521739, | |
| "loss": 0.8948, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.18156959256220945, | |
| "grad_norm": 0.011604691855609417, | |
| "learning_rate": 0.0018043478260869566, | |
| "loss": 0.9465, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.1837571780147662, | |
| "grad_norm": 0.004078141879290342, | |
| "learning_rate": 0.0018260869565217392, | |
| "loss": 0.9462, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.18594476346732294, | |
| "grad_norm": 0.008594767190515995, | |
| "learning_rate": 0.0018478260869565217, | |
| "loss": 0.9287, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.18813234891987968, | |
| "grad_norm": 0.008353278040885925, | |
| "learning_rate": 0.0018695652173913043, | |
| "loss": 0.8743, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.1903199343724364, | |
| "grad_norm": 0.010675789788365364, | |
| "learning_rate": 0.001891304347826087, | |
| "loss": 0.7836, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.19250751982499317, | |
| "grad_norm": 0.004695142153650522, | |
| "learning_rate": 0.0019130434782608694, | |
| "loss": 0.7587, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.1946951052775499, | |
| "grad_norm": 0.005462712608277798, | |
| "learning_rate": 0.0019347826086956522, | |
| "loss": 0.9685, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.19688269073010664, | |
| "grad_norm": 0.005652555730193853, | |
| "learning_rate": 0.001956521739130435, | |
| "loss": 0.7821, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.19907027618266337, | |
| "grad_norm": 0.0058873966336250305, | |
| "learning_rate": 0.0019782608695652175, | |
| "loss": 0.8027, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.20125786163522014, | |
| "grad_norm": 0.004672915209084749, | |
| "learning_rate": 0.002, | |
| "loss": 0.8367, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.20344544708777687, | |
| "grad_norm": 0.004535092506557703, | |
| "learning_rate": 0.0020217391304347826, | |
| "loss": 0.7124, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.2056330325403336, | |
| "grad_norm": 0.003576159942895174, | |
| "learning_rate": 0.002043478260869565, | |
| "loss": 0.8596, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.20782061799289034, | |
| "grad_norm": 0.005423200782388449, | |
| "learning_rate": 0.0020652173913043477, | |
| "loss": 0.8582, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.2100082034454471, | |
| "grad_norm": 0.00573402363806963, | |
| "learning_rate": 0.0020869565217391303, | |
| "loss": 0.6699, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.21219578889800383, | |
| "grad_norm": 0.004158989991992712, | |
| "learning_rate": 0.0021086956521739132, | |
| "loss": 0.9819, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.21438337435056057, | |
| "grad_norm": 0.009630310349166393, | |
| "learning_rate": 0.002130434782608696, | |
| "loss": 1.0568, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2165709598031173, | |
| "grad_norm": 0.006703687831759453, | |
| "learning_rate": 0.0021521739130434783, | |
| "loss": 0.7342, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.21875854525567404, | |
| "grad_norm": 0.008040892891585827, | |
| "learning_rate": 0.002173913043478261, | |
| "loss": 0.8339, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2209461307082308, | |
| "grad_norm": 0.00757247768342495, | |
| "learning_rate": 0.0021956521739130434, | |
| "loss": 0.934, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.22313371616078753, | |
| "grad_norm": 0.009981849230825901, | |
| "learning_rate": 0.002217391304347826, | |
| "loss": 0.9303, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.22532130161334427, | |
| "grad_norm": 0.007599060423672199, | |
| "learning_rate": 0.0022391304347826086, | |
| "loss": 0.7928, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.227508887065901, | |
| "grad_norm": 0.010048196651041508, | |
| "learning_rate": 0.0022608695652173915, | |
| "loss": 0.8811, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.22969647251845776, | |
| "grad_norm": 0.008185365237295628, | |
| "learning_rate": 0.002282608695652174, | |
| "loss": 0.8586, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.2318840579710145, | |
| "grad_norm": 0.02917146123945713, | |
| "learning_rate": 0.0023043478260869566, | |
| "loss": 0.8296, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.23407164342357123, | |
| "grad_norm": 0.03497055917978287, | |
| "learning_rate": 0.002326086956521739, | |
| "loss": 0.9071, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.23625922887612796, | |
| "grad_norm": 0.0127785699442029, | |
| "learning_rate": 0.0023478260869565218, | |
| "loss": 0.8497, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.23844681432868473, | |
| "grad_norm": 0.007704317104071379, | |
| "learning_rate": 0.0023695652173913043, | |
| "loss": 0.836, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.24063439978124146, | |
| "grad_norm": 0.007215225137770176, | |
| "learning_rate": 0.002391304347826087, | |
| "loss": 0.9434, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.2428219852337982, | |
| "grad_norm": 0.008061757311224937, | |
| "learning_rate": 0.00241304347826087, | |
| "loss": 0.9648, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.24500957068635493, | |
| "grad_norm": 0.007565490435808897, | |
| "learning_rate": 0.0024347826086956524, | |
| "loss": 0.9409, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.2471971561389117, | |
| "grad_norm": 0.00492995698004961, | |
| "learning_rate": 0.002456521739130435, | |
| "loss": 0.8753, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.24938474159146842, | |
| "grad_norm": 0.005053890403360128, | |
| "learning_rate": 0.0024782608695652175, | |
| "loss": 0.8239, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.25157232704402516, | |
| "grad_norm": 0.009602397680282593, | |
| "learning_rate": 0.0025, | |
| "loss": 0.7825, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.2537599124965819, | |
| "grad_norm": 0.004248041659593582, | |
| "learning_rate": 0.0025217391304347826, | |
| "loss": 0.7295, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2559474979491386, | |
| "grad_norm": 0.009284190833568573, | |
| "learning_rate": 0.002543478260869565, | |
| "loss": 0.9205, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.2581350834016954, | |
| "grad_norm": 0.00780320493504405, | |
| "learning_rate": 0.002565217391304348, | |
| "loss": 0.8928, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.2603226688542521, | |
| "grad_norm": 0.014100235886871815, | |
| "learning_rate": 0.0025869565217391307, | |
| "loss": 1.0, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.26251025430680885, | |
| "grad_norm": 0.0031979018822312355, | |
| "learning_rate": 0.002608695652173913, | |
| "loss": 0.8661, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2646978397593656, | |
| "grad_norm": 0.010853100568056107, | |
| "learning_rate": 0.002630434782608696, | |
| "loss": 0.7197, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.2668854252119223, | |
| "grad_norm": 0.00902815256267786, | |
| "learning_rate": 0.0026521739130434784, | |
| "loss": 0.8987, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2690730106644791, | |
| "grad_norm": 0.006882428657263517, | |
| "learning_rate": 0.002673913043478261, | |
| "loss": 0.7676, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.27126059611703585, | |
| "grad_norm": 0.014947020448744297, | |
| "learning_rate": 0.0026956521739130435, | |
| "loss": 0.8857, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.27344818156959255, | |
| "grad_norm": 0.005454353056848049, | |
| "learning_rate": 0.002717391304347826, | |
| "loss": 0.7277, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.2756357670221493, | |
| "grad_norm": 0.0050047156400978565, | |
| "learning_rate": 0.0027391304347826086, | |
| "loss": 0.9257, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.277823352474706, | |
| "grad_norm": 0.008737878873944283, | |
| "learning_rate": 0.002760869565217391, | |
| "loss": 0.8598, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.2800109379272628, | |
| "grad_norm": 0.008086539804935455, | |
| "learning_rate": 0.002782608695652174, | |
| "loss": 0.9844, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.28219852337981954, | |
| "grad_norm": 0.01979847252368927, | |
| "learning_rate": 0.0028043478260869567, | |
| "loss": 0.9718, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.28438610883237625, | |
| "grad_norm": 0.016869032755494118, | |
| "learning_rate": 0.002826086956521739, | |
| "loss": 0.8311, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.286573694284933, | |
| "grad_norm": 0.008929664269089699, | |
| "learning_rate": 0.0028478260869565218, | |
| "loss": 0.6482, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.2887612797374898, | |
| "grad_norm": 0.013361562974750996, | |
| "learning_rate": 0.0028695652173913043, | |
| "loss": 0.991, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.2909488651900465, | |
| "grad_norm": 0.0223986953496933, | |
| "learning_rate": 0.002891304347826087, | |
| "loss": 0.8566, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.29313645064260324, | |
| "grad_norm": 0.00690645445138216, | |
| "learning_rate": 0.0029130434782608694, | |
| "loss": 0.7706, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.29532403609515995, | |
| "grad_norm": 0.007177585270255804, | |
| "learning_rate": 0.0029347826086956524, | |
| "loss": 0.7896, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.2975116215477167, | |
| "grad_norm": 0.024162376299500465, | |
| "learning_rate": 0.002956521739130435, | |
| "loss": 0.8316, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.29969920700027347, | |
| "grad_norm": 0.009236878715455532, | |
| "learning_rate": 0.0029782608695652175, | |
| "loss": 0.7563, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.3018867924528302, | |
| "grad_norm": 0.008098084479570389, | |
| "learning_rate": 0.003, | |
| "loss": 0.9313, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.30407437790538694, | |
| "grad_norm": 0.01629616692662239, | |
| "learning_rate": 0.002997566909975669, | |
| "loss": 0.7942, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.30626196335794365, | |
| "grad_norm": 0.013256334699690342, | |
| "learning_rate": 0.0029951338199513382, | |
| "loss": 0.819, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3084495488105004, | |
| "grad_norm": 0.016614550724625587, | |
| "learning_rate": 0.0029927007299270073, | |
| "loss": 0.823, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.31063713426305717, | |
| "grad_norm": 0.015185157768428326, | |
| "learning_rate": 0.0029902676399026764, | |
| "loss": 0.8534, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.3128247197156139, | |
| "grad_norm": 0.012511268258094788, | |
| "learning_rate": 0.0029878345498783455, | |
| "loss": 1.0021, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.31501230516817064, | |
| "grad_norm": 0.03368072584271431, | |
| "learning_rate": 0.0029854014598540146, | |
| "loss": 0.9333, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3171998906207274, | |
| "grad_norm": 0.014194028452038765, | |
| "learning_rate": 0.0029829683698296837, | |
| "loss": 0.7353, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.3193874760732841, | |
| "grad_norm": 0.022817425429821014, | |
| "learning_rate": 0.002980535279805353, | |
| "loss": 0.8653, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.32157506152584087, | |
| "grad_norm": 0.034395311027765274, | |
| "learning_rate": 0.002978102189781022, | |
| "loss": 0.7872, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.3237626469783976, | |
| "grad_norm": 0.04415661096572876, | |
| "learning_rate": 0.002975669099756691, | |
| "loss": 0.8668, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.32595023243095433, | |
| "grad_norm": 0.013315894640982151, | |
| "learning_rate": 0.0029732360097323605, | |
| "loss": 0.7012, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.3281378178835111, | |
| "grad_norm": 0.01931261457502842, | |
| "learning_rate": 0.002970802919708029, | |
| "loss": 0.6949, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3303254033360678, | |
| "grad_norm": 0.01766936294734478, | |
| "learning_rate": 0.0029683698296836983, | |
| "loss": 1.0637, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.33251298878862456, | |
| "grad_norm": 0.04097762331366539, | |
| "learning_rate": 0.002965936739659368, | |
| "loss": 0.6945, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.33470057424118127, | |
| "grad_norm": 0.019335204735398293, | |
| "learning_rate": 0.0029635036496350364, | |
| "loss": 0.9677, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.33688815969373803, | |
| "grad_norm": 0.02775772474706173, | |
| "learning_rate": 0.0029610705596107055, | |
| "loss": 0.9445, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.3390757451462948, | |
| "grad_norm": 0.012738276273012161, | |
| "learning_rate": 0.002958637469586375, | |
| "loss": 0.7254, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.3412633305988515, | |
| "grad_norm": 0.025990145280957222, | |
| "learning_rate": 0.0029562043795620437, | |
| "loss": 0.7296, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.34345091605140826, | |
| "grad_norm": 0.08288227766752243, | |
| "learning_rate": 0.002953771289537713, | |
| "loss": 0.8595, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.345638501503965, | |
| "grad_norm": 0.05340643599629402, | |
| "learning_rate": 0.002951338199513382, | |
| "loss": 0.7425, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.34782608695652173, | |
| "grad_norm": 0.030417539179325104, | |
| "learning_rate": 0.0029489051094890514, | |
| "loss": 0.7976, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.3500136724090785, | |
| "grad_norm": 0.04232973977923393, | |
| "learning_rate": 0.00294647201946472, | |
| "loss": 0.8764, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3522012578616352, | |
| "grad_norm": 0.025519737973809242, | |
| "learning_rate": 0.002944038929440389, | |
| "loss": 0.8247, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.35438884331419196, | |
| "grad_norm": 0.046103380620479584, | |
| "learning_rate": 0.0029416058394160587, | |
| "loss": 0.8432, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.3565764287667487, | |
| "grad_norm": 0.01843344047665596, | |
| "learning_rate": 0.0029391727493917274, | |
| "loss": 0.8721, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.3587640142193054, | |
| "grad_norm": 0.029839089140295982, | |
| "learning_rate": 0.0029367396593673965, | |
| "loss": 0.7955, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.3609515996718622, | |
| "grad_norm": 0.023799125105142593, | |
| "learning_rate": 0.002934306569343066, | |
| "loss": 0.8929, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.3631391851244189, | |
| "grad_norm": 0.01695132628083229, | |
| "learning_rate": 0.0029318734793187346, | |
| "loss": 0.8149, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.36532677057697566, | |
| "grad_norm": 0.01710570976138115, | |
| "learning_rate": 0.0029294403892944037, | |
| "loss": 0.9953, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.3675143560295324, | |
| "grad_norm": 0.008958813734352589, | |
| "learning_rate": 0.0029270072992700733, | |
| "loss": 0.6486, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.3697019414820891, | |
| "grad_norm": 0.02033080905675888, | |
| "learning_rate": 0.002924574209245742, | |
| "loss": 0.7759, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.3718895269346459, | |
| "grad_norm": 0.01737876608967781, | |
| "learning_rate": 0.002922141119221411, | |
| "loss": 0.7998, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.37407711238720265, | |
| "grad_norm": 0.011925026774406433, | |
| "learning_rate": 0.0029197080291970805, | |
| "loss": 0.6405, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.37626469783975935, | |
| "grad_norm": 0.010621492750942707, | |
| "learning_rate": 0.0029172749391727496, | |
| "loss": 0.735, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3784522832923161, | |
| "grad_norm": 0.02744341269135475, | |
| "learning_rate": 0.0029148418491484183, | |
| "loss": 0.9386, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.3806398687448728, | |
| "grad_norm": 0.010641987435519695, | |
| "learning_rate": 0.002912408759124088, | |
| "loss": 0.6368, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.3828274541974296, | |
| "grad_norm": 0.016506191343069077, | |
| "learning_rate": 0.002909975669099757, | |
| "loss": 0.7212, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.38501503964998635, | |
| "grad_norm": 0.029457593336701393, | |
| "learning_rate": 0.0029075425790754256, | |
| "loss": 0.8386, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.38720262510254305, | |
| "grad_norm": 0.008680049329996109, | |
| "learning_rate": 0.002905109489051095, | |
| "loss": 0.8827, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.3893902105550998, | |
| "grad_norm": 0.029479682445526123, | |
| "learning_rate": 0.002902676399026764, | |
| "loss": 0.797, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.3915777960076566, | |
| "grad_norm": 0.01670117862522602, | |
| "learning_rate": 0.002900243309002433, | |
| "loss": 0.7164, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.3937653814602133, | |
| "grad_norm": 0.019070839509367943, | |
| "learning_rate": 0.0028978102189781024, | |
| "loss": 0.7425, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.39595296691277004, | |
| "grad_norm": 0.010363463312387466, | |
| "learning_rate": 0.0028953771289537715, | |
| "loss": 0.9502, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.39814055236532675, | |
| "grad_norm": 0.02518656477332115, | |
| "learning_rate": 0.0028929440389294406, | |
| "loss": 0.7689, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.4003281378178835, | |
| "grad_norm": 0.014663388952612877, | |
| "learning_rate": 0.0028905109489051097, | |
| "loss": 0.8839, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.4025157232704403, | |
| "grad_norm": 0.009784224443137646, | |
| "learning_rate": 0.0028880778588807787, | |
| "loss": 0.82, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.404703308722997, | |
| "grad_norm": 0.02763255313038826, | |
| "learning_rate": 0.002885644768856448, | |
| "loss": 0.8051, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.40689089417555374, | |
| "grad_norm": 0.023367729038000107, | |
| "learning_rate": 0.002883211678832117, | |
| "loss": 0.7191, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.40907847962811045, | |
| "grad_norm": 0.025467796251177788, | |
| "learning_rate": 0.002880778588807786, | |
| "loss": 0.7385, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.4112660650806672, | |
| "grad_norm": 0.03302817419171333, | |
| "learning_rate": 0.002878345498783455, | |
| "loss": 0.6099, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.41345365053322397, | |
| "grad_norm": 0.016808858141303062, | |
| "learning_rate": 0.002875912408759124, | |
| "loss": 0.687, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.4156412359857807, | |
| "grad_norm": 0.030584512278437614, | |
| "learning_rate": 0.0028734793187347933, | |
| "loss": 0.7922, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.41782882143833744, | |
| "grad_norm": 0.05187975615262985, | |
| "learning_rate": 0.0028710462287104624, | |
| "loss": 0.8282, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.4200164068908942, | |
| "grad_norm": 0.03264329209923744, | |
| "learning_rate": 0.0028686131386861315, | |
| "loss": 0.542, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.4222039923434509, | |
| "grad_norm": 0.08889129012823105, | |
| "learning_rate": 0.0028661800486618006, | |
| "loss": 0.7642, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.42439157779600767, | |
| "grad_norm": 0.017528299242258072, | |
| "learning_rate": 0.0028637469586374697, | |
| "loss": 1.0019, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.4265791632485644, | |
| "grad_norm": 0.042831018567085266, | |
| "learning_rate": 0.0028613138686131388, | |
| "loss": 0.7849, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.42876674870112114, | |
| "grad_norm": 0.06844168901443481, | |
| "learning_rate": 0.002858880778588808, | |
| "loss": 0.8431, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.4309543341536779, | |
| "grad_norm": 0.056285906583070755, | |
| "learning_rate": 0.002856447688564477, | |
| "loss": 0.766, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.4331419196062346, | |
| "grad_norm": 0.03165756165981293, | |
| "learning_rate": 0.002854014598540146, | |
| "loss": 0.526, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.43532950505879137, | |
| "grad_norm": 0.01906641758978367, | |
| "learning_rate": 0.002851581508515815, | |
| "loss": 0.634, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.4375170905113481, | |
| "grad_norm": 0.03528127446770668, | |
| "learning_rate": 0.0028491484184914842, | |
| "loss": 0.7152, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.43970467596390483, | |
| "grad_norm": 0.03441726043820381, | |
| "learning_rate": 0.0028467153284671533, | |
| "loss": 0.807, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.4418922614164616, | |
| "grad_norm": 0.07585262507200241, | |
| "learning_rate": 0.0028442822384428224, | |
| "loss": 0.8068, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.4440798468690183, | |
| "grad_norm": 0.04637427628040314, | |
| "learning_rate": 0.0028418491484184915, | |
| "loss": 0.6726, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.44626743232157506, | |
| "grad_norm": 0.014708532020449638, | |
| "learning_rate": 0.0028394160583941606, | |
| "loss": 0.7633, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.4484550177741318, | |
| "grad_norm": 0.06609700620174408, | |
| "learning_rate": 0.0028369829683698297, | |
| "loss": 0.9395, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.45064260322668853, | |
| "grad_norm": 0.014884551987051964, | |
| "learning_rate": 0.0028345498783454988, | |
| "loss": 0.7629, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4528301886792453, | |
| "grad_norm": 0.02310200408101082, | |
| "learning_rate": 0.002832116788321168, | |
| "loss": 0.6696, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.455017774131802, | |
| "grad_norm": 0.020516803488135338, | |
| "learning_rate": 0.002829683698296837, | |
| "loss": 0.6966, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.45720535958435876, | |
| "grad_norm": 0.018198775127530098, | |
| "learning_rate": 0.002827250608272506, | |
| "loss": 0.936, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.4593929450369155, | |
| "grad_norm": 0.032083529978990555, | |
| "learning_rate": 0.002824817518248175, | |
| "loss": 0.853, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.46158053048947223, | |
| "grad_norm": 0.01605304516851902, | |
| "learning_rate": 0.0028223844282238442, | |
| "loss": 0.8602, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.463768115942029, | |
| "grad_norm": 0.024932844564318657, | |
| "learning_rate": 0.0028199513381995133, | |
| "loss": 0.9888, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.46595570139458575, | |
| "grad_norm": 0.04917526990175247, | |
| "learning_rate": 0.0028175182481751824, | |
| "loss": 0.7155, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.46814328684714246, | |
| "grad_norm": 0.017666855826973915, | |
| "learning_rate": 0.002815085158150852, | |
| "loss": 0.7597, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.4703308722996992, | |
| "grad_norm": 0.06158105283975601, | |
| "learning_rate": 0.0028126520681265206, | |
| "loss": 0.808, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.4725184577522559, | |
| "grad_norm": 0.028100378811359406, | |
| "learning_rate": 0.0028102189781021897, | |
| "loss": 0.8217, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.4747060432048127, | |
| "grad_norm": 0.02049509435892105, | |
| "learning_rate": 0.0028077858880778592, | |
| "loss": 0.8441, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.47689362865736945, | |
| "grad_norm": 0.018524937331676483, | |
| "learning_rate": 0.002805352798053528, | |
| "loss": 0.7565, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.47908121410992616, | |
| "grad_norm": 0.017941996455192566, | |
| "learning_rate": 0.002802919708029197, | |
| "loss": 0.7098, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.4812687995624829, | |
| "grad_norm": 0.042154472321271896, | |
| "learning_rate": 0.0028004866180048665, | |
| "loss": 0.8742, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4834563850150396, | |
| "grad_norm": 0.026872573420405388, | |
| "learning_rate": 0.002798053527980535, | |
| "loss": 0.7273, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.4856439704675964, | |
| "grad_norm": 0.02051514759659767, | |
| "learning_rate": 0.0027956204379562043, | |
| "loss": 0.795, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.48783155592015315, | |
| "grad_norm": 0.02145540714263916, | |
| "learning_rate": 0.0027931873479318738, | |
| "loss": 0.8192, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.49001914137270985, | |
| "grad_norm": 0.04769520461559296, | |
| "learning_rate": 0.002790754257907543, | |
| "loss": 0.8592, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.4922067268252666, | |
| "grad_norm": 0.01415792852640152, | |
| "learning_rate": 0.0027883211678832115, | |
| "loss": 0.7553, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.4943943122778234, | |
| "grad_norm": 0.012172535061836243, | |
| "learning_rate": 0.002785888077858881, | |
| "loss": 0.739, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.4965818977303801, | |
| "grad_norm": 0.055700596421957016, | |
| "learning_rate": 0.00278345498783455, | |
| "loss": 0.9405, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.49876948318293685, | |
| "grad_norm": 0.025790488347411156, | |
| "learning_rate": 0.002781021897810219, | |
| "loss": 0.6641, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5009570686354936, | |
| "grad_norm": 0.013937574811279774, | |
| "learning_rate": 0.0027785888077858883, | |
| "loss": 0.7727, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.5031446540880503, | |
| "grad_norm": 0.03238683566451073, | |
| "learning_rate": 0.0027761557177615574, | |
| "loss": 0.8109, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.505332239540607, | |
| "grad_norm": 0.06841892749071121, | |
| "learning_rate": 0.002773722627737226, | |
| "loss": 0.7827, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.5075198249931638, | |
| "grad_norm": 0.05782823637127876, | |
| "learning_rate": 0.002771289537712895, | |
| "loss": 0.9616, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.5097074104457205, | |
| "grad_norm": 0.1389644742012024, | |
| "learning_rate": 0.0027688564476885647, | |
| "loss": 0.7447, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.5118949958982772, | |
| "grad_norm": 0.07213829457759857, | |
| "learning_rate": 0.002766423357664234, | |
| "loss": 0.8738, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.5140825813508341, | |
| "grad_norm": 0.03161882609128952, | |
| "learning_rate": 0.0027639902676399025, | |
| "loss": 0.5307, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.5162701668033908, | |
| "grad_norm": 0.03051130659878254, | |
| "learning_rate": 0.002761557177615572, | |
| "loss": 0.7123, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5184577522559475, | |
| "grad_norm": 0.02562803030014038, | |
| "learning_rate": 0.002759124087591241, | |
| "loss": 0.8167, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.5206453377085042, | |
| "grad_norm": 0.03016614355146885, | |
| "learning_rate": 0.0027566909975669097, | |
| "loss": 0.6904, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.522832923161061, | |
| "grad_norm": 0.01147315464913845, | |
| "learning_rate": 0.0027542579075425793, | |
| "loss": 0.7007, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.5250205086136177, | |
| "grad_norm": 0.017779918387532234, | |
| "learning_rate": 0.0027518248175182483, | |
| "loss": 0.9054, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5272080940661744, | |
| "grad_norm": 0.03238027170300484, | |
| "learning_rate": 0.002749391727493917, | |
| "loss": 0.7599, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.5293956795187312, | |
| "grad_norm": 0.007716326508671045, | |
| "learning_rate": 0.0027469586374695865, | |
| "loss": 0.7561, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.5315832649712879, | |
| "grad_norm": 0.028708985075354576, | |
| "learning_rate": 0.0027445255474452556, | |
| "loss": 0.6842, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.5337708504238446, | |
| "grad_norm": 0.021554840728640556, | |
| "learning_rate": 0.0027420924574209247, | |
| "loss": 0.9046, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5359584358764015, | |
| "grad_norm": 0.010056296363472939, | |
| "learning_rate": 0.002739659367396594, | |
| "loss": 0.7747, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.5381460213289582, | |
| "grad_norm": 0.014583374373614788, | |
| "learning_rate": 0.002737226277372263, | |
| "loss": 0.8104, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.5403336067815149, | |
| "grad_norm": 0.10760743170976639, | |
| "learning_rate": 0.002734793187347932, | |
| "loss": 1.0181, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.5425211922340717, | |
| "grad_norm": 0.030982421711087227, | |
| "learning_rate": 0.002732360097323601, | |
| "loss": 0.7125, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5447087776866284, | |
| "grad_norm": 0.017710238695144653, | |
| "learning_rate": 0.00272992700729927, | |
| "loss": 0.9256, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.5468963631391851, | |
| "grad_norm": 0.027831239625811577, | |
| "learning_rate": 0.0027274939172749393, | |
| "loss": 0.7537, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5490839485917418, | |
| "grad_norm": 0.019798962399363518, | |
| "learning_rate": 0.0027250608272506084, | |
| "loss": 0.6165, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.5512715340442986, | |
| "grad_norm": 0.00836907234042883, | |
| "learning_rate": 0.0027226277372262775, | |
| "loss": 0.7968, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5534591194968553, | |
| "grad_norm": 0.018117599189281464, | |
| "learning_rate": 0.0027201946472019465, | |
| "loss": 0.6087, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.555646704949412, | |
| "grad_norm": 0.017056763172149658, | |
| "learning_rate": 0.0027177615571776156, | |
| "loss": 0.7837, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.5578342904019689, | |
| "grad_norm": 0.009035620838403702, | |
| "learning_rate": 0.0027153284671532847, | |
| "loss": 0.6376, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.5600218758545256, | |
| "grad_norm": 0.015250611118972301, | |
| "learning_rate": 0.002712895377128954, | |
| "loss": 0.7869, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5622094613070823, | |
| "grad_norm": 0.014554915949702263, | |
| "learning_rate": 0.002710462287104623, | |
| "loss": 0.9046, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.5643970467596391, | |
| "grad_norm": 0.011779931373894215, | |
| "learning_rate": 0.002708029197080292, | |
| "loss": 0.8662, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5665846322121958, | |
| "grad_norm": 0.012663912028074265, | |
| "learning_rate": 0.002705596107055961, | |
| "loss": 1.3081, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.5687722176647525, | |
| "grad_norm": 0.0059722489677369595, | |
| "learning_rate": 0.00270316301703163, | |
| "loss": 0.6796, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5709598031173093, | |
| "grad_norm": 0.03664208948612213, | |
| "learning_rate": 0.0027007299270072993, | |
| "loss": 0.9093, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.573147388569866, | |
| "grad_norm": 0.042986199259757996, | |
| "learning_rate": 0.0026982968369829684, | |
| "loss": 0.9444, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5753349740224227, | |
| "grad_norm": 0.012048511765897274, | |
| "learning_rate": 0.0026958637469586375, | |
| "loss": 0.8134, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.5775225594749795, | |
| "grad_norm": 0.012062503024935722, | |
| "learning_rate": 0.0026934306569343066, | |
| "loss": 0.7274, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5797101449275363, | |
| "grad_norm": 0.02607789821922779, | |
| "learning_rate": 0.0026909975669099757, | |
| "loss": 0.6531, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.581897730380093, | |
| "grad_norm": 0.014329343102872372, | |
| "learning_rate": 0.002688564476885645, | |
| "loss": 0.6966, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5840853158326497, | |
| "grad_norm": 0.013629244640469551, | |
| "learning_rate": 0.002686131386861314, | |
| "loss": 0.7831, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.5862729012852065, | |
| "grad_norm": 0.009315542876720428, | |
| "learning_rate": 0.002683698296836983, | |
| "loss": 0.6297, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.5884604867377632, | |
| "grad_norm": 0.051916949450969696, | |
| "learning_rate": 0.002681265206812652, | |
| "loss": 0.7651, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.5906480721903199, | |
| "grad_norm": 0.012272450141608715, | |
| "learning_rate": 0.002678832116788321, | |
| "loss": 0.6713, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5928356576428767, | |
| "grad_norm": 0.011517216451466084, | |
| "learning_rate": 0.00267639902676399, | |
| "loss": 0.6117, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.5950232430954334, | |
| "grad_norm": 0.010973330587148666, | |
| "learning_rate": 0.0026739659367396593, | |
| "loss": 0.7631, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.5972108285479901, | |
| "grad_norm": 0.06580788642168045, | |
| "learning_rate": 0.0026715328467153284, | |
| "loss": 0.9153, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.5993984140005469, | |
| "grad_norm": 0.011350773274898529, | |
| "learning_rate": 0.0026690997566909975, | |
| "loss": 0.8094, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.6015859994531036, | |
| "grad_norm": 0.019090717658400536, | |
| "learning_rate": 0.0026666666666666666, | |
| "loss": 0.9304, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.6037735849056604, | |
| "grad_norm": 0.015177314169704914, | |
| "learning_rate": 0.002664233576642336, | |
| "loss": 0.6859, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.6059611703582172, | |
| "grad_norm": 0.020254317671060562, | |
| "learning_rate": 0.0026618004866180048, | |
| "loss": 0.8386, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.6081487558107739, | |
| "grad_norm": 0.014171348884701729, | |
| "learning_rate": 0.002659367396593674, | |
| "loss": 0.8112, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.6103363412633306, | |
| "grad_norm": 0.00894536729902029, | |
| "learning_rate": 0.0026569343065693434, | |
| "loss": 0.6877, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.6125239267158873, | |
| "grad_norm": 0.011850811541080475, | |
| "learning_rate": 0.002654501216545012, | |
| "loss": 0.8639, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6147115121684441, | |
| "grad_norm": 0.012202342972159386, | |
| "learning_rate": 0.002652068126520681, | |
| "loss": 0.7851, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.6168990976210008, | |
| "grad_norm": 0.014019378460943699, | |
| "learning_rate": 0.0026496350364963507, | |
| "loss": 0.945, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.6190866830735575, | |
| "grad_norm": 0.013264323584735394, | |
| "learning_rate": 0.0026472019464720193, | |
| "loss": 0.6363, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.6212742685261143, | |
| "grad_norm": 0.010803530924022198, | |
| "learning_rate": 0.0026447688564476884, | |
| "loss": 0.7855, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.623461853978671, | |
| "grad_norm": 0.015852496027946472, | |
| "learning_rate": 0.002642335766423358, | |
| "loss": 0.6334, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.6256494394312277, | |
| "grad_norm": 0.023904947564005852, | |
| "learning_rate": 0.002639902676399027, | |
| "loss": 0.5551, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6278370248837846, | |
| "grad_norm": 0.00868566520512104, | |
| "learning_rate": 0.0026374695863746957, | |
| "loss": 0.9256, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.6300246103363413, | |
| "grad_norm": 0.011297028511762619, | |
| "learning_rate": 0.002635036496350365, | |
| "loss": 0.7896, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.632212195788898, | |
| "grad_norm": 0.01018528826534748, | |
| "learning_rate": 0.0026326034063260343, | |
| "loss": 0.8198, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.6343997812414548, | |
| "grad_norm": 0.015003956854343414, | |
| "learning_rate": 0.002630170316301703, | |
| "loss": 0.7424, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6365873666940115, | |
| "grad_norm": 0.007440235000103712, | |
| "learning_rate": 0.0026277372262773725, | |
| "loss": 0.6904, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.6387749521465682, | |
| "grad_norm": 0.014310602098703384, | |
| "learning_rate": 0.0026253041362530416, | |
| "loss": 0.7179, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.6409625375991249, | |
| "grad_norm": 0.008294426836073399, | |
| "learning_rate": 0.0026228710462287102, | |
| "loss": 0.827, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.6431501230516817, | |
| "grad_norm": 0.006840107962489128, | |
| "learning_rate": 0.0026204379562043798, | |
| "loss": 0.6749, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.6453377085042384, | |
| "grad_norm": 0.008538591675460339, | |
| "learning_rate": 0.002618004866180049, | |
| "loss": 0.7467, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.6475252939567951, | |
| "grad_norm": 0.007157974410802126, | |
| "learning_rate": 0.0026155717761557175, | |
| "loss": 0.7233, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.649712879409352, | |
| "grad_norm": 0.030327659100294113, | |
| "learning_rate": 0.002613138686131387, | |
| "loss": 0.6642, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.6519004648619087, | |
| "grad_norm": 0.012880248948931694, | |
| "learning_rate": 0.002610705596107056, | |
| "loss": 0.9694, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6540880503144654, | |
| "grad_norm": 0.014233557507395744, | |
| "learning_rate": 0.0026082725060827252, | |
| "loss": 0.7686, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.6562756357670222, | |
| "grad_norm": 0.008432603441178799, | |
| "learning_rate": 0.0026058394160583943, | |
| "loss": 0.9355, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6584632212195789, | |
| "grad_norm": 0.009492720477283001, | |
| "learning_rate": 0.0026034063260340634, | |
| "loss": 0.7637, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.6606508066721356, | |
| "grad_norm": 0.008224152028560638, | |
| "learning_rate": 0.0026009732360097325, | |
| "loss": 0.7609, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6628383921246924, | |
| "grad_norm": 0.011647099629044533, | |
| "learning_rate": 0.0025985401459854016, | |
| "loss": 0.6565, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.6650259775772491, | |
| "grad_norm": 0.0120640117675066, | |
| "learning_rate": 0.0025961070559610707, | |
| "loss": 0.6751, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6672135630298058, | |
| "grad_norm": 0.014007077552378178, | |
| "learning_rate": 0.0025936739659367398, | |
| "loss": 0.8132, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.6694011484823625, | |
| "grad_norm": 0.014167044311761856, | |
| "learning_rate": 0.002591240875912409, | |
| "loss": 0.8102, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6715887339349194, | |
| "grad_norm": 0.016142327338457108, | |
| "learning_rate": 0.002588807785888078, | |
| "loss": 0.8004, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.6737763193874761, | |
| "grad_norm": 0.007279639132320881, | |
| "learning_rate": 0.002586374695863747, | |
| "loss": 0.732, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6759639048400328, | |
| "grad_norm": 0.011619196273386478, | |
| "learning_rate": 0.002583941605839416, | |
| "loss": 0.603, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.6781514902925896, | |
| "grad_norm": 0.011564897373318672, | |
| "learning_rate": 0.0025815085158150852, | |
| "loss": 0.9163, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6803390757451463, | |
| "grad_norm": 0.010117938742041588, | |
| "learning_rate": 0.0025790754257907543, | |
| "loss": 0.8683, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.682526661197703, | |
| "grad_norm": 0.017769185826182365, | |
| "learning_rate": 0.0025766423357664234, | |
| "loss": 0.6244, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6847142466502598, | |
| "grad_norm": 0.012199788354337215, | |
| "learning_rate": 0.0025742092457420925, | |
| "loss": 0.7076, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.6869018321028165, | |
| "grad_norm": 0.008083075284957886, | |
| "learning_rate": 0.0025717761557177616, | |
| "loss": 0.8658, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.6890894175553732, | |
| "grad_norm": 0.01086794026196003, | |
| "learning_rate": 0.0025693430656934307, | |
| "loss": 0.6941, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.69127700300793, | |
| "grad_norm": 0.010161925107240677, | |
| "learning_rate": 0.0025669099756691, | |
| "loss": 0.6715, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.6934645884604868, | |
| "grad_norm": 0.008891239762306213, | |
| "learning_rate": 0.002564476885644769, | |
| "loss": 0.8093, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.6956521739130435, | |
| "grad_norm": 0.018787039443850517, | |
| "learning_rate": 0.002562043795620438, | |
| "loss": 0.8482, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.6978397593656002, | |
| "grad_norm": 0.02541973814368248, | |
| "learning_rate": 0.002559610705596107, | |
| "loss": 0.76, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.700027344818157, | |
| "grad_norm": 0.011948470957577229, | |
| "learning_rate": 0.002557177615571776, | |
| "loss": 0.7625, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.7022149302707137, | |
| "grad_norm": 0.009559310041368008, | |
| "learning_rate": 0.0025547445255474453, | |
| "loss": 0.7354, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.7044025157232704, | |
| "grad_norm": 0.008267502300441265, | |
| "learning_rate": 0.0025523114355231144, | |
| "loss": 0.7065, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.7065901011758272, | |
| "grad_norm": 0.010692731477320194, | |
| "learning_rate": 0.0025498783454987834, | |
| "loss": 0.983, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.7087776866283839, | |
| "grad_norm": 0.0124723045155406, | |
| "learning_rate": 0.0025474452554744525, | |
| "loss": 0.6154, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.7109652720809406, | |
| "grad_norm": 0.015448692254722118, | |
| "learning_rate": 0.0025450121654501216, | |
| "loss": 0.6129, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.7131528575334974, | |
| "grad_norm": 0.013601388782262802, | |
| "learning_rate": 0.0025425790754257907, | |
| "loss": 0.7214, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.7153404429860541, | |
| "grad_norm": 0.012070258148014545, | |
| "learning_rate": 0.00254014598540146, | |
| "loss": 0.7077, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.7175280284386109, | |
| "grad_norm": 0.05267300084233284, | |
| "learning_rate": 0.0025377128953771293, | |
| "loss": 0.7714, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.7197156138911677, | |
| "grad_norm": 0.012087949551641941, | |
| "learning_rate": 0.002535279805352798, | |
| "loss": 0.9047, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.7219031993437244, | |
| "grad_norm": 0.01940520666539669, | |
| "learning_rate": 0.002532846715328467, | |
| "loss": 0.7804, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7240907847962811, | |
| "grad_norm": 0.011884646490216255, | |
| "learning_rate": 0.0025304136253041366, | |
| "loss": 0.6859, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.7262783702488378, | |
| "grad_norm": 0.02514353021979332, | |
| "learning_rate": 0.0025279805352798053, | |
| "loss": 0.7764, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7284659557013946, | |
| "grad_norm": 0.015074629336595535, | |
| "learning_rate": 0.0025255474452554744, | |
| "loss": 0.6756, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.7306535411539513, | |
| "grad_norm": 0.036420077085494995, | |
| "learning_rate": 0.002523114355231144, | |
| "loss": 0.7407, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.732841126606508, | |
| "grad_norm": 0.015621097758412361, | |
| "learning_rate": 0.0025206812652068126, | |
| "loss": 0.8072, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.7350287120590648, | |
| "grad_norm": 0.010994632728397846, | |
| "learning_rate": 0.0025182481751824816, | |
| "loss": 0.9436, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7372162975116215, | |
| "grad_norm": 0.017064619809389114, | |
| "learning_rate": 0.002515815085158151, | |
| "loss": 0.9386, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.7394038829641782, | |
| "grad_norm": 0.023198846727609634, | |
| "learning_rate": 0.00251338199513382, | |
| "loss": 0.7892, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.7415914684167351, | |
| "grad_norm": 0.005636582616716623, | |
| "learning_rate": 0.002510948905109489, | |
| "loss": 0.8005, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.7437790538692918, | |
| "grad_norm": 0.008022590540349483, | |
| "learning_rate": 0.0025085158150851584, | |
| "loss": 0.9142, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7459666393218485, | |
| "grad_norm": 0.013106726109981537, | |
| "learning_rate": 0.0025060827250608275, | |
| "loss": 0.6845, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.7481542247744053, | |
| "grad_norm": 0.015878600999712944, | |
| "learning_rate": 0.002503649635036496, | |
| "loss": 0.8528, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.750341810226962, | |
| "grad_norm": 0.013783195056021214, | |
| "learning_rate": 0.0025012165450121657, | |
| "loss": 0.8487, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.7525293956795187, | |
| "grad_norm": 0.05050954222679138, | |
| "learning_rate": 0.002498783454987835, | |
| "loss": 0.9014, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7547169811320755, | |
| "grad_norm": 0.009747706353664398, | |
| "learning_rate": 0.0024963503649635035, | |
| "loss": 0.8331, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.7569045665846322, | |
| "grad_norm": 0.27641791105270386, | |
| "learning_rate": 0.0024939172749391726, | |
| "loss": 0.8328, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7590921520371889, | |
| "grad_norm": 0.022615063935518265, | |
| "learning_rate": 0.002491484184914842, | |
| "loss": 1.025, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.7612797374897456, | |
| "grad_norm": 0.018037477508187294, | |
| "learning_rate": 0.0024890510948905108, | |
| "loss": 0.8058, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7634673229423025, | |
| "grad_norm": 0.03229966387152672, | |
| "learning_rate": 0.00248661800486618, | |
| "loss": 0.8224, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.7656549083948592, | |
| "grad_norm": 0.03468572720885277, | |
| "learning_rate": 0.0024841849148418494, | |
| "loss": 0.6558, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7678424938474159, | |
| "grad_norm": 0.04352645203471184, | |
| "learning_rate": 0.0024817518248175185, | |
| "loss": 0.7869, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.7700300792999727, | |
| "grad_norm": 0.0520501509308815, | |
| "learning_rate": 0.002479318734793187, | |
| "loss": 0.8318, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7722176647525294, | |
| "grad_norm": 0.025180073454976082, | |
| "learning_rate": 0.0024768856447688566, | |
| "loss": 0.8454, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.7744052502050861, | |
| "grad_norm": 0.013843162916600704, | |
| "learning_rate": 0.0024744525547445257, | |
| "loss": 0.979, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7765928356576429, | |
| "grad_norm": 0.026960408315062523, | |
| "learning_rate": 0.0024720194647201944, | |
| "loss": 0.7692, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.7787804211101996, | |
| "grad_norm": 0.02509387582540512, | |
| "learning_rate": 0.002469586374695864, | |
| "loss": 0.7471, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7809680065627563, | |
| "grad_norm": 0.014011479914188385, | |
| "learning_rate": 0.002467153284671533, | |
| "loss": 0.7752, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.7831555920153132, | |
| "grad_norm": 0.01862008310854435, | |
| "learning_rate": 0.0024647201946472017, | |
| "loss": 0.9891, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.7853431774678699, | |
| "grad_norm": 0.01249686349183321, | |
| "learning_rate": 0.002462287104622871, | |
| "loss": 0.9046, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.7875307629204266, | |
| "grad_norm": 0.018710242584347725, | |
| "learning_rate": 0.0024598540145985403, | |
| "loss": 0.7926, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7897183483729833, | |
| "grad_norm": 0.015550883486866951, | |
| "learning_rate": 0.0024574209245742094, | |
| "loss": 0.9209, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.7919059338255401, | |
| "grad_norm": 0.011178571730852127, | |
| "learning_rate": 0.0024549878345498785, | |
| "loss": 0.7962, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.7940935192780968, | |
| "grad_norm": 0.017678866162896156, | |
| "learning_rate": 0.0024525547445255476, | |
| "loss": 0.9532, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.7962811047306535, | |
| "grad_norm": 0.021445617079734802, | |
| "learning_rate": 0.0024501216545012167, | |
| "loss": 0.8302, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.7984686901832103, | |
| "grad_norm": 0.015537573955953121, | |
| "learning_rate": 0.0024476885644768858, | |
| "loss": 0.7665, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.800656275635767, | |
| "grad_norm": 0.015302474610507488, | |
| "learning_rate": 0.002445255474452555, | |
| "loss": 0.7161, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.8028438610883237, | |
| "grad_norm": 0.013649791479110718, | |
| "learning_rate": 0.002442822384428224, | |
| "loss": 0.6766, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.8050314465408805, | |
| "grad_norm": 0.01138269528746605, | |
| "learning_rate": 0.002440389294403893, | |
| "loss": 0.7797, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.8072190319934373, | |
| "grad_norm": 0.014025691896677017, | |
| "learning_rate": 0.002437956204379562, | |
| "loss": 0.779, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.809406617445994, | |
| "grad_norm": 0.011000445112586021, | |
| "learning_rate": 0.002435523114355231, | |
| "loss": 0.8064, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8115942028985508, | |
| "grad_norm": 0.010309292934834957, | |
| "learning_rate": 0.0024330900243309003, | |
| "loss": 0.7252, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.8137817883511075, | |
| "grad_norm": 0.007664249278604984, | |
| "learning_rate": 0.0024306569343065694, | |
| "loss": 0.7081, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.8159693738036642, | |
| "grad_norm": 0.015154222957789898, | |
| "learning_rate": 0.0024282238442822385, | |
| "loss": 0.7869, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.8181569592562209, | |
| "grad_norm": 0.01371028833091259, | |
| "learning_rate": 0.0024257907542579076, | |
| "loss": 0.7423, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.8203445447087777, | |
| "grad_norm": 0.012794865295290947, | |
| "learning_rate": 0.0024233576642335767, | |
| "loss": 0.9341, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.8225321301613344, | |
| "grad_norm": 0.011340939439833164, | |
| "learning_rate": 0.0024209245742092458, | |
| "loss": 1.0406, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.8247197156138911, | |
| "grad_norm": 0.013491635210812092, | |
| "learning_rate": 0.002418491484184915, | |
| "loss": 0.763, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.8269073010664479, | |
| "grad_norm": 0.008016029372811317, | |
| "learning_rate": 0.002416058394160584, | |
| "loss": 0.7132, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8290948865190046, | |
| "grad_norm": 0.011460046283900738, | |
| "learning_rate": 0.002413625304136253, | |
| "loss": 0.6306, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.8312824719715614, | |
| "grad_norm": 0.0110190873965621, | |
| "learning_rate": 0.002411192214111922, | |
| "loss": 0.6944, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8334700574241182, | |
| "grad_norm": 0.008347691036760807, | |
| "learning_rate": 0.0024087591240875912, | |
| "loss": 0.8926, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.8356576428766749, | |
| "grad_norm": 0.007940311916172504, | |
| "learning_rate": 0.0024063260340632603, | |
| "loss": 0.8666, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8378452283292316, | |
| "grad_norm": 0.011534546501934528, | |
| "learning_rate": 0.0024038929440389294, | |
| "loss": 0.9077, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.8400328137817884, | |
| "grad_norm": 0.010218126699328423, | |
| "learning_rate": 0.0024014598540145985, | |
| "loss": 0.8393, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8422203992343451, | |
| "grad_norm": 0.01117737777531147, | |
| "learning_rate": 0.0023990267639902676, | |
| "loss": 0.8401, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.8444079846869018, | |
| "grad_norm": 0.01495604682713747, | |
| "learning_rate": 0.0023965936739659367, | |
| "loss": 0.6524, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8465955701394585, | |
| "grad_norm": 0.01132154744118452, | |
| "learning_rate": 0.002394160583941606, | |
| "loss": 0.6973, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.8487831555920153, | |
| "grad_norm": 0.016704557463526726, | |
| "learning_rate": 0.002391727493917275, | |
| "loss": 0.8638, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.850970741044572, | |
| "grad_norm": 0.03163198381662369, | |
| "learning_rate": 0.002389294403892944, | |
| "loss": 0.6569, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.8531583264971287, | |
| "grad_norm": 0.009892611764371395, | |
| "learning_rate": 0.002386861313868613, | |
| "loss": 0.8507, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8553459119496856, | |
| "grad_norm": 0.009704566560685635, | |
| "learning_rate": 0.002384428223844282, | |
| "loss": 0.7567, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.8575334974022423, | |
| "grad_norm": 0.011233623139560223, | |
| "learning_rate": 0.0023819951338199512, | |
| "loss": 0.9072, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.859721082854799, | |
| "grad_norm": 0.017818894237279892, | |
| "learning_rate": 0.0023795620437956208, | |
| "loss": 0.6716, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.8619086683073558, | |
| "grad_norm": 0.009800358675420284, | |
| "learning_rate": 0.0023771289537712894, | |
| "loss": 0.6331, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8640962537599125, | |
| "grad_norm": 0.00855625793337822, | |
| "learning_rate": 0.0023746958637469585, | |
| "loss": 0.8208, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.8662838392124692, | |
| "grad_norm": 0.007912772707641125, | |
| "learning_rate": 0.002372262773722628, | |
| "loss": 0.6897, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.868471424665026, | |
| "grad_norm": 0.015991948544979095, | |
| "learning_rate": 0.0023698296836982967, | |
| "loss": 0.5838, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.8706590101175827, | |
| "grad_norm": 0.013330014422535896, | |
| "learning_rate": 0.002367396593673966, | |
| "loss": 0.7765, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.8728465955701394, | |
| "grad_norm": 0.0108262337744236, | |
| "learning_rate": 0.0023649635036496353, | |
| "loss": 0.8259, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.8750341810226961, | |
| "grad_norm": 0.01277016382664442, | |
| "learning_rate": 0.002362530413625304, | |
| "loss": 0.5084, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.877221766475253, | |
| "grad_norm": 0.00825558416545391, | |
| "learning_rate": 0.002360097323600973, | |
| "loss": 0.8388, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.8794093519278097, | |
| "grad_norm": 0.008703862316906452, | |
| "learning_rate": 0.0023576642335766426, | |
| "loss": 0.889, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8815969373803664, | |
| "grad_norm": 0.009978721849620342, | |
| "learning_rate": 0.0023552311435523117, | |
| "loss": 0.7724, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.8837845228329232, | |
| "grad_norm": 0.009193633683025837, | |
| "learning_rate": 0.0023527980535279804, | |
| "loss": 0.9257, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.8859721082854799, | |
| "grad_norm": 0.009905806742608547, | |
| "learning_rate": 0.00235036496350365, | |
| "loss": 0.9046, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.8881596937380366, | |
| "grad_norm": 0.0108295027166605, | |
| "learning_rate": 0.002347931873479319, | |
| "loss": 0.6427, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.8903472791905934, | |
| "grad_norm": 0.010898306965827942, | |
| "learning_rate": 0.0023454987834549876, | |
| "loss": 0.6888, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.8925348646431501, | |
| "grad_norm": 0.013794617727398872, | |
| "learning_rate": 0.002343065693430657, | |
| "loss": 0.8544, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.8947224500957068, | |
| "grad_norm": 0.014423336833715439, | |
| "learning_rate": 0.0023406326034063262, | |
| "loss": 0.7525, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.8969100355482637, | |
| "grad_norm": 0.010249799117445946, | |
| "learning_rate": 0.002338199513381995, | |
| "loss": 0.7588, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8990976210008204, | |
| "grad_norm": 0.014359788969159126, | |
| "learning_rate": 0.0023357664233576644, | |
| "loss": 0.8303, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.9012852064533771, | |
| "grad_norm": 0.007848945446312428, | |
| "learning_rate": 0.0023333333333333335, | |
| "loss": 0.7478, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.9034727919059339, | |
| "grad_norm": 0.010217231698334217, | |
| "learning_rate": 0.0023309002433090026, | |
| "loss": 0.8758, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.9056603773584906, | |
| "grad_norm": 0.008166585117578506, | |
| "learning_rate": 0.0023284671532846717, | |
| "loss": 0.8669, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.9078479628110473, | |
| "grad_norm": 0.08122234046459198, | |
| "learning_rate": 0.002326034063260341, | |
| "loss": 0.8672, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.910035548263604, | |
| "grad_norm": 0.026630746200680733, | |
| "learning_rate": 0.00232360097323601, | |
| "loss": 0.8429, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.9122231337161608, | |
| "grad_norm": 0.011199391447007656, | |
| "learning_rate": 0.002321167883211679, | |
| "loss": 0.7394, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.9144107191687175, | |
| "grad_norm": 0.034359946846961975, | |
| "learning_rate": 0.002318734793187348, | |
| "loss": 0.757, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.9165983046212742, | |
| "grad_norm": 0.007310883607715368, | |
| "learning_rate": 0.002316301703163017, | |
| "loss": 0.8614, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.918785890073831, | |
| "grad_norm": 0.017180046066641808, | |
| "learning_rate": 0.002313868613138686, | |
| "loss": 0.7018, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9209734755263878, | |
| "grad_norm": 0.010772480629384518, | |
| "learning_rate": 0.0023114355231143554, | |
| "loss": 1.0247, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.9231610609789445, | |
| "grad_norm": 0.013757293112576008, | |
| "learning_rate": 0.0023090024330900244, | |
| "loss": 0.7243, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.9253486464315013, | |
| "grad_norm": 0.010658146813511848, | |
| "learning_rate": 0.0023065693430656935, | |
| "loss": 0.8289, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.927536231884058, | |
| "grad_norm": 0.013902239501476288, | |
| "learning_rate": 0.0023041362530413626, | |
| "loss": 0.7706, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.9297238173366147, | |
| "grad_norm": 0.011173736304044724, | |
| "learning_rate": 0.0023017031630170317, | |
| "loss": 0.8055, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.9319114027891715, | |
| "grad_norm": 0.011386138387024403, | |
| "learning_rate": 0.002299270072992701, | |
| "loss": 0.6273, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9340989882417282, | |
| "grad_norm": 0.008862471207976341, | |
| "learning_rate": 0.00229683698296837, | |
| "loss": 0.7032, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.9362865736942849, | |
| "grad_norm": 0.02106628008186817, | |
| "learning_rate": 0.002294403892944039, | |
| "loss": 0.7835, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9384741591468416, | |
| "grad_norm": 0.010091581381857395, | |
| "learning_rate": 0.002291970802919708, | |
| "loss": 0.6805, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.9406617445993984, | |
| "grad_norm": 0.012447184883058071, | |
| "learning_rate": 0.002289537712895377, | |
| "loss": 0.7323, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9428493300519551, | |
| "grad_norm": 0.015980314463377, | |
| "learning_rate": 0.0022871046228710463, | |
| "loss": 0.8842, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.9450369155045119, | |
| "grad_norm": 0.007705094758421183, | |
| "learning_rate": 0.0022846715328467154, | |
| "loss": 0.8907, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9472245009570687, | |
| "grad_norm": 0.00878717191517353, | |
| "learning_rate": 0.0022822384428223845, | |
| "loss": 0.7455, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.9494120864096254, | |
| "grad_norm": 0.026101326569914818, | |
| "learning_rate": 0.0022798053527980536, | |
| "loss": 0.6827, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.9515996718621821, | |
| "grad_norm": 0.008718657307326794, | |
| "learning_rate": 0.0022773722627737226, | |
| "loss": 0.9253, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.9537872573147389, | |
| "grad_norm": 0.009151890873908997, | |
| "learning_rate": 0.0022749391727493917, | |
| "loss": 0.8735, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9559748427672956, | |
| "grad_norm": 0.012189007364213467, | |
| "learning_rate": 0.002272506082725061, | |
| "loss": 0.94, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.9581624282198523, | |
| "grad_norm": 0.00890439935028553, | |
| "learning_rate": 0.00227007299270073, | |
| "loss": 0.7572, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.9603500136724091, | |
| "grad_norm": 0.013200386427342892, | |
| "learning_rate": 0.002267639902676399, | |
| "loss": 0.7361, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.9625375991249658, | |
| "grad_norm": 0.011736634187400341, | |
| "learning_rate": 0.002265206812652068, | |
| "loss": 0.6326, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9647251845775225, | |
| "grad_norm": 0.006781425327062607, | |
| "learning_rate": 0.002262773722627737, | |
| "loss": 0.7254, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.9669127700300792, | |
| "grad_norm": 0.008296315558254719, | |
| "learning_rate": 0.0022603406326034063, | |
| "loss": 0.6898, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9691003554826361, | |
| "grad_norm": 0.008293522521853447, | |
| "learning_rate": 0.0022579075425790754, | |
| "loss": 0.7953, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.9712879409351928, | |
| "grad_norm": 0.00848364643752575, | |
| "learning_rate": 0.0022554744525547445, | |
| "loss": 0.8203, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9734755263877495, | |
| "grad_norm": 0.012193895876407623, | |
| "learning_rate": 0.002253041362530414, | |
| "loss": 0.6794, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.9756631118403063, | |
| "grad_norm": 0.018784867599606514, | |
| "learning_rate": 0.0022506082725060827, | |
| "loss": 0.5793, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.977850697292863, | |
| "grad_norm": 0.008517356589436531, | |
| "learning_rate": 0.0022481751824817518, | |
| "loss": 0.4866, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.9800382827454197, | |
| "grad_norm": 0.017300793901085854, | |
| "learning_rate": 0.0022457420924574213, | |
| "loss": 0.8304, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.9822258681979765, | |
| "grad_norm": 0.010441828519105911, | |
| "learning_rate": 0.00224330900243309, | |
| "loss": 0.9823, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.9844134536505332, | |
| "grad_norm": 0.013992452062666416, | |
| "learning_rate": 0.002240875912408759, | |
| "loss": 0.7828, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9866010391030899, | |
| "grad_norm": 0.006943755783140659, | |
| "learning_rate": 0.0022384428223844286, | |
| "loss": 0.6205, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.9887886245556468, | |
| "grad_norm": 0.0063702561892569065, | |
| "learning_rate": 0.0022360097323600972, | |
| "loss": 1.0355, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.9909762100082035, | |
| "grad_norm": 0.007510766386985779, | |
| "learning_rate": 0.0022335766423357663, | |
| "loss": 0.7581, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.9931637954607602, | |
| "grad_norm": 0.010165141895413399, | |
| "learning_rate": 0.002231143552311436, | |
| "loss": 0.8831, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.9953513809133169, | |
| "grad_norm": 0.012972669675946236, | |
| "learning_rate": 0.002228710462287105, | |
| "loss": 0.6523, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.9975389663658737, | |
| "grad_norm": 0.007454239297658205, | |
| "learning_rate": 0.0022262773722627736, | |
| "loss": 0.8721, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.9997265518184304, | |
| "grad_norm": 0.007078221533447504, | |
| "learning_rate": 0.0022238442822384427, | |
| "loss": 0.6737, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.0019141372709872, | |
| "grad_norm": 0.021942665800452232, | |
| "learning_rate": 0.002221411192214112, | |
| "loss": 0.8231, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.0041017227235438, | |
| "grad_norm": 0.019108066335320473, | |
| "learning_rate": 0.002218978102189781, | |
| "loss": 0.6809, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.0062893081761006, | |
| "grad_norm": 0.013495873659849167, | |
| "learning_rate": 0.00221654501216545, | |
| "loss": 0.7663, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.0084768936286574, | |
| "grad_norm": 0.009844646789133549, | |
| "learning_rate": 0.0022141119221411195, | |
| "loss": 0.8189, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.010664479081214, | |
| "grad_norm": 0.008135687559843063, | |
| "learning_rate": 0.002211678832116788, | |
| "loss": 0.7935, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.0128520645337709, | |
| "grad_norm": 0.01022945623844862, | |
| "learning_rate": 0.0022092457420924572, | |
| "loss": 0.7855, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.0150396499863277, | |
| "grad_norm": 0.011145783588290215, | |
| "learning_rate": 0.0022068126520681268, | |
| "loss": 0.9334, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.0172272354388843, | |
| "grad_norm": 0.014914394356310368, | |
| "learning_rate": 0.002204379562043796, | |
| "loss": 0.8769, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.019414820891441, | |
| "grad_norm": 0.010317330248653889, | |
| "learning_rate": 0.0022019464720194645, | |
| "loss": 0.9083, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.021602406343998, | |
| "grad_norm": 0.012516210786998272, | |
| "learning_rate": 0.002199513381995134, | |
| "loss": 0.7169, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.0237899917965545, | |
| "grad_norm": 0.015528671443462372, | |
| "learning_rate": 0.002197080291970803, | |
| "loss": 0.6738, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.0259775772491113, | |
| "grad_norm": 0.007066753227263689, | |
| "learning_rate": 0.002194647201946472, | |
| "loss": 0.5918, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.0281651627016681, | |
| "grad_norm": 0.007939637638628483, | |
| "learning_rate": 0.0021922141119221413, | |
| "loss": 0.6588, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.0303527481542247, | |
| "grad_norm": 0.007144363131374121, | |
| "learning_rate": 0.0021897810218978104, | |
| "loss": 0.4427, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.0325403336067815, | |
| "grad_norm": 0.007886086590588093, | |
| "learning_rate": 0.002187347931873479, | |
| "loss": 0.7392, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0347279190593381, | |
| "grad_norm": 0.007826312445104122, | |
| "learning_rate": 0.0021849148418491486, | |
| "loss": 0.743, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.036915504511895, | |
| "grad_norm": 0.007945370860397816, | |
| "learning_rate": 0.0021824817518248177, | |
| "loss": 0.6567, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0391030899644518, | |
| "grad_norm": 0.009234143421053886, | |
| "learning_rate": 0.0021800486618004863, | |
| "loss": 0.8079, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.0412906754170084, | |
| "grad_norm": 0.011828969232738018, | |
| "learning_rate": 0.002177615571776156, | |
| "loss": 0.7132, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0434782608695652, | |
| "grad_norm": 0.008681892417371273, | |
| "learning_rate": 0.002175182481751825, | |
| "loss": 0.8417, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.045665846322122, | |
| "grad_norm": 0.008761374279856682, | |
| "learning_rate": 0.002172749391727494, | |
| "loss": 0.7446, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.0478534317746786, | |
| "grad_norm": 0.014171335846185684, | |
| "learning_rate": 0.002170316301703163, | |
| "loss": 0.739, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.0500410172272354, | |
| "grad_norm": 0.011624401435256004, | |
| "learning_rate": 0.0021678832116788322, | |
| "loss": 0.8935, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0522286026797922, | |
| "grad_norm": 0.019760416820645332, | |
| "learning_rate": 0.0021654501216545013, | |
| "loss": 0.9159, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.0544161881323488, | |
| "grad_norm": 0.0076353419572114944, | |
| "learning_rate": 0.0021630170316301704, | |
| "loss": 0.8153, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.0566037735849056, | |
| "grad_norm": 0.009698878973722458, | |
| "learning_rate": 0.0021605839416058395, | |
| "loss": 0.8043, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.0587913590374625, | |
| "grad_norm": 0.007674135267734528, | |
| "learning_rate": 0.0021581508515815086, | |
| "loss": 0.6816, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.060978944490019, | |
| "grad_norm": 0.01642732322216034, | |
| "learning_rate": 0.0021557177615571777, | |
| "loss": 0.9525, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.0631665299425759, | |
| "grad_norm": 0.016669275239109993, | |
| "learning_rate": 0.002153284671532847, | |
| "loss": 0.5482, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.0653541153951327, | |
| "grad_norm": 0.012565388344228268, | |
| "learning_rate": 0.002150851581508516, | |
| "loss": 0.6211, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.0675417008476893, | |
| "grad_norm": 0.01363010797649622, | |
| "learning_rate": 0.002148418491484185, | |
| "loss": 0.5152, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.069729286300246, | |
| "grad_norm": 0.020599598065018654, | |
| "learning_rate": 0.002145985401459854, | |
| "loss": 0.8035, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.071916871752803, | |
| "grad_norm": 0.013294585980474949, | |
| "learning_rate": 0.002143552311435523, | |
| "loss": 0.8999, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0741044572053595, | |
| "grad_norm": 0.038667161017656326, | |
| "learning_rate": 0.0021411192214111923, | |
| "loss": 0.7514, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.0762920426579163, | |
| "grad_norm": 0.010547326877713203, | |
| "learning_rate": 0.0021386861313868613, | |
| "loss": 0.6819, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0784796281104732, | |
| "grad_norm": 0.009484006091952324, | |
| "learning_rate": 0.0021362530413625304, | |
| "loss": 0.6253, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.0806672135630297, | |
| "grad_norm": 0.009657086804509163, | |
| "learning_rate": 0.0021338199513381995, | |
| "loss": 0.7112, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.0828547990155866, | |
| "grad_norm": 0.01714419014751911, | |
| "learning_rate": 0.0021313868613138686, | |
| "loss": 0.9098, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.0850423844681434, | |
| "grad_norm": 0.01343261357396841, | |
| "learning_rate": 0.0021289537712895377, | |
| "loss": 0.7902, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.0872299699207, | |
| "grad_norm": 0.00883649941533804, | |
| "learning_rate": 0.002126520681265207, | |
| "loss": 0.9971, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.0894175553732568, | |
| "grad_norm": 0.00613701157271862, | |
| "learning_rate": 0.002124087591240876, | |
| "loss": 0.7527, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.0916051408258136, | |
| "grad_norm": 0.009846502915024757, | |
| "learning_rate": 0.002121654501216545, | |
| "loss": 0.7402, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.0937927262783702, | |
| "grad_norm": 0.010731893591582775, | |
| "learning_rate": 0.002119221411192214, | |
| "loss": 0.7848, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.095980311730927, | |
| "grad_norm": 0.011895066127181053, | |
| "learning_rate": 0.002116788321167883, | |
| "loss": 0.7164, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.0981678971834836, | |
| "grad_norm": 0.007519803941249847, | |
| "learning_rate": 0.0021143552311435523, | |
| "loss": 0.9606, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.1003554826360404, | |
| "grad_norm": 0.009692378342151642, | |
| "learning_rate": 0.0021119221411192214, | |
| "loss": 0.7633, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.1025430680885973, | |
| "grad_norm": 0.011364142410457134, | |
| "learning_rate": 0.0021094890510948905, | |
| "loss": 0.6945, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.1047306535411539, | |
| "grad_norm": 0.007994066923856735, | |
| "learning_rate": 0.0021070559610705595, | |
| "loss": 0.6423, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.1069182389937107, | |
| "grad_norm": 0.02612650953233242, | |
| "learning_rate": 0.0021046228710462286, | |
| "loss": 0.8676, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.1091058244462675, | |
| "grad_norm": 0.007825646549463272, | |
| "learning_rate": 0.002102189781021898, | |
| "loss": 0.5687, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.111293409898824, | |
| "grad_norm": 0.008077848702669144, | |
| "learning_rate": 0.002099756690997567, | |
| "loss": 0.7509, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.113480995351381, | |
| "grad_norm": 0.009620738215744495, | |
| "learning_rate": 0.002097323600973236, | |
| "loss": 0.5996, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.1156685808039377, | |
| "grad_norm": 0.0255615022033453, | |
| "learning_rate": 0.0020948905109489054, | |
| "loss": 0.6696, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.1178561662564943, | |
| "grad_norm": 0.010550931096076965, | |
| "learning_rate": 0.002092457420924574, | |
| "loss": 0.7019, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.1200437517090511, | |
| "grad_norm": 0.028004566207528114, | |
| "learning_rate": 0.002090024330900243, | |
| "loss": 0.8809, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.122231337161608, | |
| "grad_norm": 0.013075259514153004, | |
| "learning_rate": 0.0020875912408759127, | |
| "loss": 0.6108, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.1244189226141645, | |
| "grad_norm": 0.015426448546350002, | |
| "learning_rate": 0.0020851581508515814, | |
| "loss": 0.7146, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.1266065080667214, | |
| "grad_norm": 0.007735779043287039, | |
| "learning_rate": 0.0020827250608272505, | |
| "loss": 0.8517, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.1287940935192782, | |
| "grad_norm": 0.012412245385348797, | |
| "learning_rate": 0.00208029197080292, | |
| "loss": 0.6694, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.1309816789718348, | |
| "grad_norm": 0.009669258259236813, | |
| "learning_rate": 0.0020778588807785887, | |
| "loss": 0.612, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.1331692644243916, | |
| "grad_norm": 0.010346516966819763, | |
| "learning_rate": 0.0020754257907542577, | |
| "loss": 0.7956, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.1353568498769484, | |
| "grad_norm": 0.008683484978973866, | |
| "learning_rate": 0.0020729927007299273, | |
| "loss": 0.7012, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.137544435329505, | |
| "grad_norm": 0.009093291126191616, | |
| "learning_rate": 0.0020705596107055964, | |
| "loss": 0.6406, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1397320207820618, | |
| "grad_norm": 0.019143717363476753, | |
| "learning_rate": 0.002068126520681265, | |
| "loss": 0.6632, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.1419196062346186, | |
| "grad_norm": 0.008810199797153473, | |
| "learning_rate": 0.0020656934306569345, | |
| "loss": 0.6248, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1441071916871752, | |
| "grad_norm": 0.009826627559959888, | |
| "learning_rate": 0.0020632603406326036, | |
| "loss": 0.7367, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.146294777139732, | |
| "grad_norm": 0.007178613916039467, | |
| "learning_rate": 0.0020608272506082723, | |
| "loss": 0.6688, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.1484823625922886, | |
| "grad_norm": 0.00853504054248333, | |
| "learning_rate": 0.002058394160583942, | |
| "loss": 0.6802, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.1506699480448455, | |
| "grad_norm": 0.011418921872973442, | |
| "learning_rate": 0.002055961070559611, | |
| "loss": 0.5832, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.1528575334974023, | |
| "grad_norm": 0.015032613649964333, | |
| "learning_rate": 0.0020535279805352796, | |
| "loss": 0.6841, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.155045118949959, | |
| "grad_norm": 0.008302520960569382, | |
| "learning_rate": 0.002051094890510949, | |
| "loss": 0.7869, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.1572327044025157, | |
| "grad_norm": 0.006403745152056217, | |
| "learning_rate": 0.002048661800486618, | |
| "loss": 0.7054, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.1594202898550725, | |
| "grad_norm": 0.00577664515003562, | |
| "learning_rate": 0.0020462287104622873, | |
| "loss": 0.8063, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.161607875307629, | |
| "grad_norm": 0.011647713370621204, | |
| "learning_rate": 0.002043795620437956, | |
| "loss": 0.7921, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.163795460760186, | |
| "grad_norm": 0.011479120701551437, | |
| "learning_rate": 0.0020413625304136255, | |
| "loss": 0.9256, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1659830462127427, | |
| "grad_norm": 0.007622700184583664, | |
| "learning_rate": 0.0020389294403892946, | |
| "loss": 0.722, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.1681706316652993, | |
| "grad_norm": 0.0064216419123113155, | |
| "learning_rate": 0.0020364963503649632, | |
| "loss": 0.6979, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.1703582171178561, | |
| "grad_norm": 0.007917587645351887, | |
| "learning_rate": 0.0020340632603406327, | |
| "loss": 0.8049, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.172545802570413, | |
| "grad_norm": 0.0061738938093185425, | |
| "learning_rate": 0.002031630170316302, | |
| "loss": 0.7057, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.1747333880229696, | |
| "grad_norm": 0.0060928682796657085, | |
| "learning_rate": 0.0020291970802919705, | |
| "loss": 0.8, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.1769209734755264, | |
| "grad_norm": 0.00664818799123168, | |
| "learning_rate": 0.00202676399026764, | |
| "loss": 0.7944, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.1791085589280832, | |
| "grad_norm": 0.027486886829137802, | |
| "learning_rate": 0.002024330900243309, | |
| "loss": 0.8446, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.1812961443806398, | |
| "grad_norm": 0.01736626587808132, | |
| "learning_rate": 0.002021897810218978, | |
| "loss": 0.8303, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1834837298331966, | |
| "grad_norm": 0.0084115294739604, | |
| "learning_rate": 0.0020194647201946473, | |
| "loss": 0.7323, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.1856713152857534, | |
| "grad_norm": 0.01464123371988535, | |
| "learning_rate": 0.0020170316301703164, | |
| "loss": 0.8395, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.18785890073831, | |
| "grad_norm": 0.007480619940906763, | |
| "learning_rate": 0.0020145985401459855, | |
| "loss": 0.7309, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.1900464861908668, | |
| "grad_norm": 0.014315255917608738, | |
| "learning_rate": 0.0020121654501216546, | |
| "loss": 0.6468, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.1922340716434237, | |
| "grad_norm": 0.009927434846758842, | |
| "learning_rate": 0.0020097323600973237, | |
| "loss": 0.7544, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.1944216570959802, | |
| "grad_norm": 0.019481701776385307, | |
| "learning_rate": 0.0020072992700729928, | |
| "loss": 0.8124, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.196609242548537, | |
| "grad_norm": 0.007046518847346306, | |
| "learning_rate": 0.002004866180048662, | |
| "loss": 0.6582, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.1987968280010939, | |
| "grad_norm": 0.012643888592720032, | |
| "learning_rate": 0.002002433090024331, | |
| "loss": 0.8098, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.2009844134536505, | |
| "grad_norm": 0.008585029281675816, | |
| "learning_rate": 0.002, | |
| "loss": 0.7206, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.2031719989062073, | |
| "grad_norm": 0.014269394800066948, | |
| "learning_rate": 0.001997566909975669, | |
| "loss": 0.8426, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.2053595843587641, | |
| "grad_norm": 0.006986747495830059, | |
| "learning_rate": 0.0019951338199513382, | |
| "loss": 0.7793, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.2075471698113207, | |
| "grad_norm": 0.014269756153225899, | |
| "learning_rate": 0.0019927007299270073, | |
| "loss": 0.7668, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.2097347552638775, | |
| "grad_norm": 0.009506807662546635, | |
| "learning_rate": 0.0019902676399026764, | |
| "loss": 0.771, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.2119223407164341, | |
| "grad_norm": 0.008203186094760895, | |
| "learning_rate": 0.0019878345498783455, | |
| "loss": 0.8037, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.214109926168991, | |
| "grad_norm": 0.01714324578642845, | |
| "learning_rate": 0.0019854014598540146, | |
| "loss": 0.66, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.2162975116215478, | |
| "grad_norm": 0.01466370839625597, | |
| "learning_rate": 0.0019829683698296837, | |
| "loss": 0.8761, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.2184850970741046, | |
| "grad_norm": 0.049504704773426056, | |
| "learning_rate": 0.0019805352798053528, | |
| "loss": 0.7717, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.2206726825266612, | |
| "grad_norm": 0.010891391895711422, | |
| "learning_rate": 0.001978102189781022, | |
| "loss": 0.7754, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.222860267979218, | |
| "grad_norm": 0.007297700271010399, | |
| "learning_rate": 0.001975669099756691, | |
| "loss": 0.882, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.2250478534317746, | |
| "grad_norm": 0.010113504715263844, | |
| "learning_rate": 0.00197323600973236, | |
| "loss": 0.7514, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.2272354388843314, | |
| "grad_norm": 0.0076246317476034164, | |
| "learning_rate": 0.001970802919708029, | |
| "loss": 0.9311, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.2294230243368882, | |
| "grad_norm": 0.010274101980030537, | |
| "learning_rate": 0.0019683698296836987, | |
| "loss": 0.9348, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.2316106097894448, | |
| "grad_norm": 0.007466154173016548, | |
| "learning_rate": 0.0019659367396593673, | |
| "loss": 0.6847, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.2337981952420016, | |
| "grad_norm": 0.012906615622341633, | |
| "learning_rate": 0.0019635036496350364, | |
| "loss": 0.9068, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.2359857806945584, | |
| "grad_norm": 0.008850296027958393, | |
| "learning_rate": 0.001961070559610706, | |
| "loss": 0.9032, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.238173366147115, | |
| "grad_norm": 0.009153778664767742, | |
| "learning_rate": 0.0019586374695863746, | |
| "loss": 0.7872, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.2403609515996719, | |
| "grad_norm": 0.014177209697663784, | |
| "learning_rate": 0.0019562043795620437, | |
| "loss": 0.7902, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.2425485370522287, | |
| "grad_norm": 0.008819716051220894, | |
| "learning_rate": 0.001953771289537713, | |
| "loss": 0.5116, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.2447361225047853, | |
| "grad_norm": 0.012600511312484741, | |
| "learning_rate": 0.001951338199513382, | |
| "loss": 0.8224, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.246923707957342, | |
| "grad_norm": 0.012330558151006699, | |
| "learning_rate": 0.001948905109489051, | |
| "loss": 0.6959, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.249111293409899, | |
| "grad_norm": 0.013719186186790466, | |
| "learning_rate": 0.00194647201946472, | |
| "loss": 0.8555, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.2512988788624555, | |
| "grad_norm": 0.019239958375692368, | |
| "learning_rate": 0.0019440389294403894, | |
| "loss": 0.8459, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.2534864643150123, | |
| "grad_norm": 0.00825503934174776, | |
| "learning_rate": 0.0019416058394160585, | |
| "loss": 0.6807, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.2556740497675691, | |
| "grad_norm": 0.00811754260212183, | |
| "learning_rate": 0.0019391727493917273, | |
| "loss": 0.661, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2578616352201257, | |
| "grad_norm": 0.009656975045800209, | |
| "learning_rate": 0.0019367396593673967, | |
| "loss": 0.693, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.2600492206726825, | |
| "grad_norm": 0.01010841503739357, | |
| "learning_rate": 0.0019343065693430658, | |
| "loss": 0.7331, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.2622368061252391, | |
| "grad_norm": 0.01344444788992405, | |
| "learning_rate": 0.0019318734793187346, | |
| "loss": 0.89, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.264424391577796, | |
| "grad_norm": 0.009256028570234776, | |
| "learning_rate": 0.001929440389294404, | |
| "loss": 0.7227, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.2666119770303528, | |
| "grad_norm": 0.009699441492557526, | |
| "learning_rate": 0.001927007299270073, | |
| "loss": 0.6758, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.2687995624829096, | |
| "grad_norm": 0.013547690585255623, | |
| "learning_rate": 0.001924574209245742, | |
| "loss": 0.8159, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2709871479354662, | |
| "grad_norm": 0.011569716967642307, | |
| "learning_rate": 0.0019221411192214114, | |
| "loss": 0.7126, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.273174733388023, | |
| "grad_norm": 0.009194127283990383, | |
| "learning_rate": 0.0019197080291970803, | |
| "loss": 0.8327, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.2753623188405796, | |
| "grad_norm": 0.01622292585670948, | |
| "learning_rate": 0.0019172749391727494, | |
| "loss": 0.8118, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.2775499042931364, | |
| "grad_norm": 0.016841020435094833, | |
| "learning_rate": 0.0019148418491484187, | |
| "loss": 0.8746, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.2797374897456932, | |
| "grad_norm": 0.011160912923514843, | |
| "learning_rate": 0.0019124087591240876, | |
| "loss": 0.7846, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.28192507519825, | |
| "grad_norm": 0.013098710216581821, | |
| "learning_rate": 0.0019099756690997567, | |
| "loss": 0.666, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.2841126606508066, | |
| "grad_norm": 0.008245709352195263, | |
| "learning_rate": 0.001907542579075426, | |
| "loss": 0.7799, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.2863002461033635, | |
| "grad_norm": 0.005503001157194376, | |
| "learning_rate": 0.0019051094890510949, | |
| "loss": 0.605, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.28848783155592, | |
| "grad_norm": 0.014160554856061935, | |
| "learning_rate": 0.001902676399026764, | |
| "loss": 0.7715, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.2906754170084769, | |
| "grad_norm": 0.06220156326889992, | |
| "learning_rate": 0.0019002433090024333, | |
| "loss": 1.0173, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.2928630024610337, | |
| "grad_norm": 0.023459481075406075, | |
| "learning_rate": 0.0018978102189781021, | |
| "loss": 0.7195, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.2950505879135905, | |
| "grad_norm": 0.02028430998325348, | |
| "learning_rate": 0.0018953771289537712, | |
| "loss": 0.8889, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.297238173366147, | |
| "grad_norm": 0.007861199788749218, | |
| "learning_rate": 0.0018929440389294405, | |
| "loss": 0.8249, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.299425758818704, | |
| "grad_norm": 0.008794757537543774, | |
| "learning_rate": 0.0018905109489051096, | |
| "loss": 0.8978, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.3016133442712605, | |
| "grad_norm": 0.027899743989109993, | |
| "learning_rate": 0.0018880778588807785, | |
| "loss": 0.8259, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.3038009297238173, | |
| "grad_norm": 0.006755333859473467, | |
| "learning_rate": 0.0018856447688564478, | |
| "loss": 0.8913, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.3059885151763742, | |
| "grad_norm": 0.016409730538725853, | |
| "learning_rate": 0.001883211678832117, | |
| "loss": 0.7902, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.3081761006289307, | |
| "grad_norm": 0.012431084178388119, | |
| "learning_rate": 0.0018807785888077858, | |
| "loss": 0.5474, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.3103636860814876, | |
| "grad_norm": 0.0099630793556571, | |
| "learning_rate": 0.001878345498783455, | |
| "loss": 0.7595, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.3125512715340442, | |
| "grad_norm": 0.027248527854681015, | |
| "learning_rate": 0.0018759124087591242, | |
| "loss": 1.0273, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.314738856986601, | |
| "grad_norm": 0.008029641583561897, | |
| "learning_rate": 0.001873479318734793, | |
| "loss": 0.6951, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.3169264424391578, | |
| "grad_norm": 0.011218305677175522, | |
| "learning_rate": 0.0018710462287104626, | |
| "loss": 0.9217, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.3191140278917146, | |
| "grad_norm": 0.024159464985132217, | |
| "learning_rate": 0.0018686131386861315, | |
| "loss": 0.7839, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.3213016133442712, | |
| "grad_norm": 0.01127669122070074, | |
| "learning_rate": 0.0018661800486618006, | |
| "loss": 0.6711, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.323489198796828, | |
| "grad_norm": 0.014322164468467236, | |
| "learning_rate": 0.0018637469586374699, | |
| "loss": 0.8935, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.3256767842493846, | |
| "grad_norm": 0.010018724948167801, | |
| "learning_rate": 0.0018613138686131387, | |
| "loss": 0.7622, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.3278643697019414, | |
| "grad_norm": 0.02816806361079216, | |
| "learning_rate": 0.0018588807785888078, | |
| "loss": 0.8948, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.3300519551544983, | |
| "grad_norm": 0.011105911806225777, | |
| "learning_rate": 0.0018564476885644767, | |
| "loss": 0.754, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.332239540607055, | |
| "grad_norm": 0.007195697631686926, | |
| "learning_rate": 0.001854014598540146, | |
| "loss": 0.6923, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.3344271260596117, | |
| "grad_norm": 0.010149553418159485, | |
| "learning_rate": 0.001851581508515815, | |
| "loss": 0.8129, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.3366147115121685, | |
| "grad_norm": 0.006798075046390295, | |
| "learning_rate": 0.001849148418491484, | |
| "loss": 0.5858, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.338802296964725, | |
| "grad_norm": 0.006904991343617439, | |
| "learning_rate": 0.0018467153284671533, | |
| "loss": 0.7058, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.340989882417282, | |
| "grad_norm": 0.019244657829403877, | |
| "learning_rate": 0.0018442822384428224, | |
| "loss": 0.7452, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.3431774678698387, | |
| "grad_norm": 0.10027986764907837, | |
| "learning_rate": 0.0018418491484184915, | |
| "loss": 0.7935, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.3453650533223955, | |
| "grad_norm": 0.028616629540920258, | |
| "learning_rate": 0.0018394160583941608, | |
| "loss": 0.7798, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.3475526387749521, | |
| "grad_norm": 0.02287200279533863, | |
| "learning_rate": 0.0018369829683698297, | |
| "loss": 0.7231, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.349740224227509, | |
| "grad_norm": 0.029162835329771042, | |
| "learning_rate": 0.0018345498783454988, | |
| "loss": 0.7196, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.3519278096800655, | |
| "grad_norm": 0.00748335849493742, | |
| "learning_rate": 0.001832116788321168, | |
| "loss": 0.6841, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3541153951326224, | |
| "grad_norm": 0.012842601165175438, | |
| "learning_rate": 0.001829683698296837, | |
| "loss": 0.8114, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.3563029805851792, | |
| "grad_norm": 0.01425047405064106, | |
| "learning_rate": 0.001827250608272506, | |
| "loss": 0.713, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.3584905660377358, | |
| "grad_norm": 0.011411231942474842, | |
| "learning_rate": 0.0018248175182481753, | |
| "loss": 0.8576, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.3606781514902926, | |
| "grad_norm": 0.02541513741016388, | |
| "learning_rate": 0.0018223844282238442, | |
| "loss": 0.7529, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.3628657369428494, | |
| "grad_norm": 0.009776429273188114, | |
| "learning_rate": 0.0018199513381995133, | |
| "loss": 0.6062, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.365053322395406, | |
| "grad_norm": 0.01603938452899456, | |
| "learning_rate": 0.0018175182481751826, | |
| "loss": 1.3558, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.3672409078479628, | |
| "grad_norm": 0.01858574151992798, | |
| "learning_rate": 0.0018150851581508517, | |
| "loss": 0.7067, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.3694284933005196, | |
| "grad_norm": 0.014604609459638596, | |
| "learning_rate": 0.0018126520681265206, | |
| "loss": 0.65, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.3716160787530762, | |
| "grad_norm": 0.01383352093398571, | |
| "learning_rate": 0.00181021897810219, | |
| "loss": 0.724, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.373803664205633, | |
| "grad_norm": 0.007166001014411449, | |
| "learning_rate": 0.001807785888077859, | |
| "loss": 0.7063, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3759912496581896, | |
| "grad_norm": 0.01364620216190815, | |
| "learning_rate": 0.0018053527980535279, | |
| "loss": 0.942, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.3781788351107465, | |
| "grad_norm": 0.013178148306906223, | |
| "learning_rate": 0.0018029197080291972, | |
| "loss": 0.7134, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.3803664205633033, | |
| "grad_norm": 0.016469091176986694, | |
| "learning_rate": 0.0018004866180048663, | |
| "loss": 0.8652, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.38255400601586, | |
| "grad_norm": 0.008818808011710644, | |
| "learning_rate": 0.0017980535279805351, | |
| "loss": 0.7157, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.3847415914684167, | |
| "grad_norm": 0.006165484432131052, | |
| "learning_rate": 0.0017956204379562047, | |
| "loss": 0.8267, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.3869291769209735, | |
| "grad_norm": 0.017317302525043488, | |
| "learning_rate": 0.0017931873479318735, | |
| "loss": 0.7661, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.38911676237353, | |
| "grad_norm": 0.01045684702694416, | |
| "learning_rate": 0.0017907542579075426, | |
| "loss": 0.797, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.391304347826087, | |
| "grad_norm": 0.004696684889495373, | |
| "learning_rate": 0.001788321167883212, | |
| "loss": 0.793, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.3934919332786437, | |
| "grad_norm": 0.01570739410817623, | |
| "learning_rate": 0.0017858880778588808, | |
| "loss": 0.9052, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.3956795187312006, | |
| "grad_norm": 0.006558465771377087, | |
| "learning_rate": 0.00178345498783455, | |
| "loss": 0.7475, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.3978671041837571, | |
| "grad_norm": 0.008167284540832043, | |
| "learning_rate": 0.0017810218978102192, | |
| "loss": 0.7801, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.400054689636314, | |
| "grad_norm": 0.007898733019828796, | |
| "learning_rate": 0.001778588807785888, | |
| "loss": 0.7694, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.4022422750888706, | |
| "grad_norm": 0.011702708899974823, | |
| "learning_rate": 0.0017761557177615572, | |
| "loss": 0.7104, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.4044298605414274, | |
| "grad_norm": 0.01823602244257927, | |
| "learning_rate": 0.0017737226277372265, | |
| "loss": 0.837, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.4066174459939842, | |
| "grad_norm": 0.019088082015514374, | |
| "learning_rate": 0.0017712895377128954, | |
| "loss": 0.8105, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.408805031446541, | |
| "grad_norm": 0.008738362230360508, | |
| "learning_rate": 0.0017688564476885645, | |
| "loss": 0.9423, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.4109926168990976, | |
| "grad_norm": 0.010799618437886238, | |
| "learning_rate": 0.0017664233576642336, | |
| "loss": 0.7173, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.4131802023516544, | |
| "grad_norm": 0.007114489562809467, | |
| "learning_rate": 0.0017639902676399029, | |
| "loss": 0.7322, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.415367787804211, | |
| "grad_norm": 0.021334782242774963, | |
| "learning_rate": 0.0017615571776155717, | |
| "loss": 0.7808, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.4175553732567678, | |
| "grad_norm": 0.06464671343564987, | |
| "learning_rate": 0.0017591240875912408, | |
| "loss": 0.8948, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.4197429587093247, | |
| "grad_norm": 0.016822345554828644, | |
| "learning_rate": 0.0017566909975669101, | |
| "loss": 0.7481, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.4219305441618812, | |
| "grad_norm": 0.01005722675472498, | |
| "learning_rate": 0.001754257907542579, | |
| "loss": 0.8027, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.424118129614438, | |
| "grad_norm": 0.01469690166413784, | |
| "learning_rate": 0.0017518248175182481, | |
| "loss": 0.7487, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.4263057150669949, | |
| "grad_norm": 0.013352830894291401, | |
| "learning_rate": 0.0017493917274939174, | |
| "loss": 0.9439, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.4284933005195515, | |
| "grad_norm": 0.01574932225048542, | |
| "learning_rate": 0.0017469586374695863, | |
| "loss": 0.7926, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.4306808859721083, | |
| "grad_norm": 0.012712597846984863, | |
| "learning_rate": 0.0017445255474452554, | |
| "loss": 0.8369, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.4328684714246651, | |
| "grad_norm": 0.018248263746500015, | |
| "learning_rate": 0.0017420924574209247, | |
| "loss": 0.6585, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.4350560568772217, | |
| "grad_norm": 0.0181551706045866, | |
| "learning_rate": 0.0017396593673965938, | |
| "loss": 0.8487, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.4372436423297785, | |
| "grad_norm": 0.009059487842023373, | |
| "learning_rate": 0.0017372262773722627, | |
| "loss": 0.8897, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.4394312277823351, | |
| "grad_norm": 0.007483980618417263, | |
| "learning_rate": 0.001734793187347932, | |
| "loss": 0.6673, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.441618813234892, | |
| "grad_norm": 0.007589507382363081, | |
| "learning_rate": 0.001732360097323601, | |
| "loss": 0.7013, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.4438063986874488, | |
| "grad_norm": 0.011493782512843609, | |
| "learning_rate": 0.00172992700729927, | |
| "loss": 0.5457, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.4459939841400056, | |
| "grad_norm": 0.027656735852360725, | |
| "learning_rate": 0.0017274939172749392, | |
| "loss": 0.7251, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.4481815695925622, | |
| "grad_norm": 0.022569406777620316, | |
| "learning_rate": 0.0017250608272506083, | |
| "loss": 0.7104, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.450369155045119, | |
| "grad_norm": 0.028735000640153885, | |
| "learning_rate": 0.0017226277372262772, | |
| "loss": 0.8682, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.4525567404976756, | |
| "grad_norm": 0.012052370235323906, | |
| "learning_rate": 0.0017201946472019465, | |
| "loss": 0.7508, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.4547443259502324, | |
| "grad_norm": 0.008707467466592789, | |
| "learning_rate": 0.0017177615571776156, | |
| "loss": 0.83, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.4569319114027892, | |
| "grad_norm": 0.01061397884041071, | |
| "learning_rate": 0.0017153284671532847, | |
| "loss": 0.9431, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.459119496855346, | |
| "grad_norm": 0.011903772130608559, | |
| "learning_rate": 0.001712895377128954, | |
| "loss": 0.723, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.4613070823079026, | |
| "grad_norm": 0.03922785073518753, | |
| "learning_rate": 0.001710462287104623, | |
| "loss": 0.6581, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.4634946677604594, | |
| "grad_norm": 0.014414667151868343, | |
| "learning_rate": 0.001708029197080292, | |
| "loss": 0.8511, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.465682253213016, | |
| "grad_norm": 0.010338617488741875, | |
| "learning_rate": 0.0017055961070559613, | |
| "loss": 0.7162, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.4678698386655729, | |
| "grad_norm": 0.011176107451319695, | |
| "learning_rate": 0.0017031630170316302, | |
| "loss": 0.8674, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.4700574241181297, | |
| "grad_norm": 0.014365148730576038, | |
| "learning_rate": 0.0017007299270072993, | |
| "loss": 0.7739, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.4722450095706865, | |
| "grad_norm": 0.019749363884329796, | |
| "learning_rate": 0.0016982968369829686, | |
| "loss": 0.7571, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.474432595023243, | |
| "grad_norm": 0.011761876754462719, | |
| "learning_rate": 0.0016958637469586374, | |
| "loss": 0.7208, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.4766201804758, | |
| "grad_norm": 0.025715123862028122, | |
| "learning_rate": 0.0016934306569343065, | |
| "loss": 0.7554, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.4788077659283565, | |
| "grad_norm": 0.028069710358977318, | |
| "learning_rate": 0.0016909975669099759, | |
| "loss": 0.6652, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.4809953513809133, | |
| "grad_norm": 0.02627987042069435, | |
| "learning_rate": 0.001688564476885645, | |
| "loss": 0.7924, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.4831829368334701, | |
| "grad_norm": 0.005099075846374035, | |
| "learning_rate": 0.0016861313868613138, | |
| "loss": 0.75, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.4853705222860267, | |
| "grad_norm": 0.007156622130423784, | |
| "learning_rate": 0.0016836982968369831, | |
| "loss": 0.8034, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.4875581077385835, | |
| "grad_norm": 0.008162274025380611, | |
| "learning_rate": 0.0016812652068126522, | |
| "loss": 0.6174, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.4897456931911401, | |
| "grad_norm": 0.01390012539923191, | |
| "learning_rate": 0.001678832116788321, | |
| "loss": 0.7813, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.491933278643697, | |
| "grad_norm": 0.03663848340511322, | |
| "learning_rate": 0.0016763990267639902, | |
| "loss": 0.6028, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.4941208640962538, | |
| "grad_norm": 0.01389587577432394, | |
| "learning_rate": 0.0016739659367396595, | |
| "loss": 0.9186, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.4963084495488106, | |
| "grad_norm": 0.007214284967631102, | |
| "learning_rate": 0.0016715328467153284, | |
| "loss": 1.0112, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.4984960350013672, | |
| "grad_norm": 0.01086746621876955, | |
| "learning_rate": 0.0016690997566909975, | |
| "loss": 0.7628, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.500683620453924, | |
| "grad_norm": 0.006750196684151888, | |
| "learning_rate": 0.0016666666666666668, | |
| "loss": 0.8025, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.5028712059064806, | |
| "grad_norm": 0.012172271497547626, | |
| "learning_rate": 0.0016642335766423359, | |
| "loss": 0.7559, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.5050587913590374, | |
| "grad_norm": 0.03923722356557846, | |
| "learning_rate": 0.0016618004866180047, | |
| "loss": 0.8227, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.5072463768115942, | |
| "grad_norm": 0.020949123427271843, | |
| "learning_rate": 0.001659367396593674, | |
| "loss": 0.7272, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.509433962264151, | |
| "grad_norm": 0.012365633621811867, | |
| "learning_rate": 0.0016569343065693431, | |
| "loss": 0.9127, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.5116215477167076, | |
| "grad_norm": 0.012725708074867725, | |
| "learning_rate": 0.001654501216545012, | |
| "loss": 0.7576, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.5138091331692645, | |
| "grad_norm": 0.014691759832203388, | |
| "learning_rate": 0.0016520681265206813, | |
| "loss": 0.6951, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.515996718621821, | |
| "grad_norm": 0.009719770401716232, | |
| "learning_rate": 0.0016496350364963504, | |
| "loss": 0.6947, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.5181843040743779, | |
| "grad_norm": 0.0074682896956801414, | |
| "learning_rate": 0.0016472019464720193, | |
| "loss": 0.8467, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.5203718895269347, | |
| "grad_norm": 0.011303418315947056, | |
| "learning_rate": 0.0016447688564476886, | |
| "loss": 0.7453, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.5225594749794915, | |
| "grad_norm": 0.009616104885935783, | |
| "learning_rate": 0.0016423357664233577, | |
| "loss": 0.8284, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.524747060432048, | |
| "grad_norm": 0.004562855698168278, | |
| "learning_rate": 0.0016399026763990268, | |
| "loss": 0.776, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.5269346458846047, | |
| "grad_norm": 0.0057913740165531635, | |
| "learning_rate": 0.001637469586374696, | |
| "loss": 0.5635, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.5291222313371615, | |
| "grad_norm": 0.011465840972959995, | |
| "learning_rate": 0.001635036496350365, | |
| "loss": 0.7466, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.5313098167897183, | |
| "grad_norm": 0.009356693364679813, | |
| "learning_rate": 0.001632603406326034, | |
| "loss": 0.7555, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.5334974022422752, | |
| "grad_norm": 0.01132314745336771, | |
| "learning_rate": 0.0016301703163017034, | |
| "loss": 0.6987, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.535684987694832, | |
| "grad_norm": 0.011162355542182922, | |
| "learning_rate": 0.0016277372262773723, | |
| "loss": 0.7895, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.5378725731473886, | |
| "grad_norm": 0.008752882480621338, | |
| "learning_rate": 0.0016253041362530413, | |
| "loss": 0.7829, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.5400601585999452, | |
| "grad_norm": 0.0067902375012636185, | |
| "learning_rate": 0.0016228710462287107, | |
| "loss": 0.7541, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.542247744052502, | |
| "grad_norm": 0.010398069396615028, | |
| "learning_rate": 0.0016204379562043795, | |
| "loss": 0.84, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.5444353295050588, | |
| "grad_norm": 0.006489087361842394, | |
| "learning_rate": 0.0016180048661800486, | |
| "loss": 0.7745, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.5466229149576156, | |
| "grad_norm": 0.00789352972060442, | |
| "learning_rate": 0.001615571776155718, | |
| "loss": 0.7006, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.5488105004101724, | |
| "grad_norm": 0.005906807258725166, | |
| "learning_rate": 0.001613138686131387, | |
| "loss": 0.826, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.550998085862729, | |
| "grad_norm": 0.006026630289852619, | |
| "learning_rate": 0.001610705596107056, | |
| "loss": 0.6783, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.5531856713152856, | |
| "grad_norm": 0.010388746857643127, | |
| "learning_rate": 0.0016082725060827252, | |
| "loss": 0.8531, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.5553732567678424, | |
| "grad_norm": 0.01053705345839262, | |
| "learning_rate": 0.0016058394160583943, | |
| "loss": 0.7257, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.5575608422203993, | |
| "grad_norm": 0.006276300642639399, | |
| "learning_rate": 0.0016034063260340632, | |
| "loss": 0.7996, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.559748427672956, | |
| "grad_norm": 0.006276302970945835, | |
| "learning_rate": 0.0016009732360097325, | |
| "loss": 0.8443, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.5619360131255127, | |
| "grad_norm": 0.008509790524840355, | |
| "learning_rate": 0.0015985401459854016, | |
| "loss": 0.7289, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.5641235985780695, | |
| "grad_norm": 0.01978105679154396, | |
| "learning_rate": 0.0015961070559610705, | |
| "loss": 0.846, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.566311184030626, | |
| "grad_norm": 0.012076129205524921, | |
| "learning_rate": 0.0015936739659367398, | |
| "loss": 0.7292, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.568498769483183, | |
| "grad_norm": 0.01716456562280655, | |
| "learning_rate": 0.0015912408759124089, | |
| "loss": 0.7655, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.5706863549357397, | |
| "grad_norm": 0.016601664945483208, | |
| "learning_rate": 0.001588807785888078, | |
| "loss": 0.7277, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.5728739403882965, | |
| "grad_norm": 0.010958652012050152, | |
| "learning_rate": 0.0015863746958637468, | |
| "loss": 0.7392, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.5750615258408531, | |
| "grad_norm": 0.007287964224815369, | |
| "learning_rate": 0.0015839416058394161, | |
| "loss": 0.822, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.57724911129341, | |
| "grad_norm": 0.010577067732810974, | |
| "learning_rate": 0.0015815085158150852, | |
| "loss": 0.732, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.5794366967459665, | |
| "grad_norm": 0.007742591667920351, | |
| "learning_rate": 0.001579075425790754, | |
| "loss": 0.8312, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.5816242821985234, | |
| "grad_norm": 0.009659879840910435, | |
| "learning_rate": 0.0015766423357664234, | |
| "loss": 0.8213, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.5838118676510802, | |
| "grad_norm": 0.015149835497140884, | |
| "learning_rate": 0.0015742092457420925, | |
| "loss": 0.6992, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.585999453103637, | |
| "grad_norm": 0.007888193242251873, | |
| "learning_rate": 0.0015717761557177614, | |
| "loss": 0.8853, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.5881870385561936, | |
| "grad_norm": 0.011876450851559639, | |
| "learning_rate": 0.0015693430656934307, | |
| "loss": 0.7645, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.5903746240087502, | |
| "grad_norm": 0.015837261453270912, | |
| "learning_rate": 0.0015669099756690998, | |
| "loss": 0.8061, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.592562209461307, | |
| "grad_norm": 0.006944081746041775, | |
| "learning_rate": 0.0015644768856447687, | |
| "loss": 0.6043, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.5947497949138638, | |
| "grad_norm": 0.01456182450056076, | |
| "learning_rate": 0.0015620437956204382, | |
| "loss": 0.9343, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.5969373803664206, | |
| "grad_norm": 0.007655070163309574, | |
| "learning_rate": 0.001559610705596107, | |
| "loss": 0.727, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.5991249658189775, | |
| "grad_norm": 0.014365557581186295, | |
| "learning_rate": 0.0015571776155717761, | |
| "loss": 0.6884, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.601312551271534, | |
| "grad_norm": 0.013196627609431744, | |
| "learning_rate": 0.0015547445255474455, | |
| "loss": 0.6522, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.6035001367240906, | |
| "grad_norm": 0.0069740209728479385, | |
| "learning_rate": 0.0015523114355231143, | |
| "loss": 0.812, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.6056877221766475, | |
| "grad_norm": 0.018947165459394455, | |
| "learning_rate": 0.0015498783454987834, | |
| "loss": 0.7464, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.6078753076292043, | |
| "grad_norm": 0.02975570783019066, | |
| "learning_rate": 0.0015474452554744527, | |
| "loss": 1.0338, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.610062893081761, | |
| "grad_norm": 0.01144670695066452, | |
| "learning_rate": 0.0015450121654501216, | |
| "loss": 0.9582, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.612250478534318, | |
| "grad_norm": 0.08359838277101517, | |
| "learning_rate": 0.0015425790754257907, | |
| "loss": 0.6188, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.6144380639868745, | |
| "grad_norm": 0.005582269746810198, | |
| "learning_rate": 0.00154014598540146, | |
| "loss": 0.6557, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.616625649439431, | |
| "grad_norm": 0.008966202847659588, | |
| "learning_rate": 0.001537712895377129, | |
| "loss": 0.6564, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.618813234891988, | |
| "grad_norm": 0.011794374324381351, | |
| "learning_rate": 0.001535279805352798, | |
| "loss": 0.8051, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.6210008203445447, | |
| "grad_norm": 0.00766439875587821, | |
| "learning_rate": 0.0015328467153284673, | |
| "loss": 0.8145, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.6231884057971016, | |
| "grad_norm": 0.014379739761352539, | |
| "learning_rate": 0.0015304136253041364, | |
| "loss": 0.8658, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.6253759912496581, | |
| "grad_norm": 0.01025471929460764, | |
| "learning_rate": 0.0015279805352798053, | |
| "loss": 0.6969, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.627563576702215, | |
| "grad_norm": 0.012737879529595375, | |
| "learning_rate": 0.0015255474452554746, | |
| "loss": 0.9006, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.6297511621547716, | |
| "grad_norm": 0.0110158147290349, | |
| "learning_rate": 0.0015231143552311437, | |
| "loss": 0.7326, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.6319387476073284, | |
| "grad_norm": 0.011220619082450867, | |
| "learning_rate": 0.0015206812652068125, | |
| "loss": 0.8275, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.6341263330598852, | |
| "grad_norm": 0.00941223930567503, | |
| "learning_rate": 0.0015182481751824818, | |
| "loss": 0.8187, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.636313918512442, | |
| "grad_norm": 0.004144694656133652, | |
| "learning_rate": 0.001515815085158151, | |
| "loss": 0.7248, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.6385015039649986, | |
| "grad_norm": 0.013639383018016815, | |
| "learning_rate": 0.0015133819951338198, | |
| "loss": 0.7966, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.6406890894175554, | |
| "grad_norm": 0.006385320797562599, | |
| "learning_rate": 0.0015109489051094893, | |
| "loss": 0.6772, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.642876674870112, | |
| "grad_norm": 0.011585132218897343, | |
| "learning_rate": 0.0015085158150851582, | |
| "loss": 0.6696, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.6450642603226688, | |
| "grad_norm": 0.023672277107834816, | |
| "learning_rate": 0.0015060827250608273, | |
| "loss": 0.6978, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.6472518457752257, | |
| "grad_norm": 0.014683379791676998, | |
| "learning_rate": 0.0015036496350364966, | |
| "loss": 0.5735, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.6494394312277825, | |
| "grad_norm": 0.010881925001740456, | |
| "learning_rate": 0.0015012165450121655, | |
| "loss": 0.6773, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.651627016680339, | |
| "grad_norm": 0.009006233885884285, | |
| "learning_rate": 0.0014987834549878346, | |
| "loss": 0.7773, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.6538146021328957, | |
| "grad_norm": 0.01426916103810072, | |
| "learning_rate": 0.0014963503649635037, | |
| "loss": 0.7436, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.6560021875854525, | |
| "grad_norm": 0.005649265833199024, | |
| "learning_rate": 0.0014939172749391728, | |
| "loss": 0.7041, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.6581897730380093, | |
| "grad_norm": 0.008767529390752316, | |
| "learning_rate": 0.0014914841849148419, | |
| "loss": 0.6701, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.6603773584905661, | |
| "grad_norm": 0.007580756675451994, | |
| "learning_rate": 0.001489051094890511, | |
| "loss": 0.6593, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.662564943943123, | |
| "grad_norm": 0.010842681862413883, | |
| "learning_rate": 0.0014866180048661803, | |
| "loss": 0.9414, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6647525293956795, | |
| "grad_norm": 0.008890979923307896, | |
| "learning_rate": 0.0014841849148418491, | |
| "loss": 0.8333, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.6669401148482361, | |
| "grad_norm": 0.00815370213240385, | |
| "learning_rate": 0.0014817518248175182, | |
| "loss": 0.8596, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.669127700300793, | |
| "grad_norm": 0.007434117142111063, | |
| "learning_rate": 0.0014793187347931875, | |
| "loss": 0.631, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.6713152857533498, | |
| "grad_norm": 0.007965626195073128, | |
| "learning_rate": 0.0014768856447688564, | |
| "loss": 0.7377, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.6735028712059066, | |
| "grad_norm": 0.014369670301675797, | |
| "learning_rate": 0.0014744525547445257, | |
| "loss": 0.6907, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.6756904566584632, | |
| "grad_norm": 0.013002739287912846, | |
| "learning_rate": 0.0014720194647201946, | |
| "loss": 0.8491, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.67787804211102, | |
| "grad_norm": 0.008742110803723335, | |
| "learning_rate": 0.0014695863746958637, | |
| "loss": 1.0319, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.6800656275635766, | |
| "grad_norm": 0.01362073328346014, | |
| "learning_rate": 0.001467153284671533, | |
| "loss": 0.596, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.6822532130161334, | |
| "grad_norm": 0.007842877879738808, | |
| "learning_rate": 0.0014647201946472019, | |
| "loss": 0.848, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.6844407984686902, | |
| "grad_norm": 0.007685767021030188, | |
| "learning_rate": 0.001462287104622871, | |
| "loss": 0.6811, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.686628383921247, | |
| "grad_norm": 0.07299596816301346, | |
| "learning_rate": 0.0014598540145985403, | |
| "loss": 0.7739, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.6888159693738036, | |
| "grad_norm": 0.02475287765264511, | |
| "learning_rate": 0.0014574209245742091, | |
| "loss": 0.8412, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.6910035548263604, | |
| "grad_norm": 0.02310485951602459, | |
| "learning_rate": 0.0014549878345498785, | |
| "loss": 0.7707, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.693191140278917, | |
| "grad_norm": 0.006614830810576677, | |
| "learning_rate": 0.0014525547445255475, | |
| "loss": 0.9116, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.6953787257314739, | |
| "grad_norm": 0.017114151269197464, | |
| "learning_rate": 0.0014501216545012164, | |
| "loss": 0.7767, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.6975663111840307, | |
| "grad_norm": 0.007972135208547115, | |
| "learning_rate": 0.0014476885644768857, | |
| "loss": 0.8053, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.6997538966365875, | |
| "grad_norm": 0.013452711515128613, | |
| "learning_rate": 0.0014452554744525548, | |
| "loss": 0.633, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.701941482089144, | |
| "grad_norm": 0.01562053058296442, | |
| "learning_rate": 0.001442822384428224, | |
| "loss": 0.8312, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.7041290675417007, | |
| "grad_norm": 0.006510770879685879, | |
| "learning_rate": 0.001440389294403893, | |
| "loss": 0.7721, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.7063166529942575, | |
| "grad_norm": 0.011892448179423809, | |
| "learning_rate": 0.001437956204379562, | |
| "loss": 0.6629, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.7085042384468143, | |
| "grad_norm": 0.005237538833171129, | |
| "learning_rate": 0.0014355231143552312, | |
| "loss": 0.5767, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.7106918238993711, | |
| "grad_norm": 0.020627424120903015, | |
| "learning_rate": 0.0014330900243309003, | |
| "loss": 0.8974, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.712879409351928, | |
| "grad_norm": 0.012742357328534126, | |
| "learning_rate": 0.0014306569343065694, | |
| "loss": 0.5843, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.7150669948044845, | |
| "grad_norm": 0.011114447377622128, | |
| "learning_rate": 0.0014282238442822385, | |
| "loss": 0.9336, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.7172545802570411, | |
| "grad_norm": 0.01212508138269186, | |
| "learning_rate": 0.0014257907542579076, | |
| "loss": 0.853, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.719442165709598, | |
| "grad_norm": 0.006842518225312233, | |
| "learning_rate": 0.0014233576642335767, | |
| "loss": 0.8329, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.7216297511621548, | |
| "grad_norm": 0.008684076368808746, | |
| "learning_rate": 0.0014209245742092457, | |
| "loss": 0.8503, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.7238173366147116, | |
| "grad_norm": 0.009845465421676636, | |
| "learning_rate": 0.0014184914841849148, | |
| "loss": 0.9911, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.7260049220672684, | |
| "grad_norm": 0.007301978301256895, | |
| "learning_rate": 0.001416058394160584, | |
| "loss": 0.6684, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.728192507519825, | |
| "grad_norm": 0.010263817384839058, | |
| "learning_rate": 0.001413625304136253, | |
| "loss": 0.6852, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.7303800929723816, | |
| "grad_norm": 0.012078475207090378, | |
| "learning_rate": 0.0014111922141119221, | |
| "loss": 0.6509, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.7325676784249384, | |
| "grad_norm": 0.012108572758734226, | |
| "learning_rate": 0.0014087591240875912, | |
| "loss": 0.7183, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.7347552638774952, | |
| "grad_norm": 0.011477826163172722, | |
| "learning_rate": 0.0014063260340632603, | |
| "loss": 0.8856, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.736942849330052, | |
| "grad_norm": 0.007066864520311356, | |
| "learning_rate": 0.0014038929440389296, | |
| "loss": 0.6114, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.7391304347826086, | |
| "grad_norm": 0.011538154445588589, | |
| "learning_rate": 0.0014014598540145985, | |
| "loss": 0.6716, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.7413180202351655, | |
| "grad_norm": 0.008611057884991169, | |
| "learning_rate": 0.0013990267639902676, | |
| "loss": 0.9979, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.743505605687722, | |
| "grad_norm": 0.013740317896008492, | |
| "learning_rate": 0.0013965936739659369, | |
| "loss": 0.8166, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.7456931911402789, | |
| "grad_norm": 0.008636080659925938, | |
| "learning_rate": 0.0013941605839416058, | |
| "loss": 0.8138, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.7478807765928357, | |
| "grad_norm": 0.008637238293886185, | |
| "learning_rate": 0.001391727493917275, | |
| "loss": 0.9225, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.7500683620453925, | |
| "grad_norm": 0.022517461329698563, | |
| "learning_rate": 0.0013892944038929442, | |
| "loss": 0.735, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.752255947497949, | |
| "grad_norm": 0.005302282981574535, | |
| "learning_rate": 0.001386861313868613, | |
| "loss": 0.657, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.754443532950506, | |
| "grad_norm": 0.04943990707397461, | |
| "learning_rate": 0.0013844282238442824, | |
| "loss": 0.623, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.7566311184030625, | |
| "grad_norm": 0.011758695356547832, | |
| "learning_rate": 0.0013819951338199512, | |
| "loss": 0.8038, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.7588187038556193, | |
| "grad_norm": 0.009712104685604572, | |
| "learning_rate": 0.0013795620437956205, | |
| "loss": 0.7268, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.7610062893081762, | |
| "grad_norm": 0.007741864304989576, | |
| "learning_rate": 0.0013771289537712896, | |
| "loss": 0.7049, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.763193874760733, | |
| "grad_norm": 0.010713865980505943, | |
| "learning_rate": 0.0013746958637469585, | |
| "loss": 0.6425, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.7653814602132896, | |
| "grad_norm": 0.006576141808182001, | |
| "learning_rate": 0.0013722627737226278, | |
| "loss": 0.7601, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.7675690456658462, | |
| "grad_norm": 0.007796050515025854, | |
| "learning_rate": 0.001369829683698297, | |
| "loss": 0.659, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.769756631118403, | |
| "grad_norm": 0.01460753008723259, | |
| "learning_rate": 0.001367396593673966, | |
| "loss": 0.769, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.7719442165709598, | |
| "grad_norm": 0.010747969150543213, | |
| "learning_rate": 0.001364963503649635, | |
| "loss": 0.8531, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.7741318020235166, | |
| "grad_norm": 0.011500733904540539, | |
| "learning_rate": 0.0013625304136253042, | |
| "loss": 0.7294, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.7763193874760734, | |
| "grad_norm": 0.013433235697448254, | |
| "learning_rate": 0.0013600973236009733, | |
| "loss": 0.6442, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.77850697292863, | |
| "grad_norm": 0.019317343831062317, | |
| "learning_rate": 0.0013576642335766424, | |
| "loss": 0.6254, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 1.7806945583811866, | |
| "grad_norm": 0.020062780007719994, | |
| "learning_rate": 0.0013552311435523115, | |
| "loss": 0.6957, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.7828821438337434, | |
| "grad_norm": 0.00756926229223609, | |
| "learning_rate": 0.0013527980535279806, | |
| "loss": 0.7532, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.7850697292863003, | |
| "grad_norm": 0.0089380769059062, | |
| "learning_rate": 0.0013503649635036496, | |
| "loss": 0.6534, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.787257314738857, | |
| "grad_norm": 0.006980338133871555, | |
| "learning_rate": 0.0013479318734793187, | |
| "loss": 0.7314, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 1.789444900191414, | |
| "grad_norm": 0.0074529629200696945, | |
| "learning_rate": 0.0013454987834549878, | |
| "loss": 0.8291, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.7916324856439705, | |
| "grad_norm": 0.02699979580938816, | |
| "learning_rate": 0.001343065693430657, | |
| "loss": 0.7249, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 1.793820071096527, | |
| "grad_norm": 0.008204830810427666, | |
| "learning_rate": 0.001340632603406326, | |
| "loss": 0.7446, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.796007656549084, | |
| "grad_norm": 0.006959575694054365, | |
| "learning_rate": 0.001338199513381995, | |
| "loss": 0.6694, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 1.7981952420016407, | |
| "grad_norm": 0.006019539665430784, | |
| "learning_rate": 0.0013357664233576642, | |
| "loss": 0.7947, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.8003828274541975, | |
| "grad_norm": 0.007515772711485624, | |
| "learning_rate": 0.0013333333333333333, | |
| "loss": 0.6259, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 1.8025704129067541, | |
| "grad_norm": 0.0231679268181324, | |
| "learning_rate": 0.0013309002433090024, | |
| "loss": 0.5702, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.804757998359311, | |
| "grad_norm": 0.009831500239670277, | |
| "learning_rate": 0.0013284671532846717, | |
| "loss": 0.7197, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.8069455838118675, | |
| "grad_norm": 0.011389415711164474, | |
| "learning_rate": 0.0013260340632603406, | |
| "loss": 0.8466, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.8091331692644244, | |
| "grad_norm": 0.010654733516275883, | |
| "learning_rate": 0.0013236009732360097, | |
| "loss": 0.7456, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 1.8113207547169812, | |
| "grad_norm": 0.010770871303975582, | |
| "learning_rate": 0.001321167883211679, | |
| "loss": 0.6827, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.813508340169538, | |
| "grad_norm": 0.00828484632074833, | |
| "learning_rate": 0.0013187347931873478, | |
| "loss": 0.6794, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 1.8156959256220946, | |
| "grad_norm": 0.00973398145288229, | |
| "learning_rate": 0.0013163017031630172, | |
| "loss": 0.7354, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.8178835110746514, | |
| "grad_norm": 0.00983220711350441, | |
| "learning_rate": 0.0013138686131386862, | |
| "loss": 0.8531, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.820071096527208, | |
| "grad_norm": 0.02620159089565277, | |
| "learning_rate": 0.0013114355231143551, | |
| "loss": 0.7631, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.8222586819797648, | |
| "grad_norm": 0.057880647480487823, | |
| "learning_rate": 0.0013090024330900244, | |
| "loss": 0.9336, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.8244462674323216, | |
| "grad_norm": 0.011240589432418346, | |
| "learning_rate": 0.0013065693430656935, | |
| "loss": 0.5887, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.8266338528848785, | |
| "grad_norm": 0.012356660328805447, | |
| "learning_rate": 0.0013041362530413626, | |
| "loss": 0.702, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.828821438337435, | |
| "grad_norm": 0.006840168032795191, | |
| "learning_rate": 0.0013017031630170317, | |
| "loss": 0.756, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.8310090237899916, | |
| "grad_norm": 0.005550102796405554, | |
| "learning_rate": 0.0012992700729927008, | |
| "loss": 0.7161, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.8331966092425485, | |
| "grad_norm": 0.0120685501024127, | |
| "learning_rate": 0.0012968369829683699, | |
| "loss": 0.9234, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.8353841946951053, | |
| "grad_norm": 0.008514792658388615, | |
| "learning_rate": 0.001294403892944039, | |
| "loss": 0.5988, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.837571780147662, | |
| "grad_norm": 0.019344119355082512, | |
| "learning_rate": 0.001291970802919708, | |
| "loss": 0.8419, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.839759365600219, | |
| "grad_norm": 0.01257373858243227, | |
| "learning_rate": 0.0012895377128953772, | |
| "loss": 0.6785, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.8419469510527755, | |
| "grad_norm": 0.022899962961673737, | |
| "learning_rate": 0.0012871046228710463, | |
| "loss": 0.6617, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.844134536505332, | |
| "grad_norm": 0.012275392189621925, | |
| "learning_rate": 0.0012846715328467154, | |
| "loss": 0.8096, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.846322121957889, | |
| "grad_norm": 0.01191315334290266, | |
| "learning_rate": 0.0012822384428223844, | |
| "loss": 0.7757, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.8485097074104457, | |
| "grad_norm": 0.012164206244051456, | |
| "learning_rate": 0.0012798053527980535, | |
| "loss": 0.7284, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.8506972928630026, | |
| "grad_norm": 0.007747825235128403, | |
| "learning_rate": 0.0012773722627737226, | |
| "loss": 0.673, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.8528848783155591, | |
| "grad_norm": 0.01633123680949211, | |
| "learning_rate": 0.0012749391727493917, | |
| "loss": 0.6006, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.855072463768116, | |
| "grad_norm": 0.008600953966379166, | |
| "learning_rate": 0.0012725060827250608, | |
| "loss": 0.7354, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.8572600492206726, | |
| "grad_norm": 0.008487503044307232, | |
| "learning_rate": 0.00127007299270073, | |
| "loss": 0.689, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.8594476346732294, | |
| "grad_norm": 0.01615467295050621, | |
| "learning_rate": 0.001267639902676399, | |
| "loss": 0.7461, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.8616352201257862, | |
| "grad_norm": 0.008541187271475792, | |
| "learning_rate": 0.0012652068126520683, | |
| "loss": 0.6958, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.863822805578343, | |
| "grad_norm": 0.01053849421441555, | |
| "learning_rate": 0.0012627737226277372, | |
| "loss": 0.6786, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.8660103910308996, | |
| "grad_norm": 0.008857163600623608, | |
| "learning_rate": 0.0012603406326034063, | |
| "loss": 0.6645, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.8681979764834564, | |
| "grad_norm": 0.006793574895709753, | |
| "learning_rate": 0.0012579075425790756, | |
| "loss": 0.6311, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.870385561936013, | |
| "grad_norm": 0.01936703361570835, | |
| "learning_rate": 0.0012554744525547445, | |
| "loss": 0.9318, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.8725731473885698, | |
| "grad_norm": 0.009839971549808979, | |
| "learning_rate": 0.0012530413625304138, | |
| "loss": 0.7309, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.8747607328411267, | |
| "grad_norm": 0.010399356484413147, | |
| "learning_rate": 0.0012506082725060829, | |
| "loss": 0.8351, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.8769483182936835, | |
| "grad_norm": 0.014294488355517387, | |
| "learning_rate": 0.0012481751824817517, | |
| "loss": 0.6187, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.87913590374624, | |
| "grad_norm": 0.011614672839641571, | |
| "learning_rate": 0.001245742092457421, | |
| "loss": 0.9295, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.8813234891987969, | |
| "grad_norm": 0.015355818904936314, | |
| "learning_rate": 0.00124330900243309, | |
| "loss": 0.5266, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.8835110746513535, | |
| "grad_norm": 0.011674858629703522, | |
| "learning_rate": 0.0012408759124087592, | |
| "loss": 0.6467, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.8856986601039103, | |
| "grad_norm": 0.013345809653401375, | |
| "learning_rate": 0.0012384428223844283, | |
| "loss": 0.8166, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.8878862455564671, | |
| "grad_norm": 0.009595265612006187, | |
| "learning_rate": 0.0012360097323600972, | |
| "loss": 0.7704, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.890073831009024, | |
| "grad_norm": 0.01896647922694683, | |
| "learning_rate": 0.0012335766423357665, | |
| "loss": 0.7815, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.8922614164615805, | |
| "grad_norm": 0.017639558762311935, | |
| "learning_rate": 0.0012311435523114356, | |
| "loss": 0.7979, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.8944490019141371, | |
| "grad_norm": 0.022902049124240875, | |
| "learning_rate": 0.0012287104622871047, | |
| "loss": 0.8904, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.896636587366694, | |
| "grad_norm": 0.0124649154022336, | |
| "learning_rate": 0.0012262773722627738, | |
| "loss": 0.7693, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.8988241728192508, | |
| "grad_norm": 0.007474742829799652, | |
| "learning_rate": 0.0012238442822384429, | |
| "loss": 0.6641, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.9010117582718076, | |
| "grad_norm": 0.008987569250166416, | |
| "learning_rate": 0.001221411192214112, | |
| "loss": 0.6378, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.9031993437243644, | |
| "grad_norm": 0.009300309233367443, | |
| "learning_rate": 0.001218978102189781, | |
| "loss": 0.7426, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.905386929176921, | |
| "grad_norm": 0.01408142875880003, | |
| "learning_rate": 0.0012165450121654502, | |
| "loss": 0.7824, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.9075745146294776, | |
| "grad_norm": 0.00678917346522212, | |
| "learning_rate": 0.0012141119221411192, | |
| "loss": 0.7978, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.9097621000820344, | |
| "grad_norm": 0.010661943815648556, | |
| "learning_rate": 0.0012116788321167883, | |
| "loss": 0.6591, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.9119496855345912, | |
| "grad_norm": 0.009882554411888123, | |
| "learning_rate": 0.0012092457420924574, | |
| "loss": 0.7443, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.914137270987148, | |
| "grad_norm": 0.12100229412317276, | |
| "learning_rate": 0.0012068126520681265, | |
| "loss": 0.8035, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.9163248564397046, | |
| "grad_norm": 0.01500593964010477, | |
| "learning_rate": 0.0012043795620437956, | |
| "loss": 0.8671, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.9185124418922614, | |
| "grad_norm": 0.01351536437869072, | |
| "learning_rate": 0.0012019464720194647, | |
| "loss": 0.824, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.920700027344818, | |
| "grad_norm": 0.02334493212401867, | |
| "learning_rate": 0.0011995133819951338, | |
| "loss": 0.7728, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.9228876127973749, | |
| "grad_norm": 0.04414600878953934, | |
| "learning_rate": 0.001197080291970803, | |
| "loss": 0.7811, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.9250751982499317, | |
| "grad_norm": 0.03064621239900589, | |
| "learning_rate": 0.001194647201946472, | |
| "loss": 0.8812, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.9272627837024885, | |
| "grad_norm": 0.010438323952257633, | |
| "learning_rate": 0.001192214111922141, | |
| "loss": 0.8027, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.929450369155045, | |
| "grad_norm": 0.016364533454179764, | |
| "learning_rate": 0.0011897810218978104, | |
| "loss": 0.6239, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.931637954607602, | |
| "grad_norm": 0.02069861628115177, | |
| "learning_rate": 0.0011873479318734793, | |
| "loss": 0.8137, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.9338255400601585, | |
| "grad_norm": 0.017191501334309578, | |
| "learning_rate": 0.0011849148418491484, | |
| "loss": 0.8052, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.9360131255127153, | |
| "grad_norm": 0.014077574014663696, | |
| "learning_rate": 0.0011824817518248177, | |
| "loss": 0.8584, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.9382007109652721, | |
| "grad_norm": 0.009209788404405117, | |
| "learning_rate": 0.0011800486618004865, | |
| "loss": 0.6426, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.940388296417829, | |
| "grad_norm": 0.026021014899015427, | |
| "learning_rate": 0.0011776155717761558, | |
| "loss": 0.7457, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.9425758818703855, | |
| "grad_norm": 0.024019265547394753, | |
| "learning_rate": 0.001175182481751825, | |
| "loss": 0.869, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.9447634673229421, | |
| "grad_norm": 0.020230406895279884, | |
| "learning_rate": 0.0011727493917274938, | |
| "loss": 0.8532, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.946951052775499, | |
| "grad_norm": 0.018076736479997635, | |
| "learning_rate": 0.0011703163017031631, | |
| "loss": 0.7276, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.9491386382280558, | |
| "grad_norm": 0.019679049029946327, | |
| "learning_rate": 0.0011678832116788322, | |
| "loss": 0.7214, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.9513262236806126, | |
| "grad_norm": 0.010772393085062504, | |
| "learning_rate": 0.0011654501216545013, | |
| "loss": 0.6786, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.9535138091331694, | |
| "grad_norm": 0.010874917730689049, | |
| "learning_rate": 0.0011630170316301704, | |
| "loss": 0.7272, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.955701394585726, | |
| "grad_norm": 0.00815314520150423, | |
| "learning_rate": 0.0011605839416058395, | |
| "loss": 0.8908, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.9578889800382826, | |
| "grad_norm": 0.008539310656487942, | |
| "learning_rate": 0.0011581508515815086, | |
| "loss": 0.6394, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.9600765654908394, | |
| "grad_norm": 0.039017412811517715, | |
| "learning_rate": 0.0011557177615571777, | |
| "loss": 0.6505, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.9622641509433962, | |
| "grad_norm": 0.009175320155918598, | |
| "learning_rate": 0.0011532846715328468, | |
| "loss": 0.975, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.964451736395953, | |
| "grad_norm": 0.014542749151587486, | |
| "learning_rate": 0.0011508515815085159, | |
| "loss": 0.7222, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.9666393218485099, | |
| "grad_norm": 0.01856316812336445, | |
| "learning_rate": 0.001148418491484185, | |
| "loss": 0.7575, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.9688269073010665, | |
| "grad_norm": 0.007601718418300152, | |
| "learning_rate": 0.001145985401459854, | |
| "loss": 0.7233, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.971014492753623, | |
| "grad_norm": 0.034239862114191055, | |
| "learning_rate": 0.0011435523114355231, | |
| "loss": 0.6989, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 1.9732020782061799, | |
| "grad_norm": 0.00851233210414648, | |
| "learning_rate": 0.0011411192214111922, | |
| "loss": 0.8321, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.9753896636587367, | |
| "grad_norm": 0.009412054903805256, | |
| "learning_rate": 0.0011386861313868613, | |
| "loss": 0.7139, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 1.9775772491112935, | |
| "grad_norm": 0.012049161829054356, | |
| "learning_rate": 0.0011362530413625304, | |
| "loss": 0.6989, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.97976483456385, | |
| "grad_norm": 0.010931652970612049, | |
| "learning_rate": 0.0011338199513381995, | |
| "loss": 0.8747, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.981952420016407, | |
| "grad_norm": 0.015494965016841888, | |
| "learning_rate": 0.0011313868613138686, | |
| "loss": 0.8644, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.9841400054689635, | |
| "grad_norm": 0.012480970472097397, | |
| "learning_rate": 0.0011289537712895377, | |
| "loss": 0.907, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 1.9863275909215203, | |
| "grad_norm": 0.01492912694811821, | |
| "learning_rate": 0.001126520681265207, | |
| "loss": 0.7421, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.9885151763740772, | |
| "grad_norm": 0.012027468532323837, | |
| "learning_rate": 0.0011240875912408759, | |
| "loss": 0.9274, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 1.990702761826634, | |
| "grad_norm": 0.014835814945399761, | |
| "learning_rate": 0.001121654501216545, | |
| "loss": 0.8337, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.9928903472791906, | |
| "grad_norm": 0.008667545393109322, | |
| "learning_rate": 0.0011192214111922143, | |
| "loss": 0.6117, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 1.9950779327317474, | |
| "grad_norm": 0.01624200865626335, | |
| "learning_rate": 0.0011167883211678832, | |
| "loss": 0.8712, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.997265518184304, | |
| "grad_norm": 0.008188914507627487, | |
| "learning_rate": 0.0011143552311435525, | |
| "loss": 0.8495, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 1.9994531036368608, | |
| "grad_norm": 0.013718970119953156, | |
| "learning_rate": 0.0011119221411192213, | |
| "loss": 0.8417, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.0016406890894176, | |
| "grad_norm": 0.00691073015332222, | |
| "learning_rate": 0.0011094890510948904, | |
| "loss": 0.7033, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 2.0038282745419744, | |
| "grad_norm": 0.017321942374110222, | |
| "learning_rate": 0.0011070559610705597, | |
| "loss": 0.7979, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.0060158599945312, | |
| "grad_norm": 0.007781198713928461, | |
| "learning_rate": 0.0011046228710462286, | |
| "loss": 0.6795, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 2.0082034454470876, | |
| "grad_norm": 0.007755633443593979, | |
| "learning_rate": 0.001102189781021898, | |
| "loss": 0.6363, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.0103910308996444, | |
| "grad_norm": 0.015355097129940987, | |
| "learning_rate": 0.001099756690997567, | |
| "loss": 0.7684, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 2.0125786163522013, | |
| "grad_norm": 0.009972341358661652, | |
| "learning_rate": 0.001097323600973236, | |
| "loss": 0.7659, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.014766201804758, | |
| "grad_norm": 0.00998846534639597, | |
| "learning_rate": 0.0010948905109489052, | |
| "loss": 0.918, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 2.016953787257315, | |
| "grad_norm": 0.007050537038594484, | |
| "learning_rate": 0.0010924574209245743, | |
| "loss": 0.7083, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.0191413727098713, | |
| "grad_norm": 0.008426625281572342, | |
| "learning_rate": 0.0010900243309002432, | |
| "loss": 0.7962, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 2.021328958162428, | |
| "grad_norm": 0.009424027986824512, | |
| "learning_rate": 0.0010875912408759125, | |
| "loss": 0.7369, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.023516543614985, | |
| "grad_norm": 0.012517026625573635, | |
| "learning_rate": 0.0010851581508515816, | |
| "loss": 0.8281, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 2.0257041290675417, | |
| "grad_norm": 0.016427017748355865, | |
| "learning_rate": 0.0010827250608272507, | |
| "loss": 0.7808, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.0278917145200985, | |
| "grad_norm": 0.011162400245666504, | |
| "learning_rate": 0.0010802919708029198, | |
| "loss": 0.8512, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 2.0300792999726553, | |
| "grad_norm": 0.025822371244430542, | |
| "learning_rate": 0.0010778588807785888, | |
| "loss": 0.6347, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.0322668854252117, | |
| "grad_norm": 0.008243129588663578, | |
| "learning_rate": 0.001075425790754258, | |
| "loss": 0.7126, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 2.0344544708777685, | |
| "grad_norm": 0.01245404314249754, | |
| "learning_rate": 0.001072992700729927, | |
| "loss": 0.6111, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.0366420563303254, | |
| "grad_norm": 0.006443020887672901, | |
| "learning_rate": 0.0010705596107055961, | |
| "loss": 0.6287, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 2.038829641782882, | |
| "grad_norm": 0.01358412578701973, | |
| "learning_rate": 0.0010681265206812652, | |
| "loss": 1.0563, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.041017227235439, | |
| "grad_norm": 0.010836120694875717, | |
| "learning_rate": 0.0010656934306569343, | |
| "loss": 0.7046, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 2.043204812687996, | |
| "grad_norm": 0.012488581240177155, | |
| "learning_rate": 0.0010632603406326034, | |
| "loss": 0.8661, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.045392398140552, | |
| "grad_norm": 0.009522946551442146, | |
| "learning_rate": 0.0010608272506082725, | |
| "loss": 0.6687, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 2.047579983593109, | |
| "grad_norm": 0.03695467486977577, | |
| "learning_rate": 0.0010583941605839416, | |
| "loss": 0.7727, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.049767569045666, | |
| "grad_norm": 0.04616512730717659, | |
| "learning_rate": 0.0010559610705596107, | |
| "loss": 0.7193, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 2.0519551544982226, | |
| "grad_norm": 0.010503578931093216, | |
| "learning_rate": 0.0010535279805352798, | |
| "loss": 0.6701, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.0541427399507795, | |
| "grad_norm": 0.008623762056231499, | |
| "learning_rate": 0.001051094890510949, | |
| "loss": 0.7161, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 2.0563303254033363, | |
| "grad_norm": 0.007583661004900932, | |
| "learning_rate": 0.001048661800486618, | |
| "loss": 0.7402, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.0585179108558926, | |
| "grad_norm": 0.008966002613306046, | |
| "learning_rate": 0.001046228710462287, | |
| "loss": 0.7016, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 2.0607054963084495, | |
| "grad_norm": 0.0104443971067667, | |
| "learning_rate": 0.0010437956204379564, | |
| "loss": 0.7877, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.0628930817610063, | |
| "grad_norm": 0.011073727160692215, | |
| "learning_rate": 0.0010413625304136252, | |
| "loss": 0.8216, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 2.065080667213563, | |
| "grad_norm": 0.006104661151766777, | |
| "learning_rate": 0.0010389294403892943, | |
| "loss": 0.7218, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.06726825266612, | |
| "grad_norm": 0.006152690388262272, | |
| "learning_rate": 0.0010364963503649636, | |
| "loss": 0.6807, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 2.0694558381186763, | |
| "grad_norm": 0.01146136224269867, | |
| "learning_rate": 0.0010340632603406325, | |
| "loss": 0.8706, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.071643423571233, | |
| "grad_norm": 0.008924251422286034, | |
| "learning_rate": 0.0010316301703163018, | |
| "loss": 0.7596, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 2.07383100902379, | |
| "grad_norm": 0.01587800122797489, | |
| "learning_rate": 0.001029197080291971, | |
| "loss": 0.8315, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.0760185944763467, | |
| "grad_norm": 0.007868033833801746, | |
| "learning_rate": 0.0010267639902676398, | |
| "loss": 0.8498, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 2.0782061799289036, | |
| "grad_norm": 0.009655119851231575, | |
| "learning_rate": 0.001024330900243309, | |
| "loss": 0.909, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.0803937653814604, | |
| "grad_norm": 0.014302834868431091, | |
| "learning_rate": 0.001021897810218978, | |
| "loss": 0.8934, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 2.0825813508340167, | |
| "grad_norm": 0.008887048810720444, | |
| "learning_rate": 0.0010194647201946473, | |
| "loss": 0.62, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.0847689362865736, | |
| "grad_norm": 0.016339240595698357, | |
| "learning_rate": 0.0010170316301703164, | |
| "loss": 0.7503, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 2.0869565217391304, | |
| "grad_norm": 0.013042870908975601, | |
| "learning_rate": 0.0010145985401459853, | |
| "loss": 0.7425, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.089144107191687, | |
| "grad_norm": 0.009357294999063015, | |
| "learning_rate": 0.0010121654501216546, | |
| "loss": 0.7565, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 2.091331692644244, | |
| "grad_norm": 0.008100231178104877, | |
| "learning_rate": 0.0010097323600973237, | |
| "loss": 0.659, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.093519278096801, | |
| "grad_norm": 0.008745480328798294, | |
| "learning_rate": 0.0010072992700729927, | |
| "loss": 0.6722, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 2.095706863549357, | |
| "grad_norm": 0.02181909792125225, | |
| "learning_rate": 0.0010048661800486618, | |
| "loss": 0.7497, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.097894449001914, | |
| "grad_norm": 0.005593753885477781, | |
| "learning_rate": 0.001002433090024331, | |
| "loss": 0.6413, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 2.100082034454471, | |
| "grad_norm": 0.0110318623483181, | |
| "learning_rate": 0.001, | |
| "loss": 0.7437, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.1022696199070277, | |
| "grad_norm": 0.07487611472606659, | |
| "learning_rate": 0.0009975669099756691, | |
| "loss": 0.8967, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 2.1044572053595845, | |
| "grad_norm": 0.011572844348847866, | |
| "learning_rate": 0.0009951338199513382, | |
| "loss": 0.7016, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.1066447908121413, | |
| "grad_norm": 0.0219709649682045, | |
| "learning_rate": 0.0009927007299270073, | |
| "loss": 0.7582, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 2.1088323762646977, | |
| "grad_norm": 0.014250703155994415, | |
| "learning_rate": 0.0009902676399026764, | |
| "loss": 0.6485, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.1110199617172545, | |
| "grad_norm": 0.010836089961230755, | |
| "learning_rate": 0.0009878345498783455, | |
| "loss": 0.7457, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 2.1132075471698113, | |
| "grad_norm": 0.010538347065448761, | |
| "learning_rate": 0.0009854014598540146, | |
| "loss": 0.7283, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.115395132622368, | |
| "grad_norm": 0.011399851180613041, | |
| "learning_rate": 0.0009829683698296837, | |
| "loss": 0.6896, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 2.117582718074925, | |
| "grad_norm": 0.027435095980763435, | |
| "learning_rate": 0.000980535279805353, | |
| "loss": 0.9376, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.1197703035274817, | |
| "grad_norm": 0.00705757224932313, | |
| "learning_rate": 0.0009781021897810219, | |
| "loss": 0.7243, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 2.121957888980038, | |
| "grad_norm": 0.0098995016887784, | |
| "learning_rate": 0.000975669099756691, | |
| "loss": 0.7931, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.124145474432595, | |
| "grad_norm": 0.011125714518129826, | |
| "learning_rate": 0.00097323600973236, | |
| "loss": 0.6044, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 2.1263330598851518, | |
| "grad_norm": 0.009387229569256306, | |
| "learning_rate": 0.0009708029197080292, | |
| "loss": 0.7187, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.1285206453377086, | |
| "grad_norm": 0.01129234954714775, | |
| "learning_rate": 0.0009683698296836983, | |
| "loss": 0.8324, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 2.1307082307902654, | |
| "grad_norm": 0.011272157542407513, | |
| "learning_rate": 0.0009659367396593673, | |
| "loss": 0.7128, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.132895816242822, | |
| "grad_norm": 0.010409243404865265, | |
| "learning_rate": 0.0009635036496350365, | |
| "loss": 0.7535, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 2.1350834016953786, | |
| "grad_norm": 0.00857408158481121, | |
| "learning_rate": 0.0009610705596107057, | |
| "loss": 0.8129, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.1372709871479354, | |
| "grad_norm": 0.014548208564519882, | |
| "learning_rate": 0.0009586374695863747, | |
| "loss": 0.676, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 2.139458572600492, | |
| "grad_norm": 0.016449380666017532, | |
| "learning_rate": 0.0009562043795620438, | |
| "loss": 0.7384, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.141646158053049, | |
| "grad_norm": 0.007109857629984617, | |
| "learning_rate": 0.000953771289537713, | |
| "loss": 0.6808, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 2.143833743505606, | |
| "grad_norm": 0.009979904629290104, | |
| "learning_rate": 0.000951338199513382, | |
| "loss": 0.6907, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.146021328958162, | |
| "grad_norm": 0.008424636907875538, | |
| "learning_rate": 0.0009489051094890511, | |
| "loss": 0.7423, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 2.148208914410719, | |
| "grad_norm": 0.01054910384118557, | |
| "learning_rate": 0.0009464720194647203, | |
| "loss": 0.6611, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.150396499863276, | |
| "grad_norm": 0.0084614809602499, | |
| "learning_rate": 0.0009440389294403893, | |
| "loss": 0.7548, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 2.1525840853158327, | |
| "grad_norm": 0.008796039037406445, | |
| "learning_rate": 0.0009416058394160585, | |
| "loss": 0.9042, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.1547716707683895, | |
| "grad_norm": 0.011639994569122791, | |
| "learning_rate": 0.0009391727493917275, | |
| "loss": 0.6474, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 2.1569592562209463, | |
| "grad_norm": 0.011916186660528183, | |
| "learning_rate": 0.0009367396593673965, | |
| "loss": 0.7848, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.1591468416735027, | |
| "grad_norm": 0.01620625890791416, | |
| "learning_rate": 0.0009343065693430657, | |
| "loss": 0.7924, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 2.1613344271260595, | |
| "grad_norm": 0.008310189470648766, | |
| "learning_rate": 0.0009318734793187349, | |
| "loss": 0.8015, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.1635220125786163, | |
| "grad_norm": 0.008162159472703934, | |
| "learning_rate": 0.0009294403892944039, | |
| "loss": 0.8261, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 2.165709598031173, | |
| "grad_norm": 0.009289762936532497, | |
| "learning_rate": 0.000927007299270073, | |
| "loss": 0.8676, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.16789718348373, | |
| "grad_norm": 0.007392804138362408, | |
| "learning_rate": 0.000924574209245742, | |
| "loss": 0.6025, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 2.1700847689362868, | |
| "grad_norm": 0.008378117345273495, | |
| "learning_rate": 0.0009221411192214112, | |
| "loss": 0.5951, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.172272354388843, | |
| "grad_norm": 0.037044674158096313, | |
| "learning_rate": 0.0009197080291970804, | |
| "loss": 0.7454, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 2.1744599398414, | |
| "grad_norm": 0.01427681464701891, | |
| "learning_rate": 0.0009172749391727494, | |
| "loss": 0.5663, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.1766475252939568, | |
| "grad_norm": 0.010998294688761234, | |
| "learning_rate": 0.0009148418491484185, | |
| "loss": 0.9058, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 2.1788351107465136, | |
| "grad_norm": 0.007977189496159554, | |
| "learning_rate": 0.0009124087591240877, | |
| "loss": 0.664, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.1810226961990704, | |
| "grad_norm": 0.008938194252550602, | |
| "learning_rate": 0.0009099756690997567, | |
| "loss": 0.7787, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 2.1832102816516272, | |
| "grad_norm": 0.014179794117808342, | |
| "learning_rate": 0.0009075425790754259, | |
| "loss": 0.6453, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.1853978671041836, | |
| "grad_norm": 0.01838630810379982, | |
| "learning_rate": 0.000905109489051095, | |
| "loss": 0.7138, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 2.1875854525567404, | |
| "grad_norm": 0.027501361444592476, | |
| "learning_rate": 0.0009026763990267639, | |
| "loss": 0.7204, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.1897730380092972, | |
| "grad_norm": 0.007381811738014221, | |
| "learning_rate": 0.0009002433090024331, | |
| "loss": 0.8955, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 2.191960623461854, | |
| "grad_norm": 0.07506415992975235, | |
| "learning_rate": 0.0008978102189781023, | |
| "loss": 0.802, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.194148208914411, | |
| "grad_norm": 0.028858385980129242, | |
| "learning_rate": 0.0008953771289537713, | |
| "loss": 0.7682, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 2.1963357943669672, | |
| "grad_norm": 0.013214879669249058, | |
| "learning_rate": 0.0008929440389294404, | |
| "loss": 0.7162, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.198523379819524, | |
| "grad_norm": 0.007629261817783117, | |
| "learning_rate": 0.0008905109489051096, | |
| "loss": 0.7283, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 2.200710965272081, | |
| "grad_norm": 0.007726036943495274, | |
| "learning_rate": 0.0008880778588807786, | |
| "loss": 0.8558, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.2028985507246377, | |
| "grad_norm": 0.008436914533376694, | |
| "learning_rate": 0.0008856447688564477, | |
| "loss": 0.7377, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 2.2050861361771945, | |
| "grad_norm": 0.02465754747390747, | |
| "learning_rate": 0.0008832116788321168, | |
| "loss": 0.5909, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.2072737216297513, | |
| "grad_norm": 0.007964403368532658, | |
| "learning_rate": 0.0008807785888077859, | |
| "loss": 0.9931, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 2.2094613070823077, | |
| "grad_norm": 0.008428809233009815, | |
| "learning_rate": 0.0008783454987834551, | |
| "loss": 0.8308, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.2116488925348645, | |
| "grad_norm": 0.005988140590488911, | |
| "learning_rate": 0.0008759124087591241, | |
| "loss": 0.6528, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 2.2138364779874213, | |
| "grad_norm": 0.009502807632088661, | |
| "learning_rate": 0.0008734793187347931, | |
| "loss": 0.7241, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.216024063439978, | |
| "grad_norm": 0.01181811187416315, | |
| "learning_rate": 0.0008710462287104623, | |
| "loss": 0.5897, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 2.218211648892535, | |
| "grad_norm": 0.013522054068744183, | |
| "learning_rate": 0.0008686131386861313, | |
| "loss": 0.7664, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.220399234345092, | |
| "grad_norm": 0.008381453342735767, | |
| "learning_rate": 0.0008661800486618005, | |
| "loss": 0.7758, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 2.222586819797648, | |
| "grad_norm": 0.011634815484285355, | |
| "learning_rate": 0.0008637469586374696, | |
| "loss": 0.7362, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.224774405250205, | |
| "grad_norm": 0.008570423349738121, | |
| "learning_rate": 0.0008613138686131386, | |
| "loss": 0.8869, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 2.226961990702762, | |
| "grad_norm": 0.01613277941942215, | |
| "learning_rate": 0.0008588807785888078, | |
| "loss": 0.8074, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.2291495761553186, | |
| "grad_norm": 0.0062742773443460464, | |
| "learning_rate": 0.000856447688564477, | |
| "loss": 0.7695, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 2.2313371616078754, | |
| "grad_norm": 0.011958430521190166, | |
| "learning_rate": 0.000854014598540146, | |
| "loss": 0.9689, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.2335247470604322, | |
| "grad_norm": 0.010232674889266491, | |
| "learning_rate": 0.0008515815085158151, | |
| "loss": 0.7289, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 2.2357123325129886, | |
| "grad_norm": 0.010546423494815826, | |
| "learning_rate": 0.0008491484184914843, | |
| "loss": 0.7882, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.2378999179655454, | |
| "grad_norm": 0.006704252678900957, | |
| "learning_rate": 0.0008467153284671533, | |
| "loss": 0.7245, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 2.2400875034181023, | |
| "grad_norm": 0.00856088288128376, | |
| "learning_rate": 0.0008442822384428225, | |
| "loss": 0.8478, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.242275088870659, | |
| "grad_norm": 0.011011838912963867, | |
| "learning_rate": 0.0008418491484184916, | |
| "loss": 0.8878, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 2.244462674323216, | |
| "grad_norm": 0.008859807625412941, | |
| "learning_rate": 0.0008394160583941605, | |
| "loss": 1.0637, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.2466502597757723, | |
| "grad_norm": 0.019353823736310005, | |
| "learning_rate": 0.0008369829683698297, | |
| "loss": 0.6664, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 2.248837845228329, | |
| "grad_norm": 0.007266916800290346, | |
| "learning_rate": 0.0008345498783454987, | |
| "loss": 0.7924, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.251025430680886, | |
| "grad_norm": 0.00936873722821474, | |
| "learning_rate": 0.0008321167883211679, | |
| "loss": 0.7045, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 2.2532130161334427, | |
| "grad_norm": 0.007908246479928493, | |
| "learning_rate": 0.000829683698296837, | |
| "loss": 0.9256, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.2554006015859995, | |
| "grad_norm": 0.024966659024357796, | |
| "learning_rate": 0.000827250608272506, | |
| "loss": 0.7243, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 2.2575881870385563, | |
| "grad_norm": 0.009444604627788067, | |
| "learning_rate": 0.0008248175182481752, | |
| "loss": 0.7369, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.259775772491113, | |
| "grad_norm": 0.009447803720831871, | |
| "learning_rate": 0.0008223844282238443, | |
| "loss": 0.7721, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 2.2619633579436695, | |
| "grad_norm": 0.008546645753085613, | |
| "learning_rate": 0.0008199513381995134, | |
| "loss": 0.8094, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.2641509433962264, | |
| "grad_norm": 0.006809299346059561, | |
| "learning_rate": 0.0008175182481751825, | |
| "loss": 0.7907, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 2.266338528848783, | |
| "grad_norm": 0.013527573086321354, | |
| "learning_rate": 0.0008150851581508517, | |
| "loss": 0.6692, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.26852611430134, | |
| "grad_norm": 0.007041016593575478, | |
| "learning_rate": 0.0008126520681265207, | |
| "loss": 0.7474, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 2.270713699753897, | |
| "grad_norm": 0.006707175634801388, | |
| "learning_rate": 0.0008102189781021898, | |
| "loss": 0.8134, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.272901285206453, | |
| "grad_norm": 0.030407702550292015, | |
| "learning_rate": 0.000807785888077859, | |
| "loss": 0.7734, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 2.27508887065901, | |
| "grad_norm": 0.011364832520484924, | |
| "learning_rate": 0.000805352798053528, | |
| "loss": 0.6188, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.277276456111567, | |
| "grad_norm": 0.009676680900156498, | |
| "learning_rate": 0.0008029197080291971, | |
| "loss": 0.8262, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 2.2794640415641236, | |
| "grad_norm": 0.012146366760134697, | |
| "learning_rate": 0.0008004866180048662, | |
| "loss": 0.7543, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.2816516270166805, | |
| "grad_norm": 0.021344035863876343, | |
| "learning_rate": 0.0007980535279805352, | |
| "loss": 0.8434, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 2.2838392124692373, | |
| "grad_norm": 0.019379200413823128, | |
| "learning_rate": 0.0007956204379562044, | |
| "loss": 0.6678, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.2860267979217936, | |
| "grad_norm": 0.012972463853657246, | |
| "learning_rate": 0.0007931873479318734, | |
| "loss": 0.7363, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 2.2882143833743505, | |
| "grad_norm": 0.005540755111724138, | |
| "learning_rate": 0.0007907542579075426, | |
| "loss": 0.7702, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.2904019688269073, | |
| "grad_norm": 0.01054232195019722, | |
| "learning_rate": 0.0007883211678832117, | |
| "loss": 0.8086, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 2.292589554279464, | |
| "grad_norm": 0.006333992816507816, | |
| "learning_rate": 0.0007858880778588807, | |
| "loss": 0.8547, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.294777139732021, | |
| "grad_norm": 0.007503498811274767, | |
| "learning_rate": 0.0007834549878345499, | |
| "loss": 0.9384, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 2.2969647251845773, | |
| "grad_norm": 0.009519786573946476, | |
| "learning_rate": 0.0007810218978102191, | |
| "loss": 0.7457, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.299152310637134, | |
| "grad_norm": 0.009697610512375832, | |
| "learning_rate": 0.0007785888077858881, | |
| "loss": 0.6572, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 2.301339896089691, | |
| "grad_norm": 0.01142230723053217, | |
| "learning_rate": 0.0007761557177615572, | |
| "loss": 0.7003, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.3035274815422477, | |
| "grad_norm": 0.014880196191370487, | |
| "learning_rate": 0.0007737226277372264, | |
| "loss": 0.9522, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 2.3057150669948046, | |
| "grad_norm": 0.03530775010585785, | |
| "learning_rate": 0.0007712895377128953, | |
| "loss": 0.8303, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.3079026524473614, | |
| "grad_norm": 0.008375970646739006, | |
| "learning_rate": 0.0007688564476885646, | |
| "loss": 0.9399, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 2.310090237899918, | |
| "grad_norm": 0.011312820017337799, | |
| "learning_rate": 0.0007664233576642336, | |
| "loss": 0.6918, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.3122778233524746, | |
| "grad_norm": 0.00965717900544405, | |
| "learning_rate": 0.0007639902676399026, | |
| "loss": 0.6898, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 2.3144654088050314, | |
| "grad_norm": 0.046056658029556274, | |
| "learning_rate": 0.0007615571776155718, | |
| "loss": 0.7655, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.316652994257588, | |
| "grad_norm": 0.006473752204328775, | |
| "learning_rate": 0.0007591240875912409, | |
| "loss": 0.7825, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 2.318840579710145, | |
| "grad_norm": 0.012731518596410751, | |
| "learning_rate": 0.0007566909975669099, | |
| "loss": 0.7138, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.321028165162702, | |
| "grad_norm": 0.01815684884786606, | |
| "learning_rate": 0.0007542579075425791, | |
| "loss": 0.7992, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 2.323215750615258, | |
| "grad_norm": 0.012457008473575115, | |
| "learning_rate": 0.0007518248175182483, | |
| "loss": 0.7565, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.325403336067815, | |
| "grad_norm": 0.011130121536552906, | |
| "learning_rate": 0.0007493917274939173, | |
| "loss": 0.6585, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 2.327590921520372, | |
| "grad_norm": 0.009390764869749546, | |
| "learning_rate": 0.0007469586374695864, | |
| "loss": 0.5921, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.3297785069729287, | |
| "grad_norm": 0.006265114061534405, | |
| "learning_rate": 0.0007445255474452555, | |
| "loss": 0.862, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 2.3319660924254855, | |
| "grad_norm": 0.014493511989712715, | |
| "learning_rate": 0.0007420924574209246, | |
| "loss": 0.6529, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.3341536778780423, | |
| "grad_norm": 0.01009755115956068, | |
| "learning_rate": 0.0007396593673965938, | |
| "loss": 1.0077, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 2.3363412633305987, | |
| "grad_norm": 0.022166702896356583, | |
| "learning_rate": 0.0007372262773722629, | |
| "loss": 0.9121, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.3385288487831555, | |
| "grad_norm": 0.028010999783873558, | |
| "learning_rate": 0.0007347931873479318, | |
| "loss": 0.6663, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 2.3407164342357123, | |
| "grad_norm": 0.012431381270289421, | |
| "learning_rate": 0.0007323600973236009, | |
| "loss": 0.7579, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.342904019688269, | |
| "grad_norm": 0.0932813212275505, | |
| "learning_rate": 0.0007299270072992701, | |
| "loss": 0.5542, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 2.345091605140826, | |
| "grad_norm": 0.011022589169442654, | |
| "learning_rate": 0.0007274939172749392, | |
| "loss": 0.7093, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.3472791905933827, | |
| "grad_norm": 0.008994583040475845, | |
| "learning_rate": 0.0007250608272506082, | |
| "loss": 0.7466, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 2.349466776045939, | |
| "grad_norm": 0.01782486028969288, | |
| "learning_rate": 0.0007226277372262774, | |
| "loss": 0.6847, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.351654361498496, | |
| "grad_norm": 0.011398195289075375, | |
| "learning_rate": 0.0007201946472019465, | |
| "loss": 0.687, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 2.3538419469510528, | |
| "grad_norm": 0.023858705535531044, | |
| "learning_rate": 0.0007177615571776156, | |
| "loss": 0.6984, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.3560295324036096, | |
| "grad_norm": 0.008185802958905697, | |
| "learning_rate": 0.0007153284671532847, | |
| "loss": 0.8747, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 2.3582171178561664, | |
| "grad_norm": 0.018106609582901, | |
| "learning_rate": 0.0007128953771289538, | |
| "loss": 0.6591, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.360404703308723, | |
| "grad_norm": 0.013991002924740314, | |
| "learning_rate": 0.0007104622871046229, | |
| "loss": 0.818, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 2.3625922887612796, | |
| "grad_norm": 0.007820016704499722, | |
| "learning_rate": 0.000708029197080292, | |
| "loss": 0.9661, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.3647798742138364, | |
| "grad_norm": 0.020563364028930664, | |
| "learning_rate": 0.0007055961070559611, | |
| "loss": 0.896, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 2.366967459666393, | |
| "grad_norm": 0.01632773131132126, | |
| "learning_rate": 0.0007031630170316302, | |
| "loss": 0.8516, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.36915504511895, | |
| "grad_norm": 0.012202097102999687, | |
| "learning_rate": 0.0007007299270072992, | |
| "loss": 0.921, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 2.371342630571507, | |
| "grad_norm": 0.009598075412213802, | |
| "learning_rate": 0.0006982968369829684, | |
| "loss": 0.677, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.373530216024063, | |
| "grad_norm": 0.010769539512693882, | |
| "learning_rate": 0.0006958637469586375, | |
| "loss": 0.7964, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 2.37571780147662, | |
| "grad_norm": 0.011242173612117767, | |
| "learning_rate": 0.0006934306569343065, | |
| "loss": 0.6444, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.377905386929177, | |
| "grad_norm": 0.009250817820429802, | |
| "learning_rate": 0.0006909975669099756, | |
| "loss": 0.7456, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 2.3800929723817337, | |
| "grad_norm": 0.008871940895915031, | |
| "learning_rate": 0.0006885644768856448, | |
| "loss": 0.7497, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.3822805578342905, | |
| "grad_norm": 0.014774895273149014, | |
| "learning_rate": 0.0006861313868613139, | |
| "loss": 0.8508, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 2.3844681432868473, | |
| "grad_norm": 0.008470469154417515, | |
| "learning_rate": 0.000683698296836983, | |
| "loss": 0.6278, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.386655728739404, | |
| "grad_norm": 0.02862645871937275, | |
| "learning_rate": 0.0006812652068126521, | |
| "loss": 0.7235, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 2.3888433141919605, | |
| "grad_norm": 0.010565055534243584, | |
| "learning_rate": 0.0006788321167883212, | |
| "loss": 0.7064, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.3910308996445173, | |
| "grad_norm": 0.00996407214552164, | |
| "learning_rate": 0.0006763990267639903, | |
| "loss": 0.747, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 2.393218485097074, | |
| "grad_norm": 0.008201108314096928, | |
| "learning_rate": 0.0006739659367396594, | |
| "loss": 0.8917, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.395406070549631, | |
| "grad_norm": 0.007856379263103008, | |
| "learning_rate": 0.0006715328467153285, | |
| "loss": 0.8106, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 2.3975936560021878, | |
| "grad_norm": 0.01899876445531845, | |
| "learning_rate": 0.0006690997566909976, | |
| "loss": 0.9151, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.399781241454744, | |
| "grad_norm": 0.0086012938991189, | |
| "learning_rate": 0.0006666666666666666, | |
| "loss": 0.872, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 2.401968826907301, | |
| "grad_norm": 0.007030507083982229, | |
| "learning_rate": 0.0006642335766423358, | |
| "loss": 0.6529, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.4041564123598578, | |
| "grad_norm": 0.01876233145594597, | |
| "learning_rate": 0.0006618004866180048, | |
| "loss": 0.8421, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 2.4063439978124146, | |
| "grad_norm": 0.033474959433078766, | |
| "learning_rate": 0.0006593673965936739, | |
| "loss": 0.6956, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.4085315832649714, | |
| "grad_norm": 0.018535858020186424, | |
| "learning_rate": 0.0006569343065693431, | |
| "loss": 0.7232, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 2.4107191687175282, | |
| "grad_norm": 0.010383503511548042, | |
| "learning_rate": 0.0006545012165450122, | |
| "loss": 0.5804, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.4129067541700846, | |
| "grad_norm": 0.0077387490309774876, | |
| "learning_rate": 0.0006520681265206813, | |
| "loss": 0.828, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 2.4150943396226414, | |
| "grad_norm": 0.011656009592115879, | |
| "learning_rate": 0.0006496350364963504, | |
| "loss": 0.9106, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.4172819250751982, | |
| "grad_norm": 0.005996339488774538, | |
| "learning_rate": 0.0006472019464720195, | |
| "loss": 0.6921, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 2.419469510527755, | |
| "grad_norm": 0.022230584174394608, | |
| "learning_rate": 0.0006447688564476886, | |
| "loss": 0.9711, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.421657095980312, | |
| "grad_norm": 0.031066155061125755, | |
| "learning_rate": 0.0006423357664233577, | |
| "loss": 0.8718, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 2.4238446814328682, | |
| "grad_norm": 0.011762702837586403, | |
| "learning_rate": 0.0006399026763990268, | |
| "loss": 0.818, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.426032266885425, | |
| "grad_norm": 0.009383924305438995, | |
| "learning_rate": 0.0006374695863746959, | |
| "loss": 0.5913, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 2.428219852337982, | |
| "grad_norm": 0.012824693694710732, | |
| "learning_rate": 0.000635036496350365, | |
| "loss": 0.7115, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.4304074377905387, | |
| "grad_norm": 0.007453750818967819, | |
| "learning_rate": 0.0006326034063260342, | |
| "loss": 0.7374, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 2.4325950232430955, | |
| "grad_norm": 0.007933787070214748, | |
| "learning_rate": 0.0006301703163017031, | |
| "loss": 0.7921, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.4347826086956523, | |
| "grad_norm": 0.01717616245150566, | |
| "learning_rate": 0.0006277372262773722, | |
| "loss": 0.9326, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 2.436970194148209, | |
| "grad_norm": 0.009397076442837715, | |
| "learning_rate": 0.0006253041362530414, | |
| "loss": 0.6388, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.4391577796007655, | |
| "grad_norm": 0.008330175653100014, | |
| "learning_rate": 0.0006228710462287105, | |
| "loss": 0.5517, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 2.4413453650533223, | |
| "grad_norm": 0.013194689527153969, | |
| "learning_rate": 0.0006204379562043796, | |
| "loss": 0.8779, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.443532950505879, | |
| "grad_norm": 0.012824257835745811, | |
| "learning_rate": 0.0006180048661800486, | |
| "loss": 0.7731, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 2.445720535958436, | |
| "grad_norm": 0.011488651856780052, | |
| "learning_rate": 0.0006155717761557178, | |
| "loss": 0.7806, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.447908121410993, | |
| "grad_norm": 0.006684242747724056, | |
| "learning_rate": 0.0006131386861313869, | |
| "loss": 1.0212, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 2.450095706863549, | |
| "grad_norm": 0.010995331220328808, | |
| "learning_rate": 0.000610705596107056, | |
| "loss": 0.8499, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.452283292316106, | |
| "grad_norm": 0.016977710649371147, | |
| "learning_rate": 0.0006082725060827251, | |
| "loss": 0.7029, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 2.454470877768663, | |
| "grad_norm": 0.008742439560592175, | |
| "learning_rate": 0.0006058394160583942, | |
| "loss": 0.6834, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.4566584632212196, | |
| "grad_norm": 0.006410808768123388, | |
| "learning_rate": 0.0006034063260340633, | |
| "loss": 0.8371, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 2.4588460486737764, | |
| "grad_norm": 0.008776198141276836, | |
| "learning_rate": 0.0006009732360097324, | |
| "loss": 0.7001, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.4610336341263332, | |
| "grad_norm": 0.007712388876825571, | |
| "learning_rate": 0.0005985401459854014, | |
| "loss": 0.5664, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 2.4632212195788896, | |
| "grad_norm": 0.011250052601099014, | |
| "learning_rate": 0.0005961070559610705, | |
| "loss": 0.8572, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.4654088050314464, | |
| "grad_norm": 0.010831180959939957, | |
| "learning_rate": 0.0005936739659367396, | |
| "loss": 0.6984, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 2.4675963904840033, | |
| "grad_norm": 0.025114471092820168, | |
| "learning_rate": 0.0005912408759124088, | |
| "loss": 0.7401, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.46978397593656, | |
| "grad_norm": 0.006640868727117777, | |
| "learning_rate": 0.0005888077858880779, | |
| "loss": 0.5887, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 2.471971561389117, | |
| "grad_norm": 0.0060841697268188, | |
| "learning_rate": 0.0005863746958637469, | |
| "loss": 0.7121, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.4741591468416733, | |
| "grad_norm": 0.012216274626553059, | |
| "learning_rate": 0.0005839416058394161, | |
| "loss": 0.8174, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 2.47634673229423, | |
| "grad_norm": 0.009857951663434505, | |
| "learning_rate": 0.0005815085158150852, | |
| "loss": 0.7229, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.478534317746787, | |
| "grad_norm": 0.010938407853245735, | |
| "learning_rate": 0.0005790754257907543, | |
| "loss": 0.5738, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 2.4807219031993437, | |
| "grad_norm": 0.026813512668013573, | |
| "learning_rate": 0.0005766423357664234, | |
| "loss": 0.8543, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.4829094886519005, | |
| "grad_norm": 0.01071678102016449, | |
| "learning_rate": 0.0005742092457420925, | |
| "loss": 0.9774, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 2.4850970741044573, | |
| "grad_norm": 0.009592295624315739, | |
| "learning_rate": 0.0005717761557177616, | |
| "loss": 0.9619, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.487284659557014, | |
| "grad_norm": 0.005114677362143993, | |
| "learning_rate": 0.0005693430656934307, | |
| "loss": 0.8033, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 2.4894722450095705, | |
| "grad_norm": 0.012539639137685299, | |
| "learning_rate": 0.0005669099756690998, | |
| "loss": 0.8993, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.4916598304621274, | |
| "grad_norm": 0.026053965091705322, | |
| "learning_rate": 0.0005644768856447688, | |
| "loss": 0.6817, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 2.493847415914684, | |
| "grad_norm": 0.007609077729284763, | |
| "learning_rate": 0.0005620437956204379, | |
| "loss": 0.8549, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.496035001367241, | |
| "grad_norm": 0.010698397643864155, | |
| "learning_rate": 0.0005596107055961071, | |
| "loss": 0.7068, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 2.498222586819798, | |
| "grad_norm": 0.008611828088760376, | |
| "learning_rate": 0.0005571776155717762, | |
| "loss": 0.7465, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.500410172272354, | |
| "grad_norm": 0.01089494489133358, | |
| "learning_rate": 0.0005547445255474452, | |
| "loss": 0.6224, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 2.502597757724911, | |
| "grad_norm": 0.024782098829746246, | |
| "learning_rate": 0.0005523114355231143, | |
| "loss": 0.8328, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.504785343177468, | |
| "grad_norm": 0.006382483057677746, | |
| "learning_rate": 0.0005498783454987835, | |
| "loss": 0.7787, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 2.5069729286300246, | |
| "grad_norm": 0.016949672251939774, | |
| "learning_rate": 0.0005474452554744526, | |
| "loss": 0.7046, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.5091605140825815, | |
| "grad_norm": 0.027401480823755264, | |
| "learning_rate": 0.0005450121654501216, | |
| "loss": 0.6702, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 2.5113480995351383, | |
| "grad_norm": 0.01999586448073387, | |
| "learning_rate": 0.0005425790754257908, | |
| "loss": 0.8054, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.513535684987695, | |
| "grad_norm": 0.010145720094442368, | |
| "learning_rate": 0.0005401459854014599, | |
| "loss": 0.6592, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 2.5157232704402515, | |
| "grad_norm": 0.018535887822508812, | |
| "learning_rate": 0.000537712895377129, | |
| "loss": 0.7254, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.5179108558928083, | |
| "grad_norm": 0.009648307226598263, | |
| "learning_rate": 0.0005352798053527981, | |
| "loss": 0.6838, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 2.520098441345365, | |
| "grad_norm": 0.016310011968016624, | |
| "learning_rate": 0.0005328467153284672, | |
| "loss": 0.8777, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.522286026797922, | |
| "grad_norm": 0.010320610366761684, | |
| "learning_rate": 0.0005304136253041362, | |
| "loss": 0.7651, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 2.5244736122504783, | |
| "grad_norm": 0.012834092602133751, | |
| "learning_rate": 0.0005279805352798053, | |
| "loss": 0.7847, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.526661197703035, | |
| "grad_norm": 0.011668582446873188, | |
| "learning_rate": 0.0005255474452554745, | |
| "loss": 0.7225, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 2.528848783155592, | |
| "grad_norm": 0.009817942976951599, | |
| "learning_rate": 0.0005231143552311435, | |
| "loss": 0.6983, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.5310363686081487, | |
| "grad_norm": 0.009282633662223816, | |
| "learning_rate": 0.0005206812652068126, | |
| "loss": 0.7688, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 2.5332239540607056, | |
| "grad_norm": 0.007419208530336618, | |
| "learning_rate": 0.0005182481751824818, | |
| "loss": 0.728, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.5354115395132624, | |
| "grad_norm": 0.029275061562657356, | |
| "learning_rate": 0.0005158150851581509, | |
| "loss": 0.8293, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 2.537599124965819, | |
| "grad_norm": 0.01723194308578968, | |
| "learning_rate": 0.0005133819951338199, | |
| "loss": 0.6128, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.5397867104183756, | |
| "grad_norm": 0.009285934269428253, | |
| "learning_rate": 0.000510948905109489, | |
| "loss": 0.6788, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 2.5419742958709324, | |
| "grad_norm": 0.008555158041417599, | |
| "learning_rate": 0.0005085158150851582, | |
| "loss": 0.6507, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.544161881323489, | |
| "grad_norm": 0.0168358962982893, | |
| "learning_rate": 0.0005060827250608273, | |
| "loss": 0.942, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 2.546349466776046, | |
| "grad_norm": 0.0068771797232329845, | |
| "learning_rate": 0.0005036496350364964, | |
| "loss": 0.7844, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.548537052228603, | |
| "grad_norm": 0.04532065615057945, | |
| "learning_rate": 0.0005012165450121655, | |
| "loss": 0.8095, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 2.550724637681159, | |
| "grad_norm": 0.00933657493442297, | |
| "learning_rate": 0.0004987834549878346, | |
| "loss": 0.8072, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.552912223133716, | |
| "grad_norm": 0.009804673492908478, | |
| "learning_rate": 0.0004963503649635036, | |
| "loss": 0.8715, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 2.555099808586273, | |
| "grad_norm": 0.010783910751342773, | |
| "learning_rate": 0.0004939172749391727, | |
| "loss": 0.7891, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.5572873940388297, | |
| "grad_norm": 0.011784784495830536, | |
| "learning_rate": 0.0004914841849148418, | |
| "loss": 0.7262, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 2.5594749794913865, | |
| "grad_norm": 0.007322199642658234, | |
| "learning_rate": 0.0004890510948905109, | |
| "loss": 0.7809, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.5616625649439433, | |
| "grad_norm": 0.011777276173233986, | |
| "learning_rate": 0.00048661800486618, | |
| "loss": 0.7791, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 2.5638501503965, | |
| "grad_norm": 0.015589660964906216, | |
| "learning_rate": 0.00048418491484184916, | |
| "loss": 0.921, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.5660377358490565, | |
| "grad_norm": 0.010277018882334232, | |
| "learning_rate": 0.00048175182481751826, | |
| "loss": 0.9368, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 2.5682253213016133, | |
| "grad_norm": 0.02483278699219227, | |
| "learning_rate": 0.00047931873479318735, | |
| "loss": 0.7714, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.57041290675417, | |
| "grad_norm": 0.013863074593245983, | |
| "learning_rate": 0.0004768856447688565, | |
| "loss": 0.6637, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 2.572600492206727, | |
| "grad_norm": 0.015338894911110401, | |
| "learning_rate": 0.00047445255474452553, | |
| "loss": 0.7678, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.5747880776592833, | |
| "grad_norm": 0.007364062592387199, | |
| "learning_rate": 0.0004720194647201946, | |
| "loss": 0.995, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 2.57697566311184, | |
| "grad_norm": 0.1765730232000351, | |
| "learning_rate": 0.00046958637469586377, | |
| "loss": 0.7865, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.579163248564397, | |
| "grad_norm": 0.010664415545761585, | |
| "learning_rate": 0.00046715328467153287, | |
| "loss": 0.5741, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 2.5813508340169538, | |
| "grad_norm": 0.012521582655608654, | |
| "learning_rate": 0.00046472019464720196, | |
| "loss": 0.6621, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.5835384194695106, | |
| "grad_norm": 0.03732423484325409, | |
| "learning_rate": 0.000462287104622871, | |
| "loss": 0.7453, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 2.5857260049220674, | |
| "grad_norm": 0.013986853882670403, | |
| "learning_rate": 0.0004598540145985402, | |
| "loss": 0.7057, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.587913590374624, | |
| "grad_norm": 0.013078927993774414, | |
| "learning_rate": 0.00045742092457420923, | |
| "loss": 0.7167, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 2.590101175827181, | |
| "grad_norm": 0.006835412234067917, | |
| "learning_rate": 0.0004549878345498783, | |
| "loss": 0.8064, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.5922887612797374, | |
| "grad_norm": 0.020057901740074158, | |
| "learning_rate": 0.0004525547445255475, | |
| "loss": 0.7096, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 2.594476346732294, | |
| "grad_norm": 0.026187503710389137, | |
| "learning_rate": 0.00045012165450121657, | |
| "loss": 0.9496, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.596663932184851, | |
| "grad_norm": 0.012171875685453415, | |
| "learning_rate": 0.00044768856447688566, | |
| "loss": 0.7529, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 2.598851517637408, | |
| "grad_norm": 0.012145042419433594, | |
| "learning_rate": 0.0004452554744525548, | |
| "loss": 0.8654, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.601039103089964, | |
| "grad_norm": 0.013504109345376492, | |
| "learning_rate": 0.00044282238442822384, | |
| "loss": 0.6347, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 2.603226688542521, | |
| "grad_norm": 0.01362569723278284, | |
| "learning_rate": 0.00044038929440389293, | |
| "loss": 0.661, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.605414273995078, | |
| "grad_norm": 0.013327688910067081, | |
| "learning_rate": 0.00043795620437956203, | |
| "loss": 0.6851, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 2.6076018594476347, | |
| "grad_norm": 0.008194427005946636, | |
| "learning_rate": 0.0004355231143552312, | |
| "loss": 0.8226, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.6097894449001915, | |
| "grad_norm": 0.017937535420060158, | |
| "learning_rate": 0.00043309002433090027, | |
| "loss": 0.7033, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 2.6119770303527483, | |
| "grad_norm": 0.005625641439110041, | |
| "learning_rate": 0.0004306569343065693, | |
| "loss": 0.7106, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.614164615805305, | |
| "grad_norm": 0.01812170445919037, | |
| "learning_rate": 0.0004282238442822385, | |
| "loss": 0.7344, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 2.6163522012578615, | |
| "grad_norm": 0.007461361587047577, | |
| "learning_rate": 0.00042579075425790754, | |
| "loss": 0.835, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.6185397867104183, | |
| "grad_norm": 0.014407969079911709, | |
| "learning_rate": 0.00042335766423357664, | |
| "loss": 0.7829, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 2.620727372162975, | |
| "grad_norm": 0.008925898931920528, | |
| "learning_rate": 0.0004209245742092458, | |
| "loss": 0.6425, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.622914957615532, | |
| "grad_norm": 0.010357217863202095, | |
| "learning_rate": 0.0004184914841849149, | |
| "loss": 0.894, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 2.6251025430680883, | |
| "grad_norm": 0.01632748544216156, | |
| "learning_rate": 0.00041605839416058397, | |
| "loss": 0.6886, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.627290128520645, | |
| "grad_norm": 0.021274514496326447, | |
| "learning_rate": 0.000413625304136253, | |
| "loss": 0.7503, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 2.629477713973202, | |
| "grad_norm": 0.021467119455337524, | |
| "learning_rate": 0.00041119221411192215, | |
| "loss": 0.9202, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.6316652994257588, | |
| "grad_norm": 0.011900427751243114, | |
| "learning_rate": 0.00040875912408759124, | |
| "loss": 0.7084, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 2.6338528848783156, | |
| "grad_norm": 0.010819557122886181, | |
| "learning_rate": 0.00040632603406326034, | |
| "loss": 1.0455, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.6360404703308724, | |
| "grad_norm": 0.012575685046613216, | |
| "learning_rate": 0.0004038929440389295, | |
| "loss": 0.6894, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 2.6382280557834292, | |
| "grad_norm": 0.011274064891040325, | |
| "learning_rate": 0.0004014598540145986, | |
| "loss": 0.8449, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.640415641235986, | |
| "grad_norm": 0.013194631785154343, | |
| "learning_rate": 0.0003990267639902676, | |
| "loss": 0.8192, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 2.6426032266885424, | |
| "grad_norm": 0.009542672894895077, | |
| "learning_rate": 0.0003965936739659367, | |
| "loss": 0.8768, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.6447908121410992, | |
| "grad_norm": 0.016639290377497673, | |
| "learning_rate": 0.00039416058394160585, | |
| "loss": 0.7371, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 2.646978397593656, | |
| "grad_norm": 0.02203970216214657, | |
| "learning_rate": 0.00039172749391727494, | |
| "loss": 0.6598, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.649165983046213, | |
| "grad_norm": 0.027763044461607933, | |
| "learning_rate": 0.00038929440389294404, | |
| "loss": 0.6819, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 2.6513535684987692, | |
| "grad_norm": 0.01537309866398573, | |
| "learning_rate": 0.0003868613138686132, | |
| "loss": 0.8249, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.653541153951326, | |
| "grad_norm": 0.01565646007657051, | |
| "learning_rate": 0.0003844282238442823, | |
| "loss": 0.569, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 2.655728739403883, | |
| "grad_norm": 0.01048749778419733, | |
| "learning_rate": 0.0003819951338199513, | |
| "loss": 0.6359, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.6579163248564397, | |
| "grad_norm": 0.061209116131067276, | |
| "learning_rate": 0.00037956204379562046, | |
| "loss": 0.7011, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 2.6601039103089965, | |
| "grad_norm": 0.016036316752433777, | |
| "learning_rate": 0.00037712895377128955, | |
| "loss": 0.5889, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.6622914957615533, | |
| "grad_norm": 0.014299210160970688, | |
| "learning_rate": 0.00037469586374695864, | |
| "loss": 0.7685, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 2.66447908121411, | |
| "grad_norm": 0.010716800577938557, | |
| "learning_rate": 0.00037226277372262774, | |
| "loss": 0.7795, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.007198740262538195, | |
| "learning_rate": 0.0003698296836982969, | |
| "loss": 0.8868, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 2.6688542521192233, | |
| "grad_norm": 0.018458040431141853, | |
| "learning_rate": 0.0003673965936739659, | |
| "loss": 0.6935, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.67104183757178, | |
| "grad_norm": 0.011869457550346851, | |
| "learning_rate": 0.00036496350364963507, | |
| "loss": 0.7638, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 2.673229423024337, | |
| "grad_norm": 0.00896628387272358, | |
| "learning_rate": 0.0003625304136253041, | |
| "loss": 0.7615, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.675417008476894, | |
| "grad_norm": 0.008536278270184994, | |
| "learning_rate": 0.00036009732360097325, | |
| "loss": 0.6647, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 2.67760459392945, | |
| "grad_norm": 0.02423817664384842, | |
| "learning_rate": 0.00035766423357664234, | |
| "loss": 0.6876, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.679792179382007, | |
| "grad_norm": 0.011117582209408283, | |
| "learning_rate": 0.00035523114355231144, | |
| "loss": 0.665, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 2.681979764834564, | |
| "grad_norm": 0.009505179710686207, | |
| "learning_rate": 0.00035279805352798053, | |
| "loss": 0.6284, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.6841673502871206, | |
| "grad_norm": 0.0063440497033298016, | |
| "learning_rate": 0.0003503649635036496, | |
| "loss": 0.8279, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 2.6863549357396774, | |
| "grad_norm": 0.0201023630797863, | |
| "learning_rate": 0.00034793187347931877, | |
| "loss": 0.8996, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.6885425211922342, | |
| "grad_norm": 0.006452304311096668, | |
| "learning_rate": 0.0003454987834549878, | |
| "loss": 0.8563, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 2.690730106644791, | |
| "grad_norm": 0.00840191449970007, | |
| "learning_rate": 0.00034306569343065695, | |
| "loss": 0.6543, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.6929176920973474, | |
| "grad_norm": 0.011340702883899212, | |
| "learning_rate": 0.00034063260340632605, | |
| "loss": 0.733, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 2.6951052775499043, | |
| "grad_norm": 0.01761777698993683, | |
| "learning_rate": 0.00033819951338199514, | |
| "loss": 0.9136, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.697292863002461, | |
| "grad_norm": 0.012587963603436947, | |
| "learning_rate": 0.00033576642335766423, | |
| "loss": 0.801, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 2.699480448455018, | |
| "grad_norm": 0.006971995811909437, | |
| "learning_rate": 0.0003333333333333333, | |
| "loss": 0.8079, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.7016680339075743, | |
| "grad_norm": 0.00921553373336792, | |
| "learning_rate": 0.0003309002433090024, | |
| "loss": 0.6801, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 2.703855619360131, | |
| "grad_norm": 0.012788954190909863, | |
| "learning_rate": 0.00032846715328467156, | |
| "loss": 0.8119, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.706043204812688, | |
| "grad_norm": 0.01745203509926796, | |
| "learning_rate": 0.00032603406326034065, | |
| "loss": 0.808, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 2.7082307902652447, | |
| "grad_norm": 0.010819566436111927, | |
| "learning_rate": 0.00032360097323600975, | |
| "loss": 0.6882, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.7104183757178015, | |
| "grad_norm": 0.013807238079607487, | |
| "learning_rate": 0.00032116788321167884, | |
| "loss": 0.5872, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 2.7126059611703583, | |
| "grad_norm": 0.015879668295383453, | |
| "learning_rate": 0.00031873479318734793, | |
| "loss": 0.7541, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.714793546622915, | |
| "grad_norm": 0.008229264058172703, | |
| "learning_rate": 0.0003163017031630171, | |
| "loss": 0.8002, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 2.7169811320754715, | |
| "grad_norm": 0.011732214130461216, | |
| "learning_rate": 0.0003138686131386861, | |
| "loss": 0.7049, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.7191687175280284, | |
| "grad_norm": 0.008688759990036488, | |
| "learning_rate": 0.00031143552311435526, | |
| "loss": 0.9007, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 2.721356302980585, | |
| "grad_norm": 0.014027293771505356, | |
| "learning_rate": 0.0003090024330900243, | |
| "loss": 0.6098, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.723543888433142, | |
| "grad_norm": 0.00831068679690361, | |
| "learning_rate": 0.00030656934306569345, | |
| "loss": 0.7435, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 2.725731473885699, | |
| "grad_norm": 0.017324576154351234, | |
| "learning_rate": 0.00030413625304136254, | |
| "loss": 0.7317, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.727919059338255, | |
| "grad_norm": 0.01490398496389389, | |
| "learning_rate": 0.00030170316301703163, | |
| "loss": 0.7434, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 2.730106644790812, | |
| "grad_norm": 0.02181348390877247, | |
| "learning_rate": 0.0002992700729927007, | |
| "loss": 0.7395, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.732294230243369, | |
| "grad_norm": 0.017193686217069626, | |
| "learning_rate": 0.0002968369829683698, | |
| "loss": 1.0303, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 2.7344818156959256, | |
| "grad_norm": 0.011623183265328407, | |
| "learning_rate": 0.00029440389294403896, | |
| "loss": 0.5918, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.7366694011484825, | |
| "grad_norm": 0.007596330717206001, | |
| "learning_rate": 0.00029197080291970805, | |
| "loss": 0.6441, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 2.7388569866010393, | |
| "grad_norm": 0.022759029641747475, | |
| "learning_rate": 0.00028953771289537715, | |
| "loss": 0.6192, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.741044572053596, | |
| "grad_norm": 0.0065732188522815704, | |
| "learning_rate": 0.00028710462287104624, | |
| "loss": 0.73, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 2.7432321575061525, | |
| "grad_norm": 0.009496266953647137, | |
| "learning_rate": 0.00028467153284671533, | |
| "loss": 0.839, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.7454197429587093, | |
| "grad_norm": 0.007220600266009569, | |
| "learning_rate": 0.0002822384428223844, | |
| "loss": 0.6448, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 2.747607328411266, | |
| "grad_norm": 0.015215203166007996, | |
| "learning_rate": 0.00027980535279805357, | |
| "loss": 0.7697, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.749794913863823, | |
| "grad_norm": 0.015471878461539745, | |
| "learning_rate": 0.0002773722627737226, | |
| "loss": 0.7398, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 2.7519824993163793, | |
| "grad_norm": 0.009130065329372883, | |
| "learning_rate": 0.00027493917274939175, | |
| "loss": 0.6993, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.754170084768936, | |
| "grad_norm": 0.007493583485484123, | |
| "learning_rate": 0.0002725060827250608, | |
| "loss": 0.6525, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 2.756357670221493, | |
| "grad_norm": 0.018882576376199722, | |
| "learning_rate": 0.00027007299270072994, | |
| "loss": 0.785, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.7585452556740497, | |
| "grad_norm": 0.010290750302374363, | |
| "learning_rate": 0.00026763990267639903, | |
| "loss": 0.6355, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 2.7607328411266066, | |
| "grad_norm": 0.020789271220564842, | |
| "learning_rate": 0.0002652068126520681, | |
| "loss": 0.6681, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.7629204265791634, | |
| "grad_norm": 0.010807972401380539, | |
| "learning_rate": 0.00026277372262773727, | |
| "loss": 0.8581, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 2.76510801203172, | |
| "grad_norm": 0.006756063550710678, | |
| "learning_rate": 0.0002603406326034063, | |
| "loss": 0.7499, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.767295597484277, | |
| "grad_norm": 0.013115596026182175, | |
| "learning_rate": 0.00025790754257907546, | |
| "loss": 0.6298, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 2.7694831829368334, | |
| "grad_norm": 0.010143927298486233, | |
| "learning_rate": 0.0002554744525547445, | |
| "loss": 0.7911, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.77167076838939, | |
| "grad_norm": 0.011593978852033615, | |
| "learning_rate": 0.00025304136253041364, | |
| "loss": 0.6558, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 2.773858353841947, | |
| "grad_norm": 0.011897698044776917, | |
| "learning_rate": 0.00025060827250608273, | |
| "loss": 0.7177, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.776045939294504, | |
| "grad_norm": 0.011287844739854336, | |
| "learning_rate": 0.0002481751824817518, | |
| "loss": 0.8625, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 2.77823352474706, | |
| "grad_norm": 0.017498012632131577, | |
| "learning_rate": 0.0002457420924574209, | |
| "loss": 0.896, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.780421110199617, | |
| "grad_norm": 0.011069230735301971, | |
| "learning_rate": 0.00024330900243309, | |
| "loss": 0.6567, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 2.782608695652174, | |
| "grad_norm": 0.005669731646776199, | |
| "learning_rate": 0.00024087591240875913, | |
| "loss": 0.7313, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.7847962811047307, | |
| "grad_norm": 0.02650737576186657, | |
| "learning_rate": 0.00023844282238442825, | |
| "loss": 0.8647, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 2.7869838665572875, | |
| "grad_norm": 0.010408868081867695, | |
| "learning_rate": 0.0002360097323600973, | |
| "loss": 0.8034, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.7891714520098443, | |
| "grad_norm": 0.013187460601329803, | |
| "learning_rate": 0.00023357664233576643, | |
| "loss": 0.8, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 2.791359037462401, | |
| "grad_norm": 0.009964399971067905, | |
| "learning_rate": 0.0002311435523114355, | |
| "loss": 0.8949, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.7935466229149575, | |
| "grad_norm": 0.01696036383509636, | |
| "learning_rate": 0.00022871046228710462, | |
| "loss": 0.678, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 2.7957342083675143, | |
| "grad_norm": 0.07283343374729156, | |
| "learning_rate": 0.00022627737226277374, | |
| "loss": 0.7264, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.797921793820071, | |
| "grad_norm": 0.007607647217810154, | |
| "learning_rate": 0.00022384428223844283, | |
| "loss": 0.8112, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 2.800109379272628, | |
| "grad_norm": 0.015119451098144054, | |
| "learning_rate": 0.00022141119221411192, | |
| "loss": 0.6995, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.8022969647251843, | |
| "grad_norm": 0.013507510535418987, | |
| "learning_rate": 0.00021897810218978101, | |
| "loss": 0.8193, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 2.804484550177741, | |
| "grad_norm": 0.007651912048459053, | |
| "learning_rate": 0.00021654501216545013, | |
| "loss": 0.5999, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.806672135630298, | |
| "grad_norm": 0.010115343146026134, | |
| "learning_rate": 0.00021411192214111925, | |
| "loss": 0.7694, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 2.8088597210828548, | |
| "grad_norm": 0.011188814416527748, | |
| "learning_rate": 0.00021167883211678832, | |
| "loss": 0.8099, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.8110473065354116, | |
| "grad_norm": 0.007763843517750502, | |
| "learning_rate": 0.00020924574209245744, | |
| "loss": 0.7182, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 2.8132348919879684, | |
| "grad_norm": 0.00900893472135067, | |
| "learning_rate": 0.0002068126520681265, | |
| "loss": 0.6297, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.815422477440525, | |
| "grad_norm": 0.006093029864132404, | |
| "learning_rate": 0.00020437956204379562, | |
| "loss": 1.0166, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 2.817610062893082, | |
| "grad_norm": 0.008186981081962585, | |
| "learning_rate": 0.00020194647201946474, | |
| "loss": 0.6606, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.8197976483456384, | |
| "grad_norm": 0.011285791173577309, | |
| "learning_rate": 0.0001995133819951338, | |
| "loss": 0.672, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 2.821985233798195, | |
| "grad_norm": 0.011607305146753788, | |
| "learning_rate": 0.00019708029197080293, | |
| "loss": 0.6903, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.824172819250752, | |
| "grad_norm": 0.008523947559297085, | |
| "learning_rate": 0.00019464720194647202, | |
| "loss": 0.8383, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 2.826360404703309, | |
| "grad_norm": 0.010200290009379387, | |
| "learning_rate": 0.00019221411192214114, | |
| "loss": 0.7475, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.828547990155865, | |
| "grad_norm": 0.01312936469912529, | |
| "learning_rate": 0.00018978102189781023, | |
| "loss": 0.7571, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 2.830735575608422, | |
| "grad_norm": 0.021754464134573936, | |
| "learning_rate": 0.00018734793187347932, | |
| "loss": 0.7915, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.832923161060979, | |
| "grad_norm": 0.022569775581359863, | |
| "learning_rate": 0.00018491484184914844, | |
| "loss": 0.7305, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 2.8351107465135357, | |
| "grad_norm": 0.009172527119517326, | |
| "learning_rate": 0.00018248175182481753, | |
| "loss": 0.8616, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.8372983319660925, | |
| "grad_norm": 0.00900851096957922, | |
| "learning_rate": 0.00018004866180048663, | |
| "loss": 0.8411, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 2.8394859174186493, | |
| "grad_norm": 0.033786166459321976, | |
| "learning_rate": 0.00017761557177615572, | |
| "loss": 0.6755, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.841673502871206, | |
| "grad_norm": 0.006091755349189043, | |
| "learning_rate": 0.0001751824817518248, | |
| "loss": 0.7822, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 2.8438610883237625, | |
| "grad_norm": 0.011280403472483158, | |
| "learning_rate": 0.0001727493917274939, | |
| "loss": 0.8669, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.8460486737763193, | |
| "grad_norm": 0.007846282795071602, | |
| "learning_rate": 0.00017031630170316302, | |
| "loss": 0.752, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 2.848236259228876, | |
| "grad_norm": 0.008928561583161354, | |
| "learning_rate": 0.00016788321167883211, | |
| "loss": 0.7062, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.850423844681433, | |
| "grad_norm": 0.0234297476708889, | |
| "learning_rate": 0.0001654501216545012, | |
| "loss": 0.7319, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 2.8526114301339898, | |
| "grad_norm": 0.07628759741783142, | |
| "learning_rate": 0.00016301703163017033, | |
| "loss": 0.8256, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.854799015586546, | |
| "grad_norm": 0.00962966587394476, | |
| "learning_rate": 0.00016058394160583942, | |
| "loss": 0.825, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 2.856986601039103, | |
| "grad_norm": 0.008182559162378311, | |
| "learning_rate": 0.00015815085158150854, | |
| "loss": 0.7628, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.8591741864916598, | |
| "grad_norm": 0.0483902171254158, | |
| "learning_rate": 0.00015571776155717763, | |
| "loss": 0.8631, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 2.8613617719442166, | |
| "grad_norm": 0.01323285885155201, | |
| "learning_rate": 0.00015328467153284672, | |
| "loss": 0.7958, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.8635493573967734, | |
| "grad_norm": 0.009712522849440575, | |
| "learning_rate": 0.00015085158150851582, | |
| "loss": 0.6506, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 2.8657369428493302, | |
| "grad_norm": 0.0073866224847733974, | |
| "learning_rate": 0.0001484184914841849, | |
| "loss": 0.5997, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.867924528301887, | |
| "grad_norm": 0.009534020908176899, | |
| "learning_rate": 0.00014598540145985403, | |
| "loss": 0.7732, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 2.8701121137544434, | |
| "grad_norm": 0.008029601536691189, | |
| "learning_rate": 0.00014355231143552312, | |
| "loss": 0.7837, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.8722996992070002, | |
| "grad_norm": 0.01388575229793787, | |
| "learning_rate": 0.0001411192214111922, | |
| "loss": 0.6959, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 2.874487284659557, | |
| "grad_norm": 0.011830773204565048, | |
| "learning_rate": 0.0001386861313868613, | |
| "loss": 0.7597, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.876674870112114, | |
| "grad_norm": 0.013655097223818302, | |
| "learning_rate": 0.0001362530413625304, | |
| "loss": 0.6103, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 2.8788624555646702, | |
| "grad_norm": 0.009793232195079327, | |
| "learning_rate": 0.00013381995133819952, | |
| "loss": 0.7327, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.881050041017227, | |
| "grad_norm": 0.009699089452624321, | |
| "learning_rate": 0.00013138686131386864, | |
| "loss": 0.7882, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 2.883237626469784, | |
| "grad_norm": 0.01353220921009779, | |
| "learning_rate": 0.00012895377128953773, | |
| "loss": 0.7567, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.8854252119223407, | |
| "grad_norm": 0.012468249537050724, | |
| "learning_rate": 0.00012652068126520682, | |
| "loss": 0.6502, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 2.8876127973748975, | |
| "grad_norm": 0.010982934385538101, | |
| "learning_rate": 0.0001240875912408759, | |
| "loss": 0.6542, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.8898003828274543, | |
| "grad_norm": 0.008489643223583698, | |
| "learning_rate": 0.000121654501216545, | |
| "loss": 0.7122, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 2.891987968280011, | |
| "grad_norm": 0.009710462763905525, | |
| "learning_rate": 0.00011922141119221412, | |
| "loss": 0.8059, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.8941755537325675, | |
| "grad_norm": 0.008519637398421764, | |
| "learning_rate": 0.00011678832116788322, | |
| "loss": 0.668, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 2.8963631391851243, | |
| "grad_norm": 0.012375866994261742, | |
| "learning_rate": 0.00011435523114355231, | |
| "loss": 0.8298, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.898550724637681, | |
| "grad_norm": 0.011852890253067017, | |
| "learning_rate": 0.00011192214111922141, | |
| "loss": 1.0037, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 2.900738310090238, | |
| "grad_norm": 0.01731940545141697, | |
| "learning_rate": 0.00010948905109489051, | |
| "loss": 0.7002, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.902925895542795, | |
| "grad_norm": 0.026805153116583824, | |
| "learning_rate": 0.00010705596107055963, | |
| "loss": 0.9983, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 2.905113480995351, | |
| "grad_norm": 0.011630130000412464, | |
| "learning_rate": 0.00010462287104622872, | |
| "loss": 0.575, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.907301066447908, | |
| "grad_norm": 0.012041180394589901, | |
| "learning_rate": 0.00010218978102189781, | |
| "loss": 0.6631, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 2.909488651900465, | |
| "grad_norm": 0.009331166744232178, | |
| "learning_rate": 9.97566909975669e-05, | |
| "loss": 0.7661, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.9116762373530216, | |
| "grad_norm": 0.010035173036158085, | |
| "learning_rate": 9.732360097323601e-05, | |
| "loss": 0.7367, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 2.9138638228055784, | |
| "grad_norm": 0.0184579249471426, | |
| "learning_rate": 9.489051094890511e-05, | |
| "loss": 0.7267, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.9160514082581352, | |
| "grad_norm": 0.019723238423466682, | |
| "learning_rate": 9.245742092457422e-05, | |
| "loss": 0.9285, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 2.918238993710692, | |
| "grad_norm": 0.01119768712669611, | |
| "learning_rate": 9.002433090024331e-05, | |
| "loss": 0.8886, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.9204265791632484, | |
| "grad_norm": 0.010187883861362934, | |
| "learning_rate": 8.75912408759124e-05, | |
| "loss": 0.6872, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 2.9226141646158053, | |
| "grad_norm": 0.006695912219583988, | |
| "learning_rate": 8.515815085158151e-05, | |
| "loss": 0.6093, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.924801750068362, | |
| "grad_norm": 0.009726252406835556, | |
| "learning_rate": 8.27250608272506e-05, | |
| "loss": 0.735, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 2.926989335520919, | |
| "grad_norm": 0.006968527100980282, | |
| "learning_rate": 8.029197080291971e-05, | |
| "loss": 0.9525, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.9291769209734753, | |
| "grad_norm": 0.019444549456238747, | |
| "learning_rate": 7.785888077858882e-05, | |
| "loss": 0.7423, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 2.931364506426032, | |
| "grad_norm": 0.014326276257634163, | |
| "learning_rate": 7.542579075425791e-05, | |
| "loss": 0.7437, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.933552091878589, | |
| "grad_norm": 0.008168605156242847, | |
| "learning_rate": 7.299270072992701e-05, | |
| "loss": 0.7014, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 2.9357396773311457, | |
| "grad_norm": 0.010011604055762291, | |
| "learning_rate": 7.05596107055961e-05, | |
| "loss": 0.6541, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.9379272627837025, | |
| "grad_norm": 0.013739430345594883, | |
| "learning_rate": 6.81265206812652e-05, | |
| "loss": 0.7885, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 2.9401148482362593, | |
| "grad_norm": 0.01414500456303358, | |
| "learning_rate": 6.569343065693432e-05, | |
| "loss": 0.9111, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.942302433688816, | |
| "grad_norm": 0.010208160616457462, | |
| "learning_rate": 6.326034063260341e-05, | |
| "loss": 0.6641, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 2.944490019141373, | |
| "grad_norm": 0.012237477116286755, | |
| "learning_rate": 6.08272506082725e-05, | |
| "loss": 0.6199, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.9466776045939294, | |
| "grad_norm": 0.008850525133311749, | |
| "learning_rate": 5.839416058394161e-05, | |
| "loss": 0.8436, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 2.948865190046486, | |
| "grad_norm": 0.01408157218247652, | |
| "learning_rate": 5.596107055961071e-05, | |
| "loss": 0.667, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.951052775499043, | |
| "grad_norm": 0.017354557290673256, | |
| "learning_rate": 5.352798053527981e-05, | |
| "loss": 0.7591, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 2.9532403609516, | |
| "grad_norm": 0.013411460444331169, | |
| "learning_rate": 5.1094890510948905e-05, | |
| "loss": 0.8248, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.955427946404156, | |
| "grad_norm": 0.018828334286808968, | |
| "learning_rate": 4.8661800486618005e-05, | |
| "loss": 0.8297, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 2.957615531856713, | |
| "grad_norm": 0.012131531722843647, | |
| "learning_rate": 4.622871046228711e-05, | |
| "loss": 0.8469, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.95980311730927, | |
| "grad_norm": 0.017933214083313942, | |
| "learning_rate": 4.37956204379562e-05, | |
| "loss": 0.886, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 2.9619907027618266, | |
| "grad_norm": 0.007120661437511444, | |
| "learning_rate": 4.13625304136253e-05, | |
| "loss": 0.7975, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.9641782882143834, | |
| "grad_norm": 0.008959448896348476, | |
| "learning_rate": 3.892944038929441e-05, | |
| "loss": 0.7624, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 2.9663658736669403, | |
| "grad_norm": 0.00703001581132412, | |
| "learning_rate": 3.649635036496351e-05, | |
| "loss": 0.9414, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.968553459119497, | |
| "grad_norm": 0.009628667496144772, | |
| "learning_rate": 3.40632603406326e-05, | |
| "loss": 0.7348, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 2.9707410445720535, | |
| "grad_norm": 0.010123343206942081, | |
| "learning_rate": 3.1630170316301705e-05, | |
| "loss": 0.5589, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 2.9729286300246103, | |
| "grad_norm": 0.012991656549274921, | |
| "learning_rate": 2.9197080291970804e-05, | |
| "loss": 0.7015, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 2.975116215477167, | |
| "grad_norm": 0.008844063617289066, | |
| "learning_rate": 2.6763990267639907e-05, | |
| "loss": 0.7395, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.977303800929724, | |
| "grad_norm": 0.010974117554724216, | |
| "learning_rate": 2.4330900243309002e-05, | |
| "loss": 0.815, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 2.9794913863822803, | |
| "grad_norm": 0.011202923953533173, | |
| "learning_rate": 2.18978102189781e-05, | |
| "loss": 0.7593, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 2.981678971834837, | |
| "grad_norm": 0.011004596017301083, | |
| "learning_rate": 1.9464720194647204e-05, | |
| "loss": 0.6727, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 2.983866557287394, | |
| "grad_norm": 0.009554206393659115, | |
| "learning_rate": 1.70316301703163e-05, | |
| "loss": 0.8229, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 2.9860541427399507, | |
| "grad_norm": 0.013814912177622318, | |
| "learning_rate": 1.4598540145985402e-05, | |
| "loss": 1.0031, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 2.9882417281925076, | |
| "grad_norm": 0.006289259064942598, | |
| "learning_rate": 1.2165450121654501e-05, | |
| "loss": 0.6995, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 2.9904293136450644, | |
| "grad_norm": 0.008405916392803192, | |
| "learning_rate": 9.732360097323602e-06, | |
| "loss": 0.7135, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 2.992616899097621, | |
| "grad_norm": 0.012755095958709717, | |
| "learning_rate": 7.299270072992701e-06, | |
| "loss": 0.8523, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.994804484550178, | |
| "grad_norm": 0.011079053394496441, | |
| "learning_rate": 4.866180048661801e-06, | |
| "loss": 0.6673, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 2.9969920700027344, | |
| "grad_norm": 0.011697685346007347, | |
| "learning_rate": 2.4330900243309005e-06, | |
| "loss": 0.7831, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.999179655455291, | |
| "grad_norm": 0.0072103943675756454, | |
| "learning_rate": 0.0, | |
| "loss": 0.8479, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 2.999179655455291, | |
| "step": 1371, | |
| "total_flos": 4.3134948379459584e+17, | |
| "train_loss": 0.7785058324133541, | |
| "train_runtime": 1561.6761, | |
| "train_samples_per_second": 14.048, | |
| "train_steps_per_second": 0.878 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1371, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.3134948379459584e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |