| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.023999808001535988, |
| "eval_steps": 500, |
| "global_step": 3000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 7.999936000511996e-05, |
| "grad_norm": 4.414881453815827, |
| "learning_rate": 7.199424046076314e-09, |
| "loss": 0.5441, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00015999872001023991, |
| "grad_norm": 4.004957247533114, |
| "learning_rate": 1.519878409727222e-08, |
| "loss": 0.5353, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00023999808001535987, |
| "grad_norm": 4.889790729834478, |
| "learning_rate": 2.3198144148468124e-08, |
| "loss": 0.537, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.00031999744002047983, |
| "grad_norm": 3.7138301639437508, |
| "learning_rate": 3.119750419966403e-08, |
| "loss": 0.5488, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0003999968000255998, |
| "grad_norm": 4.208028943214464, |
| "learning_rate": 3.919686425085993e-08, |
| "loss": 0.5557, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.00047999616003071974, |
| "grad_norm": 4.326896533998126, |
| "learning_rate": 4.719622430205584e-08, |
| "loss": 0.5061, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0005599955200358397, |
| "grad_norm": 7.1723877700496415, |
| "learning_rate": 5.519558435325175e-08, |
| "loss": 0.5193, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0006399948800409597, |
| "grad_norm": 3.892238785520058, |
| "learning_rate": 6.319494440444764e-08, |
| "loss": 0.5311, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0007199942400460796, |
| "grad_norm": 3.4517472540795513, |
| "learning_rate": 7.119430445564356e-08, |
| "loss": 0.5232, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0007999936000511996, |
| "grad_norm": 4.094057558667446, |
| "learning_rate": 7.919366450683946e-08, |
| "loss": 0.5225, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0008799929600563195, |
| "grad_norm": 3.4395682865399144, |
| "learning_rate": 8.719302455803536e-08, |
| "loss": 0.5371, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0009599923200614395, |
| "grad_norm": 3.7029712275218682, |
| "learning_rate": 9.519238460923127e-08, |
| "loss": 0.5305, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0010399916800665594, |
| "grad_norm": 3.493027269180126, |
| "learning_rate": 1.0319174466042718e-07, |
| "loss": 0.5167, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0011199910400716794, |
| "grad_norm": 3.480790725115363, |
| "learning_rate": 1.1119110471162308e-07, |
| "loss": 0.5543, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0011999904000767993, |
| "grad_norm": 3.0350778792458564, |
| "learning_rate": 1.1919046476281897e-07, |
| "loss": 0.5188, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0012799897600819193, |
| "grad_norm": 2.7703962024693216, |
| "learning_rate": 1.271898248140149e-07, |
| "loss": 0.5176, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0013599891200870393, |
| "grad_norm": 2.7312812150288277, |
| "learning_rate": 1.351891848652108e-07, |
| "loss": 0.5206, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0014399884800921593, |
| "grad_norm": 2.6772442085163837, |
| "learning_rate": 1.431885449164067e-07, |
| "loss": 0.5187, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0015199878400972793, |
| "grad_norm": 2.6017371604025503, |
| "learning_rate": 1.5118790496760262e-07, |
| "loss": 0.4858, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0015999872001023993, |
| "grad_norm": 2.361921882047865, |
| "learning_rate": 1.5918726501879854e-07, |
| "loss": 0.4965, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0016799865601075192, |
| "grad_norm": 1.9949603945902301, |
| "learning_rate": 1.6718662506999443e-07, |
| "loss": 0.49, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.001759985920112639, |
| "grad_norm": 1.689017850332906, |
| "learning_rate": 1.7518598512119031e-07, |
| "loss": 0.4474, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.001839985280117759, |
| "grad_norm": 1.7526108484730667, |
| "learning_rate": 1.8318534517238623e-07, |
| "loss": 0.4758, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.001919984640122879, |
| "grad_norm": 1.6619250196852287, |
| "learning_rate": 1.9118470522358212e-07, |
| "loss": 0.4551, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.001999984000127999, |
| "grad_norm": 1.4244411983222107, |
| "learning_rate": 1.9918406527477803e-07, |
| "loss": 0.4666, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.0020799833601331187, |
| "grad_norm": 1.4091371713945773, |
| "learning_rate": 2.0718342532597392e-07, |
| "loss": 0.4744, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.002159982720138239, |
| "grad_norm": 1.4037698231377602, |
| "learning_rate": 2.1518278537716986e-07, |
| "loss": 0.4577, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.0022399820801433587, |
| "grad_norm": 1.1233973407058262, |
| "learning_rate": 2.2318214542836575e-07, |
| "loss": 0.4442, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.002319981440148479, |
| "grad_norm": 1.0934034827825632, |
| "learning_rate": 2.3118150547956164e-07, |
| "loss": 0.4746, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.0023999808001535987, |
| "grad_norm": 0.9547032856231036, |
| "learning_rate": 2.3918086553075753e-07, |
| "loss": 0.4631, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.002479980160158719, |
| "grad_norm": 1.011447249716743, |
| "learning_rate": 2.4718022558195345e-07, |
| "loss": 0.4359, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.0025599795201638386, |
| "grad_norm": 0.7506687829382205, |
| "learning_rate": 2.5517958563314936e-07, |
| "loss": 0.4554, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.002639978880168959, |
| "grad_norm": 0.8268141903301145, |
| "learning_rate": 2.631789456843453e-07, |
| "loss": 0.4444, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.0027199782401740786, |
| "grad_norm": 0.9120933866995852, |
| "learning_rate": 2.711783057355412e-07, |
| "loss": 0.4662, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.0027999776001791984, |
| "grad_norm": 0.9481570868182263, |
| "learning_rate": 2.791776657867371e-07, |
| "loss": 0.4677, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.0028799769601843186, |
| "grad_norm": 0.8010445040150771, |
| "learning_rate": 2.8717702583793297e-07, |
| "loss": 0.4196, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.0029599763201894383, |
| "grad_norm": 0.8670839473253033, |
| "learning_rate": 2.951763858891289e-07, |
| "loss": 0.4492, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.0030399756801945585, |
| "grad_norm": 0.8536232676044045, |
| "learning_rate": 3.031757459403248e-07, |
| "loss": 0.4375, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.0031199750401996783, |
| "grad_norm": 0.8135677817452803, |
| "learning_rate": 3.111751059915207e-07, |
| "loss": 0.438, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.0031999744002047985, |
| "grad_norm": 0.9667962048008838, |
| "learning_rate": 3.191744660427166e-07, |
| "loss": 0.4348, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.0032799737602099183, |
| "grad_norm": 0.885059264680228, |
| "learning_rate": 3.271738260939125e-07, |
| "loss": 0.4295, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.0033599731202150385, |
| "grad_norm": 0.7112007856245484, |
| "learning_rate": 3.3517318614510846e-07, |
| "loss": 0.4412, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.0034399724802201583, |
| "grad_norm": 0.9486242885147141, |
| "learning_rate": 3.431725461963043e-07, |
| "loss": 0.4357, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.003519971840225278, |
| "grad_norm": 0.7792187180449996, |
| "learning_rate": 3.5117190624750024e-07, |
| "loss": 0.4053, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.0035999712002303982, |
| "grad_norm": 0.8013118911013118, |
| "learning_rate": 3.591712662986961e-07, |
| "loss": 0.4526, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.003679970560235518, |
| "grad_norm": 0.9290353407648725, |
| "learning_rate": 3.6717062634989207e-07, |
| "loss": 0.4394, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.003759969920240638, |
| "grad_norm": 0.7440275824040646, |
| "learning_rate": 3.7516998640108793e-07, |
| "loss": 0.4354, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.003839969280245758, |
| "grad_norm": 0.6661735557442742, |
| "learning_rate": 3.8316934645228385e-07, |
| "loss": 0.4132, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.003919968640250878, |
| "grad_norm": 0.80714967397643, |
| "learning_rate": 3.9116870650347976e-07, |
| "loss": 0.4247, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.003999968000255998, |
| "grad_norm": 0.7269691108846945, |
| "learning_rate": 3.9916806655467563e-07, |
| "loss": 0.4308, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.004079967360261118, |
| "grad_norm": 0.8112092339496537, |
| "learning_rate": 4.071674266058716e-07, |
| "loss": 0.4215, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.0041599667202662375, |
| "grad_norm": 0.8465738727092779, |
| "learning_rate": 4.1516678665706746e-07, |
| "loss": 0.4438, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.004239966080271358, |
| "grad_norm": 0.7947114246478345, |
| "learning_rate": 4.2316614670826337e-07, |
| "loss": 0.402, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.004319965440276478, |
| "grad_norm": 0.771445835897062, |
| "learning_rate": 4.311655067594593e-07, |
| "loss": 0.4434, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.004399964800281598, |
| "grad_norm": 0.7143580000434352, |
| "learning_rate": 4.3916486681065515e-07, |
| "loss": 0.4227, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.004479964160286717, |
| "grad_norm": 0.7487773075552359, |
| "learning_rate": 4.471642268618511e-07, |
| "loss": 0.438, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.004559963520291838, |
| "grad_norm": 0.7373810653849879, |
| "learning_rate": 4.55163586913047e-07, |
| "loss": 0.4288, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.004639962880296958, |
| "grad_norm": 0.7104505393064982, |
| "learning_rate": 4.631629469642429e-07, |
| "loss": 0.4462, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.004719962240302078, |
| "grad_norm": 0.6996674322090365, |
| "learning_rate": 4.7116230701543876e-07, |
| "loss": 0.4263, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.004799961600307197, |
| "grad_norm": 0.779468888606517, |
| "learning_rate": 4.791616670666347e-07, |
| "loss": 0.4378, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.0048799609603123175, |
| "grad_norm": 0.6348456379271163, |
| "learning_rate": 4.871610271178306e-07, |
| "loss": 0.4098, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.004959960320317438, |
| "grad_norm": 0.6384341588537313, |
| "learning_rate": 4.951603871690265e-07, |
| "loss": 0.4489, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.005039959680322557, |
| "grad_norm": 0.6779051066808497, |
| "learning_rate": 5.031597472202224e-07, |
| "loss": 0.4522, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.005119959040327677, |
| "grad_norm": 0.7405337420145796, |
| "learning_rate": 5.111591072714183e-07, |
| "loss": 0.4248, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.0051999584003327975, |
| "grad_norm": 0.6915688533513078, |
| "learning_rate": 5.191584673226143e-07, |
| "loss": 0.4129, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.005279957760337918, |
| "grad_norm": 0.6044444399034763, |
| "learning_rate": 5.271578273738101e-07, |
| "loss": 0.4237, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.005359957120343037, |
| "grad_norm": 0.7401624173362278, |
| "learning_rate": 5.351571874250061e-07, |
| "loss": 0.4216, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.005439956480348157, |
| "grad_norm": 0.7496141168534413, |
| "learning_rate": 5.43156547476202e-07, |
| "loss": 0.4268, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.005519955840353277, |
| "grad_norm": 0.730614366570662, |
| "learning_rate": 5.511559075273978e-07, |
| "loss": 0.4589, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.005599955200358397, |
| "grad_norm": 0.7836211351094068, |
| "learning_rate": 5.591552675785937e-07, |
| "loss": 0.4176, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.005679954560363517, |
| "grad_norm": 0.6648189022606215, |
| "learning_rate": 5.671546276297896e-07, |
| "loss": 0.4274, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.005759953920368637, |
| "grad_norm": 0.6607901995290275, |
| "learning_rate": 5.751539876809856e-07, |
| "loss": 0.428, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.005839953280373757, |
| "grad_norm": 0.8422142881250906, |
| "learning_rate": 5.831533477321815e-07, |
| "loss": 0.4206, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.005919952640378877, |
| "grad_norm": 0.8636103630308042, |
| "learning_rate": 5.911527077833774e-07, |
| "loss": 0.4384, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.005999952000383997, |
| "grad_norm": 0.7020283376881055, |
| "learning_rate": 5.991520678345733e-07, |
| "loss": 0.4336, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.006079951360389117, |
| "grad_norm": 0.6637768636661737, |
| "learning_rate": 6.071514278857692e-07, |
| "loss": 0.421, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.006159950720394236, |
| "grad_norm": 0.6720228370970702, |
| "learning_rate": 6.151507879369651e-07, |
| "loss": 0.4425, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.006239950080399357, |
| "grad_norm": 0.7915308024009126, |
| "learning_rate": 6.231501479881609e-07, |
| "loss": 0.4665, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.006319949440404477, |
| "grad_norm": 0.7005483141808596, |
| "learning_rate": 6.311495080393569e-07, |
| "loss": 0.4371, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.006399948800409597, |
| "grad_norm": 0.7085842808188052, |
| "learning_rate": 6.391488680905528e-07, |
| "loss": 0.4481, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.006479948160414716, |
| "grad_norm": 0.797364113205394, |
| "learning_rate": 6.471482281417488e-07, |
| "loss": 0.43, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.0065599475204198366, |
| "grad_norm": 0.7769920944189029, |
| "learning_rate": 6.551475881929446e-07, |
| "loss": 0.4421, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.006639946880424957, |
| "grad_norm": 0.7190098449382926, |
| "learning_rate": 6.631469482441405e-07, |
| "loss": 0.4524, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.006719946240430077, |
| "grad_norm": 0.8999089940382301, |
| "learning_rate": 6.711463082953363e-07, |
| "loss": 0.4327, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.006799945600435196, |
| "grad_norm": 0.8071258672281636, |
| "learning_rate": 6.791456683465323e-07, |
| "loss": 0.3999, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.0068799449604403165, |
| "grad_norm": 0.9718244104757909, |
| "learning_rate": 6.871450283977283e-07, |
| "loss": 0.4109, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.006959944320445437, |
| "grad_norm": 0.7260805899057289, |
| "learning_rate": 6.951443884489241e-07, |
| "loss": 0.4254, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.007039943680450556, |
| "grad_norm": 0.6793848246836304, |
| "learning_rate": 7.031437485001201e-07, |
| "loss": 0.4168, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.007119943040455676, |
| "grad_norm": 0.8220405330990818, |
| "learning_rate": 7.11143108551316e-07, |
| "loss": 0.4368, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.0071999424004607964, |
| "grad_norm": 0.8650021092848164, |
| "learning_rate": 7.191424686025118e-07, |
| "loss": 0.4336, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.007279941760465917, |
| "grad_norm": 0.6667689577948444, |
| "learning_rate": 7.271418286537078e-07, |
| "loss": 0.4325, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.007359941120471036, |
| "grad_norm": 0.6443211014509481, |
| "learning_rate": 7.351411887049036e-07, |
| "loss": 0.443, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.007439940480476156, |
| "grad_norm": 0.6067913066251965, |
| "learning_rate": 7.431405487560996e-07, |
| "loss": 0.4095, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.007519939840481276, |
| "grad_norm": 0.6935512372810877, |
| "learning_rate": 7.511399088072954e-07, |
| "loss": 0.4272, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.007599939200486396, |
| "grad_norm": 0.6599113657719949, |
| "learning_rate": 7.591392688584914e-07, |
| "loss": 0.3942, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.007679938560491516, |
| "grad_norm": 0.611225655896161, |
| "learning_rate": 7.671386289096873e-07, |
| "loss": 0.4451, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.007759937920496636, |
| "grad_norm": 0.757015652343119, |
| "learning_rate": 7.751379889608831e-07, |
| "loss": 0.4059, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.007839937280501755, |
| "grad_norm": 0.8456384517410107, |
| "learning_rate": 7.831373490120791e-07, |
| "loss": 0.4278, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.007919936640506876, |
| "grad_norm": 0.7996016208371923, |
| "learning_rate": 7.911367090632751e-07, |
| "loss": 0.4536, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.007999936000511996, |
| "grad_norm": 0.6228736248515305, |
| "learning_rate": 7.991360691144709e-07, |
| "loss": 0.3969, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.008079935360517116, |
| "grad_norm": 0.6440105424946402, |
| "learning_rate": 8.071354291656668e-07, |
| "loss": 0.4307, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.008159934720522236, |
| "grad_norm": 0.6636370756757088, |
| "learning_rate": 8.151347892168628e-07, |
| "loss": 0.4019, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.008239934080527356, |
| "grad_norm": 0.6534728964626215, |
| "learning_rate": 8.231341492680586e-07, |
| "loss": 0.4357, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.008319933440532475, |
| "grad_norm": 0.6854238349847609, |
| "learning_rate": 8.311335093192545e-07, |
| "loss": 0.4156, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.008399932800537595, |
| "grad_norm": 0.7692797686595098, |
| "learning_rate": 8.391328693704504e-07, |
| "loss": 0.4364, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.008479932160542715, |
| "grad_norm": 0.6892139807862941, |
| "learning_rate": 8.471322294216464e-07, |
| "loss": 0.4595, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.008559931520547836, |
| "grad_norm": 0.6786670330601587, |
| "learning_rate": 8.551315894728423e-07, |
| "loss": 0.4207, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.008639930880552956, |
| "grad_norm": 0.6118366771253826, |
| "learning_rate": 8.631309495240381e-07, |
| "loss": 0.4186, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.008719930240558076, |
| "grad_norm": 0.7157718863255984, |
| "learning_rate": 8.711303095752341e-07, |
| "loss": 0.4412, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.008799929600563196, |
| "grad_norm": 0.7102096103999658, |
| "learning_rate": 8.791296696264299e-07, |
| "loss": 0.4205, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.008879928960568315, |
| "grad_norm": 0.6326552697725677, |
| "learning_rate": 8.871290296776258e-07, |
| "loss": 0.4161, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.008959928320573435, |
| "grad_norm": 0.7455469536196851, |
| "learning_rate": 8.951283897288219e-07, |
| "loss": 0.4259, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.009039927680578555, |
| "grad_norm": 0.8219540514047248, |
| "learning_rate": 9.031277497800177e-07, |
| "loss": 0.4423, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.009119927040583675, |
| "grad_norm": 0.6428846698552027, |
| "learning_rate": 9.111271098312136e-07, |
| "loss": 0.432, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.009199926400588795, |
| "grad_norm": 0.7959015617833209, |
| "learning_rate": 9.191264698824094e-07, |
| "loss": 0.4374, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.009279925760593916, |
| "grad_norm": 0.722690696817927, |
| "learning_rate": 9.271258299336054e-07, |
| "loss": 0.4177, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.009359925120599036, |
| "grad_norm": 0.7583146312532495, |
| "learning_rate": 9.351251899848013e-07, |
| "loss": 0.4349, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.009439924480604156, |
| "grad_norm": 0.8841341873240007, |
| "learning_rate": 9.431245500359971e-07, |
| "loss": 0.4323, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.009519923840609274, |
| "grad_norm": 0.6844990736142492, |
| "learning_rate": 9.511239100871932e-07, |
| "loss": 0.4195, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.009599923200614395, |
| "grad_norm": 0.7129122863481299, |
| "learning_rate": 9.59123270138389e-07, |
| "loss": 0.4088, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.009679922560619515, |
| "grad_norm": 0.6532760834208395, |
| "learning_rate": 9.67122630189585e-07, |
| "loss": 0.3969, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.009759921920624635, |
| "grad_norm": 0.7563260927255472, |
| "learning_rate": 9.75121990240781e-07, |
| "loss": 0.4075, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.009839921280629755, |
| "grad_norm": 0.7704505024635662, |
| "learning_rate": 9.831213502919768e-07, |
| "loss": 0.3981, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.009919920640634875, |
| "grad_norm": 0.7056792723024738, |
| "learning_rate": 9.911207103431725e-07, |
| "loss": 0.4223, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.009999920000639996, |
| "grad_norm": 0.750390399783068, |
| "learning_rate": 9.991200703943684e-07, |
| "loss": 0.4102, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.010079919360645114, |
| "grad_norm": 0.7529460057363149, |
| "learning_rate": 1.0071194304455646e-06, |
| "loss": 0.4278, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.010159918720650234, |
| "grad_norm": 1.041695148553965, |
| "learning_rate": 1.0151187904967603e-06, |
| "loss": 0.4261, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.010239918080655355, |
| "grad_norm": 0.8098838670880232, |
| "learning_rate": 1.0231181505479562e-06, |
| "loss": 0.4163, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.010319917440660475, |
| "grad_norm": 0.7139180273227849, |
| "learning_rate": 1.0311175105991523e-06, |
| "loss": 0.4139, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.010399916800665595, |
| "grad_norm": 0.7293628900497775, |
| "learning_rate": 1.039116870650348e-06, |
| "loss": 0.4139, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.010479916160670715, |
| "grad_norm": 3.9899125178456276, |
| "learning_rate": 1.047116230701544e-06, |
| "loss": 0.4352, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.010559915520675835, |
| "grad_norm": 0.741428608083807, |
| "learning_rate": 1.0551155907527398e-06, |
| "loss": 0.4487, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.010639914880680954, |
| "grad_norm": 0.7645959836257802, |
| "learning_rate": 1.0631149508039358e-06, |
| "loss": 0.4509, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.010719914240686074, |
| "grad_norm": 0.6630163901596575, |
| "learning_rate": 1.0711143108551317e-06, |
| "loss": 0.422, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.010799913600691194, |
| "grad_norm": 0.7653242364061862, |
| "learning_rate": 1.0791136709063276e-06, |
| "loss": 0.4047, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.010879912960696314, |
| "grad_norm": 0.6952138151449674, |
| "learning_rate": 1.0871130309575235e-06, |
| "loss": 0.424, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.010959912320701435, |
| "grad_norm": 0.8400358998787617, |
| "learning_rate": 1.0951123910087194e-06, |
| "loss": 0.4329, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.011039911680706555, |
| "grad_norm": 0.7959368745719518, |
| "learning_rate": 1.1031117510599153e-06, |
| "loss": 0.4296, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.011119911040711675, |
| "grad_norm": 0.7182831330712102, |
| "learning_rate": 1.111111111111111e-06, |
| "loss": 0.4053, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.011199910400716794, |
| "grad_norm": 0.7631459457678664, |
| "learning_rate": 1.1191104711623072e-06, |
| "loss": 0.4174, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.011279909760721914, |
| "grad_norm": 0.6904060292491346, |
| "learning_rate": 1.127109831213503e-06, |
| "loss": 0.4262, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.011359909120727034, |
| "grad_norm": 0.7154968430224656, |
| "learning_rate": 1.1351091912646988e-06, |
| "loss": 0.4116, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.011439908480732154, |
| "grad_norm": 0.6430506977204669, |
| "learning_rate": 1.143108551315895e-06, |
| "loss": 0.3985, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.011519907840737274, |
| "grad_norm": 0.6060536937789834, |
| "learning_rate": 1.1511079113670908e-06, |
| "loss": 0.4366, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.011599907200742395, |
| "grad_norm": 0.7070045694797042, |
| "learning_rate": 1.1591072714182865e-06, |
| "loss": 0.3951, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.011679906560747515, |
| "grad_norm": 0.6799481270205386, |
| "learning_rate": 1.1671066314694824e-06, |
| "loss": 0.4277, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.011759905920752633, |
| "grad_norm": 0.7706688131579046, |
| "learning_rate": 1.1751059915206786e-06, |
| "loss": 0.4106, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.011839905280757753, |
| "grad_norm": 0.7659401655636618, |
| "learning_rate": 1.1831053515718743e-06, |
| "loss": 0.4147, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.011919904640762874, |
| "grad_norm": 0.7880351649260696, |
| "learning_rate": 1.1911047116230702e-06, |
| "loss": 0.4018, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.011999904000767994, |
| "grad_norm": 0.6643952514129879, |
| "learning_rate": 1.1991040716742661e-06, |
| "loss": 0.4225, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.012079903360773114, |
| "grad_norm": 0.7410891062680932, |
| "learning_rate": 1.207103431725462e-06, |
| "loss": 0.4235, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.012159902720778234, |
| "grad_norm": 0.6702465025094532, |
| "learning_rate": 1.215102791776658e-06, |
| "loss": 0.4485, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.012239902080783354, |
| "grad_norm": 0.72511805103151, |
| "learning_rate": 1.2231021518278539e-06, |
| "loss": 0.4075, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.012319901440788473, |
| "grad_norm": 0.7364457263640842, |
| "learning_rate": 1.2311015118790498e-06, |
| "loss": 0.4209, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.012399900800793593, |
| "grad_norm": 0.6760801533876919, |
| "learning_rate": 1.2391008719302457e-06, |
| "loss": 0.4153, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.012479900160798713, |
| "grad_norm": 1.329240774622819, |
| "learning_rate": 1.2471002319814416e-06, |
| "loss": 0.4569, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.012559899520803833, |
| "grad_norm": 0.7197695590369367, |
| "learning_rate": 1.2550995920326375e-06, |
| "loss": 0.4148, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.012639898880808954, |
| "grad_norm": 0.7197146321929337, |
| "learning_rate": 1.2630989520838332e-06, |
| "loss": 0.4309, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.012719898240814074, |
| "grad_norm": 0.6941046794760515, |
| "learning_rate": 1.2710983121350293e-06, |
| "loss": 0.4266, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.012799897600819194, |
| "grad_norm": 0.6685733935987979, |
| "learning_rate": 1.2790976721862253e-06, |
| "loss": 0.4148, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.012879896960824314, |
| "grad_norm": 0.829963069106683, |
| "learning_rate": 1.287097032237421e-06, |
| "loss": 0.438, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.012959896320829433, |
| "grad_norm": 0.8772001383565942, |
| "learning_rate": 1.295096392288617e-06, |
| "loss": 0.4167, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.013039895680834553, |
| "grad_norm": 0.7693529714105686, |
| "learning_rate": 1.303095752339813e-06, |
| "loss": 0.4465, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.013119895040839673, |
| "grad_norm": 0.6969819161826472, |
| "learning_rate": 1.3110951123910087e-06, |
| "loss": 0.4374, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.013199894400844793, |
| "grad_norm": 0.7245418409893126, |
| "learning_rate": 1.3190944724422048e-06, |
| "loss": 0.4375, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.013279893760849914, |
| "grad_norm": 0.6704658854506884, |
| "learning_rate": 1.3270938324934008e-06, |
| "loss": 0.4144, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.013359893120855034, |
| "grad_norm": 0.7596780468310279, |
| "learning_rate": 1.3350931925445965e-06, |
| "loss": 0.4142, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.013439892480860154, |
| "grad_norm": 0.799033496950235, |
| "learning_rate": 1.3430925525957924e-06, |
| "loss": 0.424, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.013519891840865272, |
| "grad_norm": 0.8360146268303155, |
| "learning_rate": 1.3510919126469885e-06, |
| "loss": 0.4104, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.013599891200870393, |
| "grad_norm": 0.7830381641790578, |
| "learning_rate": 1.3590912726981842e-06, |
| "loss": 0.4067, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.013679890560875513, |
| "grad_norm": 0.7376045815830132, |
| "learning_rate": 1.3670906327493801e-06, |
| "loss": 0.4232, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.013759889920880633, |
| "grad_norm": 0.7780257393234633, |
| "learning_rate": 1.3750899928005762e-06, |
| "loss": 0.4001, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.013839889280885753, |
| "grad_norm": 0.7400853428241427, |
| "learning_rate": 1.383089352851772e-06, |
| "loss": 0.4283, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.013919888640890873, |
| "grad_norm": 0.6301335389858812, |
| "learning_rate": 1.3910887129029679e-06, |
| "loss": 0.4315, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.013999888000895994, |
| "grad_norm": 0.6873058205506691, |
| "learning_rate": 1.3990880729541636e-06, |
| "loss": 0.4294, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.014079887360901112, |
| "grad_norm": 0.6118849450571252, |
| "learning_rate": 1.4070874330053597e-06, |
| "loss": 0.4116, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.014159886720906232, |
| "grad_norm": 0.8643963519063279, |
| "learning_rate": 1.4150867930565556e-06, |
| "loss": 0.4189, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.014239886080911352, |
| "grad_norm": 0.6986286668684888, |
| "learning_rate": 1.4230861531077513e-06, |
| "loss": 0.4393, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.014319885440916473, |
| "grad_norm": 0.7361554237470329, |
| "learning_rate": 1.4310855131589474e-06, |
| "loss": 0.4048, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.014399884800921593, |
| "grad_norm": 0.7526595512221772, |
| "learning_rate": 1.4390848732101434e-06, |
| "loss": 0.4434, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.014479884160926713, |
| "grad_norm": 0.6793121490590498, |
| "learning_rate": 1.447084233261339e-06, |
| "loss": 0.4313, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.014559883520931833, |
| "grad_norm": 0.7503593630371851, |
| "learning_rate": 1.4550835933125352e-06, |
| "loss": 0.4363, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.014639882880936952, |
| "grad_norm": 0.6631126747097913, |
| "learning_rate": 1.463082953363731e-06, |
| "loss": 0.4172, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.014719882240942072, |
| "grad_norm": 0.6201041671912539, |
| "learning_rate": 1.4710823134149268e-06, |
| "loss": 0.426, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.014799881600947192, |
| "grad_norm": 0.7986575840999489, |
| "learning_rate": 1.479081673466123e-06, |
| "loss": 0.4418, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.014879880960952312, |
| "grad_norm": 0.7485644191714067, |
| "learning_rate": 1.4870810335173188e-06, |
| "loss": 0.4315, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.014959880320957433, |
| "grad_norm": 0.767180980787683, |
| "learning_rate": 1.4950803935685146e-06, |
| "loss": 0.44, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.015039879680962553, |
| "grad_norm": 0.7085244935047633, |
| "learning_rate": 1.5030797536197107e-06, |
| "loss": 0.4197, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.015119879040967673, |
| "grad_norm": 0.9096558305501717, |
| "learning_rate": 1.5110791136709064e-06, |
| "loss": 0.4276, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.015199878400972791, |
| "grad_norm": 0.7782449144127891, |
| "learning_rate": 1.5190784737221023e-06, |
| "loss": 0.4433, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.015279877760977912, |
| "grad_norm": 0.6784288997728832, |
| "learning_rate": 1.5270778337732984e-06, |
| "loss": 0.408, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.015359877120983032, |
| "grad_norm": 0.7394160138728095, |
| "learning_rate": 1.5350771938244941e-06, |
| "loss": 0.427, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.015439876480988152, |
| "grad_norm": 0.7020184796096351, |
| "learning_rate": 1.54307655387569e-06, |
| "loss": 0.4585, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.015519875840993272, |
| "grad_norm": 0.6570216742745165, |
| "learning_rate": 1.551075913926886e-06, |
| "loss": 0.4203, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.015599875200998392, |
| "grad_norm": 0.6492915506893296, |
| "learning_rate": 1.5590752739780819e-06, |
| "loss": 0.4225, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.01567987456100351, |
| "grad_norm": 0.6404207960330748, |
| "learning_rate": 1.5670746340292778e-06, |
| "loss": 0.4155, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.01575987392100863, |
| "grad_norm": 0.7069937527425317, |
| "learning_rate": 1.5750739940804737e-06, |
| "loss": 0.4017, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.01583987328101375, |
| "grad_norm": 0.7497814910327999, |
| "learning_rate": 1.5830733541316694e-06, |
| "loss": 0.4336, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.01591987264101887, |
| "grad_norm": 0.997502334828043, |
| "learning_rate": 1.5910727141828655e-06, |
| "loss": 0.4252, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.01599987200102399, |
| "grad_norm": 0.7051737520195863, |
| "learning_rate": 1.5990720742340615e-06, |
| "loss": 0.4136, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.016079871361029112, |
| "grad_norm": 0.7358677233078202, |
| "learning_rate": 1.6070714342852572e-06, |
| "loss": 0.4041, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.016159870721034232, |
| "grad_norm": 0.7232160368639224, |
| "learning_rate": 1.6150707943364533e-06, |
| "loss": 0.3925, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.016239870081039352, |
| "grad_norm": 0.7492396218793221, |
| "learning_rate": 1.6230701543876492e-06, |
| "loss": 0.4453, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.016319869441044473, |
| "grad_norm": 0.7879468186487532, |
| "learning_rate": 1.631069514438845e-06, |
| "loss": 0.4143, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.016399868801049593, |
| "grad_norm": 0.7027247029038095, |
| "learning_rate": 1.639068874490041e-06, |
| "loss": 0.4487, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.016479868161054713, |
| "grad_norm": 0.6646702988688921, |
| "learning_rate": 1.6470682345412367e-06, |
| "loss": 0.4189, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.016559867521059833, |
| "grad_norm": 0.7214592394412016, |
| "learning_rate": 1.6550675945924326e-06, |
| "loss": 0.4298, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.01663986688106495, |
| "grad_norm": 0.7009780752105863, |
| "learning_rate": 1.6630669546436288e-06, |
| "loss": 0.43, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.01671986624107007, |
| "grad_norm": 0.6802031501540443, |
| "learning_rate": 1.6710663146948245e-06, |
| "loss": 0.4064, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.01679986560107519, |
| "grad_norm": 0.7764568933093239, |
| "learning_rate": 1.6790656747460204e-06, |
| "loss": 0.4192, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.01687986496108031, |
| "grad_norm": 0.7257831241745193, |
| "learning_rate": 1.6870650347972165e-06, |
| "loss": 0.4137, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.01695986432108543, |
| "grad_norm": 0.7231093177815116, |
| "learning_rate": 1.6950643948484122e-06, |
| "loss": 0.4321, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.01703986368109055, |
| "grad_norm": 0.7604848904942334, |
| "learning_rate": 1.7030637548996081e-06, |
| "loss": 0.4173, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.01711986304109567, |
| "grad_norm": 0.8671202797505491, |
| "learning_rate": 1.7110631149508043e-06, |
| "loss": 0.4172, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.01719986240110079, |
| "grad_norm": 0.8818905733108134, |
| "learning_rate": 1.719062475002e-06, |
| "loss": 0.4096, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.01727986176110591, |
| "grad_norm": 0.7073377083366502, |
| "learning_rate": 1.7270618350531959e-06, |
| "loss": 0.4327, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.01735986112111103, |
| "grad_norm": 0.9637856611728648, |
| "learning_rate": 1.735061195104392e-06, |
| "loss": 0.457, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.017439860481116152, |
| "grad_norm": 0.7763253238443666, |
| "learning_rate": 1.7430605551555877e-06, |
| "loss": 0.4444, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.017519859841121272, |
| "grad_norm": 0.7532387544984117, |
| "learning_rate": 1.7510599152067836e-06, |
| "loss": 0.4193, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.017599859201126392, |
| "grad_norm": 0.7001703245870058, |
| "learning_rate": 1.7590592752579793e-06, |
| "loss": 0.4334, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.017679858561131512, |
| "grad_norm": 0.653320909525973, |
| "learning_rate": 1.7670586353091755e-06, |
| "loss": 0.4137, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.01775985792113663, |
| "grad_norm": 0.6431905950303065, |
| "learning_rate": 1.7750579953603714e-06, |
| "loss": 0.4056, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.01783985728114175, |
| "grad_norm": 0.6355510699319944, |
| "learning_rate": 1.783057355411567e-06, |
| "loss": 0.3967, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.01791985664114687, |
| "grad_norm": 1.2693108108319435, |
| "learning_rate": 1.7910567154627632e-06, |
| "loss": 0.3958, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.01799985600115199, |
| "grad_norm": 0.7084484301635297, |
| "learning_rate": 1.7990560755139591e-06, |
| "loss": 0.4341, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.01807985536115711, |
| "grad_norm": 0.7305495029250423, |
| "learning_rate": 1.8070554355651548e-06, |
| "loss": 0.4343, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.01815985472116223, |
| "grad_norm": 0.7855576477357182, |
| "learning_rate": 1.8150547956163507e-06, |
| "loss": 0.4158, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.01823985408116735, |
| "grad_norm": 0.8272843321029247, |
| "learning_rate": 1.8230541556675469e-06, |
| "loss": 0.4355, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.01831985344117247, |
| "grad_norm": 0.753702291149743, |
| "learning_rate": 1.8310535157187426e-06, |
| "loss": 0.4025, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.01839985280117759, |
| "grad_norm": 0.7598104262546688, |
| "learning_rate": 1.8390528757699385e-06, |
| "loss": 0.413, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.01847985216118271, |
| "grad_norm": 0.7611395927729616, |
| "learning_rate": 1.8470522358211346e-06, |
| "loss": 0.4373, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.01855985152118783, |
| "grad_norm": 0.760722694363519, |
| "learning_rate": 1.8550515958723303e-06, |
| "loss": 0.4179, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.01863985088119295, |
| "grad_norm": 0.5957047498546761, |
| "learning_rate": 1.8630509559235262e-06, |
| "loss": 0.4231, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.01871985024119807, |
| "grad_norm": 0.7186940138009597, |
| "learning_rate": 1.8710503159747224e-06, |
| "loss": 0.4171, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.018799849601203192, |
| "grad_norm": 0.7760364408450707, |
| "learning_rate": 1.879049676025918e-06, |
| "loss": 0.4003, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.018879848961208312, |
| "grad_norm": 0.7481302705420546, |
| "learning_rate": 1.887049036077114e-06, |
| "loss": 0.4182, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.01895984832121343, |
| "grad_norm": 0.7358536669321117, |
| "learning_rate": 1.8950483961283097e-06, |
| "loss": 0.4272, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.01903984768121855, |
| "grad_norm": 0.7602582219791679, |
| "learning_rate": 1.9030477561795058e-06, |
| "loss": 0.4157, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.01911984704122367, |
| "grad_norm": 0.7426387012574098, |
| "learning_rate": 1.911047116230702e-06, |
| "loss": 0.4136, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.01919984640122879, |
| "grad_norm": 0.6954613448087369, |
| "learning_rate": 1.9190464762818974e-06, |
| "loss": 0.4081, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.01927984576123391, |
| "grad_norm": 0.7604294663875293, |
| "learning_rate": 1.9270458363330933e-06, |
| "loss": 0.4116, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.01935984512123903, |
| "grad_norm": 0.7196806209080392, |
| "learning_rate": 1.9350451963842897e-06, |
| "loss": 0.4253, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.01943984448124415, |
| "grad_norm": 0.7619473447002568, |
| "learning_rate": 1.943044556435485e-06, |
| "loss": 0.4048, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.01951984384124927, |
| "grad_norm": 0.7052222380767645, |
| "learning_rate": 1.951043916486681e-06, |
| "loss": 0.4092, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.01959984320125439, |
| "grad_norm": 0.6311702076235657, |
| "learning_rate": 1.9590432765378774e-06, |
| "loss": 0.409, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.01967984256125951, |
| "grad_norm": 0.7989650814927569, |
| "learning_rate": 1.967042636589073e-06, |
| "loss": 0.4031, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.01975984192126463, |
| "grad_norm": 0.7243725001067509, |
| "learning_rate": 1.975041996640269e-06, |
| "loss": 0.4139, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.01983984128126975, |
| "grad_norm": 0.7689695050248523, |
| "learning_rate": 1.983041356691465e-06, |
| "loss": 0.419, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.01991984064127487, |
| "grad_norm": 0.6784111247678405, |
| "learning_rate": 1.9910407167426607e-06, |
| "loss": 0.4255, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.01999984000127999, |
| "grad_norm": 0.696241896489532, |
| "learning_rate": 1.9990400767938566e-06, |
| "loss": 0.4387, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.020079839361285108, |
| "grad_norm": 0.7337662500397681, |
| "learning_rate": 2.0070394368450525e-06, |
| "loss": 0.4107, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.02015983872129023, |
| "grad_norm": 0.7522121421950386, |
| "learning_rate": 2.0150387968962484e-06, |
| "loss": 0.4341, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.02023983808129535, |
| "grad_norm": 0.7635117961228429, |
| "learning_rate": 2.0230381569474443e-06, |
| "loss": 0.444, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.02031983744130047, |
| "grad_norm": 0.7258340461733414, |
| "learning_rate": 2.0310375169986402e-06, |
| "loss": 0.4532, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.02039983680130559, |
| "grad_norm": 0.8227531741667117, |
| "learning_rate": 2.039036877049836e-06, |
| "loss": 0.4211, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.02047983616131071, |
| "grad_norm": 0.8046278992558573, |
| "learning_rate": 2.047036237101032e-06, |
| "loss": 0.4304, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.02055983552131583, |
| "grad_norm": 0.8583586766230429, |
| "learning_rate": 2.055035597152228e-06, |
| "loss": 0.389, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.02063983488132095, |
| "grad_norm": 0.7059091393994599, |
| "learning_rate": 2.063034957203424e-06, |
| "loss": 0.4143, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.02071983424132607, |
| "grad_norm": 0.7169812723780176, |
| "learning_rate": 2.07103431725462e-06, |
| "loss": 0.4231, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.02079983360133119, |
| "grad_norm": 0.744498117665369, |
| "learning_rate": 2.0790336773058157e-06, |
| "loss": 0.3931, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.02087983296133631, |
| "grad_norm": 0.8714679551154206, |
| "learning_rate": 2.0870330373570117e-06, |
| "loss": 0.4227, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.02095983232134143, |
| "grad_norm": 0.6310793333444085, |
| "learning_rate": 2.0950323974082076e-06, |
| "loss": 0.4092, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.02103983168134655, |
| "grad_norm": 0.7793866057944923, |
| "learning_rate": 2.1030317574594035e-06, |
| "loss": 0.4139, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.02111983104135167, |
| "grad_norm": 0.8117526334063988, |
| "learning_rate": 2.1110311175105994e-06, |
| "loss": 0.4154, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.021199830401356787, |
| "grad_norm": 0.7466901509429923, |
| "learning_rate": 2.1190304775617953e-06, |
| "loss": 0.4139, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.021279829761361908, |
| "grad_norm": 0.9396797684739518, |
| "learning_rate": 2.1270298376129912e-06, |
| "loss": 0.4166, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.021359829121367028, |
| "grad_norm": 0.7212101339600039, |
| "learning_rate": 2.135029197664187e-06, |
| "loss": 0.4299, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.021439828481372148, |
| "grad_norm": 0.9051397916977733, |
| "learning_rate": 2.1430285577153826e-06, |
| "loss": 0.4203, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.021519827841377268, |
| "grad_norm": 0.6759148258397492, |
| "learning_rate": 2.151027917766579e-06, |
| "loss": 0.4065, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.02159982720138239, |
| "grad_norm": 0.7191776566727898, |
| "learning_rate": 2.159027277817775e-06, |
| "loss": 0.3957, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.02167982656138751, |
| "grad_norm": 0.7752464373073066, |
| "learning_rate": 2.1670266378689704e-06, |
| "loss": 0.4198, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.02175982592139263, |
| "grad_norm": 0.7096879372714776, |
| "learning_rate": 2.1750259979201667e-06, |
| "loss": 0.4337, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.02183982528139775, |
| "grad_norm": 0.7577341998111574, |
| "learning_rate": 2.1830253579713626e-06, |
| "loss": 0.4484, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.02191982464140287, |
| "grad_norm": 0.8303467101337862, |
| "learning_rate": 2.191024718022558e-06, |
| "loss": 0.436, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.02199982400140799, |
| "grad_norm": 0.740434972574641, |
| "learning_rate": 2.1990240780737545e-06, |
| "loss": 0.4026, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.02207982336141311, |
| "grad_norm": 0.7385703007931491, |
| "learning_rate": 2.2070234381249504e-06, |
| "loss": 0.3994, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.02215982272141823, |
| "grad_norm": 0.804160370262748, |
| "learning_rate": 2.215022798176146e-06, |
| "loss": 0.4247, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.02223982208142335, |
| "grad_norm": 0.9699580286471233, |
| "learning_rate": 2.2230221582273422e-06, |
| "loss": 0.4158, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.02231982144142847, |
| "grad_norm": 0.854317456477185, |
| "learning_rate": 2.231021518278538e-06, |
| "loss": 0.4224, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.022399820801433587, |
| "grad_norm": 0.7261443031804087, |
| "learning_rate": 2.2390208783297336e-06, |
| "loss": 0.4153, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.022479820161438707, |
| "grad_norm": 0.7221332661288903, |
| "learning_rate": 2.24702023838093e-06, |
| "loss": 0.4287, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.022559819521443827, |
| "grad_norm": 0.8879662037891538, |
| "learning_rate": 2.2550195984321255e-06, |
| "loss": 0.4221, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.022639818881448948, |
| "grad_norm": 0.7885980884870208, |
| "learning_rate": 2.2630189584833214e-06, |
| "loss": 0.4394, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.022719818241454068, |
| "grad_norm": 4.841458093614334, |
| "learning_rate": 2.2710183185345173e-06, |
| "loss": 0.4306, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.022799817601459188, |
| "grad_norm": 0.762762416297712, |
| "learning_rate": 2.279017678585713e-06, |
| "loss": 0.4248, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.022879816961464308, |
| "grad_norm": 0.8448845187481899, |
| "learning_rate": 2.287017038636909e-06, |
| "loss": 0.4016, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.02295981632146943, |
| "grad_norm": 0.7119692504285716, |
| "learning_rate": 2.295016398688105e-06, |
| "loss": 0.4187, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.02303981568147455, |
| "grad_norm": 0.7322860524669678, |
| "learning_rate": 2.303015758739301e-06, |
| "loss": 0.3992, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.02311981504147967, |
| "grad_norm": 0.7809553100217358, |
| "learning_rate": 2.311015118790497e-06, |
| "loss": 0.4323, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.02319981440148479, |
| "grad_norm": 0.7497042609672665, |
| "learning_rate": 2.3190144788416928e-06, |
| "loss": 0.4216, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.02327981376148991, |
| "grad_norm": 0.77908486703842, |
| "learning_rate": 2.3270138388928887e-06, |
| "loss": 0.4139, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.02335981312149503, |
| "grad_norm": 0.8259941131428074, |
| "learning_rate": 2.3350131989440846e-06, |
| "loss": 0.4298, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.02343981248150015, |
| "grad_norm": 0.7315958297934407, |
| "learning_rate": 2.3430125589952805e-06, |
| "loss": 0.4148, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.023519811841505266, |
| "grad_norm": 0.7127501348213877, |
| "learning_rate": 2.3510119190464764e-06, |
| "loss": 0.4226, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.023599811201510387, |
| "grad_norm": 0.7004822200975431, |
| "learning_rate": 2.3590112790976724e-06, |
| "loss": 0.4037, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.023679810561515507, |
| "grad_norm": 0.7640873416448367, |
| "learning_rate": 2.3670106391488683e-06, |
| "loss": 0.4196, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.023759809921520627, |
| "grad_norm": 0.7562584220469137, |
| "learning_rate": 2.375009999200064e-06, |
| "loss": 0.4226, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.023839809281525747, |
| "grad_norm": 0.700408619087647, |
| "learning_rate": 2.38300935925126e-06, |
| "loss": 0.4194, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.023919808641530867, |
| "grad_norm": 0.6926553567290514, |
| "learning_rate": 2.391008719302456e-06, |
| "loss": 0.4377, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.023999808001535988, |
| "grad_norm": 0.8080569171783707, |
| "learning_rate": 2.399008079353652e-06, |
| "loss": 0.425, |
| "step": 3000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 125001, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 173052491300864.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|