{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1442, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006936616665221538, "grad_norm": 0.08399141805844908, "learning_rate": 6.896551724137931e-08, "loss": 0.8362026214599609, "step": 1, "token_acc": 0.7844009193564505 }, { "epoch": 0.0013873233330443076, "grad_norm": 0.08784838389966729, "learning_rate": 1.3793103448275863e-07, "loss": 0.851883590221405, "step": 2, "token_acc": 0.7807218078926658 }, { "epoch": 0.0020809849995664614, "grad_norm": 0.07990751801277315, "learning_rate": 2.0689655172413796e-07, "loss": 0.8301538228988647, "step": 3, "token_acc": 0.7880121108903396 }, { "epoch": 0.0027746466660886152, "grad_norm": 0.08002628379159607, "learning_rate": 2.7586206896551726e-07, "loss": 0.8089802861213684, "step": 4, "token_acc": 0.7890976352821704 }, { "epoch": 0.003468308332610769, "grad_norm": 0.08075156451650223, "learning_rate": 3.4482758620689656e-07, "loss": 0.8257039785385132, "step": 5, "token_acc": 0.7880987280722492 }, { "epoch": 0.004161969999132923, "grad_norm": 0.08638121621125884, "learning_rate": 4.137931034482759e-07, "loss": 0.875056803226471, "step": 6, "token_acc": 0.7754661677580293 }, { "epoch": 0.004855631665655077, "grad_norm": 0.07970093629503781, "learning_rate": 4.827586206896552e-07, "loss": 0.8205430507659912, "step": 7, "token_acc": 0.7870488746056135 }, { "epoch": 0.0055492933321772304, "grad_norm": 0.08129008734436946, "learning_rate": 5.517241379310345e-07, "loss": 0.8296421766281128, "step": 8, "token_acc": 0.7859504722030592 }, { "epoch": 0.006242954998699385, "grad_norm": 0.08535825411600195, "learning_rate": 6.206896551724139e-07, "loss": 0.8665030002593994, "step": 9, "token_acc": 0.7765346547430091 }, { "epoch": 0.006936616665221538, "grad_norm": 0.0787300907620576, "learning_rate": 6.896551724137931e-07, "loss": 0.8208533525466919, "step": 10, "token_acc": 0.7866293374056164 }, { "epoch": 0.007630278331743692, "grad_norm": 0.0787978578859864, "learning_rate": 7.586206896551725e-07, "loss": 0.8295032382011414, "step": 11, "token_acc": 0.7848363401590918 }, { "epoch": 0.008323939998265846, "grad_norm": 0.07577369555869191, "learning_rate": 8.275862068965518e-07, "loss": 0.8084601163864136, "step": 12, "token_acc": 0.79043602219578 }, { "epoch": 0.009017601664787999, "grad_norm": 0.06841821760937615, "learning_rate": 8.965517241379311e-07, "loss": 0.8244179487228394, "step": 13, "token_acc": 0.7842221000549753 }, { "epoch": 0.009711263331310154, "grad_norm": 0.06549767335820104, "learning_rate": 9.655172413793103e-07, "loss": 0.7989367246627808, "step": 14, "token_acc": 0.7893403476447914 }, { "epoch": 0.010404924997832308, "grad_norm": 0.06583655309766888, "learning_rate": 1.0344827586206898e-06, "loss": 0.8271135687828064, "step": 15, "token_acc": 0.7798498607277133 }, { "epoch": 0.011098586664354461, "grad_norm": 0.060771499623624825, "learning_rate": 1.103448275862069e-06, "loss": 0.7940614819526672, "step": 16, "token_acc": 0.7883717050383717 }, { "epoch": 0.011792248330876614, "grad_norm": 0.0453992963677153, "learning_rate": 1.1724137931034483e-06, "loss": 0.7817755937576294, "step": 17, "token_acc": 0.7850267511831066 }, { "epoch": 0.01248590999739877, "grad_norm": 0.0438989557756506, "learning_rate": 1.2413793103448277e-06, "loss": 0.7984982132911682, "step": 18, "token_acc": 0.7822590683894739 }, { "epoch": 0.013179571663920923, "grad_norm": 0.04082740415458147, "learning_rate": 1.3103448275862072e-06, "loss": 0.7682293653488159, "step": 19, "token_acc": 0.7895700675061043 }, { "epoch": 0.013873233330443076, "grad_norm": 0.03872419750396462, "learning_rate": 1.3793103448275862e-06, "loss": 0.7530816793441772, "step": 20, "token_acc": 0.7930515920474595 }, { "epoch": 0.01456689499696523, "grad_norm": 0.03844558002633521, "learning_rate": 1.4482758620689657e-06, "loss": 0.7586860656738281, "step": 21, "token_acc": 0.7920527200657773 }, { "epoch": 0.015260556663487385, "grad_norm": 0.036636222827711375, "learning_rate": 1.517241379310345e-06, "loss": 0.7528326511383057, "step": 22, "token_acc": 0.7936526931776026 }, { "epoch": 0.015954218330009536, "grad_norm": 0.03406656488208333, "learning_rate": 1.5862068965517244e-06, "loss": 0.7563762664794922, "step": 23, "token_acc": 0.7913676782606622 }, { "epoch": 0.01664787999653169, "grad_norm": 0.034185896384559905, "learning_rate": 1.6551724137931037e-06, "loss": 0.7306485176086426, "step": 24, "token_acc": 0.7971748367446699 }, { "epoch": 0.017341541663053846, "grad_norm": 0.0342467249770327, "learning_rate": 1.724137931034483e-06, "loss": 0.7241615653038025, "step": 25, "token_acc": 0.7972557427258805 }, { "epoch": 0.018035203329575998, "grad_norm": 0.033364052369727774, "learning_rate": 1.7931034482758622e-06, "loss": 0.7143334150314331, "step": 26, "token_acc": 0.7999321434197293 }, { "epoch": 0.018728864996098153, "grad_norm": 0.031660577446445015, "learning_rate": 1.8620689655172416e-06, "loss": 0.718389630317688, "step": 27, "token_acc": 0.7989284293769449 }, { "epoch": 0.01942252666262031, "grad_norm": 0.03294794319777035, "learning_rate": 1.9310344827586207e-06, "loss": 0.731799840927124, "step": 28, "token_acc": 0.7956352625489279 }, { "epoch": 0.02011618832914246, "grad_norm": 0.03306601402264085, "learning_rate": 2.0000000000000003e-06, "loss": 0.7162213921546936, "step": 29, "token_acc": 0.7970820607315041 }, { "epoch": 0.020809849995664615, "grad_norm": 0.031253802928620386, "learning_rate": 2.0689655172413796e-06, "loss": 0.7097111344337463, "step": 30, "token_acc": 0.8004319136172765 }, { "epoch": 0.021503511662186767, "grad_norm": 0.0307058566063796, "learning_rate": 2.137931034482759e-06, "loss": 0.7243767976760864, "step": 31, "token_acc": 0.7953365585014336 }, { "epoch": 0.022197173328708922, "grad_norm": 0.02799535096418621, "learning_rate": 2.206896551724138e-06, "loss": 0.7084879875183105, "step": 32, "token_acc": 0.7990539801763685 }, { "epoch": 0.022890834995231077, "grad_norm": 0.02713980385877347, "learning_rate": 2.2758620689655173e-06, "loss": 0.708966851234436, "step": 33, "token_acc": 0.7961420267318301 }, { "epoch": 0.02358449666175323, "grad_norm": 0.024938838416353862, "learning_rate": 2.3448275862068966e-06, "loss": 0.6749756336212158, "step": 34, "token_acc": 0.8073871702795242 }, { "epoch": 0.024278158328275384, "grad_norm": 0.026809940210059995, "learning_rate": 2.4137931034482762e-06, "loss": 0.6967236995697021, "step": 35, "token_acc": 0.8005203493774391 }, { "epoch": 0.02497181999479754, "grad_norm": 0.025609426257491935, "learning_rate": 2.4827586206896555e-06, "loss": 0.6764301061630249, "step": 36, "token_acc": 0.807175776797661 }, { "epoch": 0.02566548166131969, "grad_norm": 0.021479698057608392, "learning_rate": 2.5517241379310347e-06, "loss": 0.6373404860496521, "step": 37, "token_acc": 0.8188886574831652 }, { "epoch": 0.026359143327841845, "grad_norm": 0.024178514587334516, "learning_rate": 2.6206896551724144e-06, "loss": 0.6553655862808228, "step": 38, "token_acc": 0.8131120352190827 }, { "epoch": 0.027052804994364, "grad_norm": 0.023485182988780308, "learning_rate": 2.6896551724137932e-06, "loss": 0.6682815551757812, "step": 39, "token_acc": 0.8090370117354283 }, { "epoch": 0.027746466660886152, "grad_norm": 0.023089045249078535, "learning_rate": 2.7586206896551725e-06, "loss": 0.6481136083602905, "step": 40, "token_acc": 0.8140935579636666 }, { "epoch": 0.028440128327408307, "grad_norm": 0.02076797729785688, "learning_rate": 2.827586206896552e-06, "loss": 0.6561841368675232, "step": 41, "token_acc": 0.8119998564610471 }, { "epoch": 0.02913378999393046, "grad_norm": 0.022619577587508856, "learning_rate": 2.8965517241379314e-06, "loss": 0.6610379219055176, "step": 42, "token_acc": 0.8102211411426243 }, { "epoch": 0.029827451660452614, "grad_norm": 0.018584714555695287, "learning_rate": 2.9655172413793102e-06, "loss": 0.6233815550804138, "step": 43, "token_acc": 0.8191165632503625 }, { "epoch": 0.03052111332697477, "grad_norm": 0.019031397957309525, "learning_rate": 3.03448275862069e-06, "loss": 0.6682798862457275, "step": 44, "token_acc": 0.8063199670523045 }, { "epoch": 0.03121477499349692, "grad_norm": 0.01774541392995265, "learning_rate": 3.103448275862069e-06, "loss": 0.6576106548309326, "step": 45, "token_acc": 0.8107735844331472 }, { "epoch": 0.03190843666001907, "grad_norm": 0.02018222310624702, "learning_rate": 3.172413793103449e-06, "loss": 0.648489236831665, "step": 46, "token_acc": 0.8123889713576522 }, { "epoch": 0.03260209832654123, "grad_norm": 0.018881607115503886, "learning_rate": 3.2413793103448277e-06, "loss": 0.6482137441635132, "step": 47, "token_acc": 0.8111542931407221 }, { "epoch": 0.03329575999306338, "grad_norm": 0.018877252472627712, "learning_rate": 3.3103448275862073e-06, "loss": 0.6565637588500977, "step": 48, "token_acc": 0.8110030305992766 }, { "epoch": 0.03398942165958554, "grad_norm": 0.017942348445432882, "learning_rate": 3.3793103448275866e-06, "loss": 0.6420289278030396, "step": 49, "token_acc": 0.8116050176905758 }, { "epoch": 0.03468308332610769, "grad_norm": 0.018157066918151755, "learning_rate": 3.448275862068966e-06, "loss": 0.6173396706581116, "step": 50, "token_acc": 0.8192398035447362 }, { "epoch": 0.03537674499262985, "grad_norm": 0.01772915145474612, "learning_rate": 3.517241379310345e-06, "loss": 0.6004500389099121, "step": 51, "token_acc": 0.8236404461113305 }, { "epoch": 0.036070406659151996, "grad_norm": 0.016934618557755795, "learning_rate": 3.5862068965517243e-06, "loss": 0.6024036407470703, "step": 52, "token_acc": 0.825062375249501 }, { "epoch": 0.03676406832567415, "grad_norm": 0.015842381300972465, "learning_rate": 3.655172413793104e-06, "loss": 0.6160068511962891, "step": 53, "token_acc": 0.8224183014339785 }, { "epoch": 0.037457729992196306, "grad_norm": 0.016142159961798155, "learning_rate": 3.7241379310344832e-06, "loss": 0.6137673854827881, "step": 54, "token_acc": 0.8217016984141878 }, { "epoch": 0.03815139165871846, "grad_norm": 0.016012732792265744, "learning_rate": 3.793103448275862e-06, "loss": 0.609256386756897, "step": 55, "token_acc": 0.8211389086275759 }, { "epoch": 0.03884505332524062, "grad_norm": 0.016576612013096417, "learning_rate": 3.862068965517241e-06, "loss": 0.6299525499343872, "step": 56, "token_acc": 0.8168784499685103 }, { "epoch": 0.039538714991762765, "grad_norm": 0.01830584078375312, "learning_rate": 3.931034482758621e-06, "loss": 0.6330629587173462, "step": 57, "token_acc": 0.8147105901791708 }, { "epoch": 0.04023237665828492, "grad_norm": 0.016578446133597376, "learning_rate": 4.000000000000001e-06, "loss": 0.6150146722793579, "step": 58, "token_acc": 0.820306801872809 }, { "epoch": 0.040926038324807075, "grad_norm": 0.01569884080264784, "learning_rate": 4.0689655172413795e-06, "loss": 0.5982812643051147, "step": 59, "token_acc": 0.8262216589212787 }, { "epoch": 0.04161969999132923, "grad_norm": 0.016136679948085552, "learning_rate": 4.137931034482759e-06, "loss": 0.6229971647262573, "step": 60, "token_acc": 0.8165855168390852 }, { "epoch": 0.042313361657851385, "grad_norm": 0.01623331122752101, "learning_rate": 4.206896551724138e-06, "loss": 0.6000616550445557, "step": 61, "token_acc": 0.8226992571648251 }, { "epoch": 0.04300702332437353, "grad_norm": 0.016584374140497977, "learning_rate": 4.275862068965518e-06, "loss": 0.6001010537147522, "step": 62, "token_acc": 0.8232222805069962 }, { "epoch": 0.04370068499089569, "grad_norm": 0.0157814290644338, "learning_rate": 4.3448275862068965e-06, "loss": 0.6042478680610657, "step": 63, "token_acc": 0.8211784608112647 }, { "epoch": 0.044394346657417844, "grad_norm": 0.016414114505732155, "learning_rate": 4.413793103448276e-06, "loss": 0.6099900007247925, "step": 64, "token_acc": 0.8204395278045883 }, { "epoch": 0.04508800832394, "grad_norm": 0.014234981197856078, "learning_rate": 4.482758620689656e-06, "loss": 0.560578465461731, "step": 65, "token_acc": 0.8342567803515307 }, { "epoch": 0.045781669990462154, "grad_norm": 0.015234271440671387, "learning_rate": 4.551724137931035e-06, "loss": 0.6104633808135986, "step": 66, "token_acc": 0.8224189574375573 }, { "epoch": 0.04647533165698431, "grad_norm": 0.015242104053248582, "learning_rate": 4.620689655172414e-06, "loss": 0.6128778457641602, "step": 67, "token_acc": 0.8204900147051848 }, { "epoch": 0.04716899332350646, "grad_norm": 0.01859068604224133, "learning_rate": 4.689655172413793e-06, "loss": 0.6185278296470642, "step": 68, "token_acc": 0.8180870639403961 }, { "epoch": 0.04786265499002861, "grad_norm": 0.015591143481802128, "learning_rate": 4.758620689655173e-06, "loss": 0.6072246432304382, "step": 69, "token_acc": 0.8207720471792533 }, { "epoch": 0.04855631665655077, "grad_norm": 0.018314215573983033, "learning_rate": 4.8275862068965525e-06, "loss": 0.600108802318573, "step": 70, "token_acc": 0.8223739415668625 }, { "epoch": 0.04924997832307292, "grad_norm": 0.015225963908821314, "learning_rate": 4.896551724137931e-06, "loss": 0.5860385894775391, "step": 71, "token_acc": 0.8260890435617425 }, { "epoch": 0.04994363998959508, "grad_norm": 0.014438351664408024, "learning_rate": 4.965517241379311e-06, "loss": 0.6024948358535767, "step": 72, "token_acc": 0.8214920491978103 }, { "epoch": 0.050637301656117226, "grad_norm": 0.014825686700040053, "learning_rate": 5.03448275862069e-06, "loss": 0.5928671360015869, "step": 73, "token_acc": 0.8253988367525374 }, { "epoch": 0.05133096332263938, "grad_norm": 0.014231602273815742, "learning_rate": 5.1034482758620695e-06, "loss": 0.5919697880744934, "step": 74, "token_acc": 0.8256004142004871 }, { "epoch": 0.052024624989161536, "grad_norm": 0.016506683233429353, "learning_rate": 5.172413793103449e-06, "loss": 0.5765053033828735, "step": 75, "token_acc": 0.8284337410675686 }, { "epoch": 0.05271828665568369, "grad_norm": 0.014674437994144803, "learning_rate": 5.241379310344829e-06, "loss": 0.5857199430465698, "step": 76, "token_acc": 0.8257197314165411 }, { "epoch": 0.053411948322205846, "grad_norm": 0.01490796384135624, "learning_rate": 5.310344827586207e-06, "loss": 0.5952301025390625, "step": 77, "token_acc": 0.825292337403879 }, { "epoch": 0.054105609988728, "grad_norm": 0.022326247582322125, "learning_rate": 5.3793103448275865e-06, "loss": 0.588671863079071, "step": 78, "token_acc": 0.8262040795870296 }, { "epoch": 0.05479927165525015, "grad_norm": 0.015045222843786836, "learning_rate": 5.448275862068966e-06, "loss": 0.599948525428772, "step": 79, "token_acc": 0.8225045502442763 }, { "epoch": 0.055492933321772304, "grad_norm": 0.015675725435358494, "learning_rate": 5.517241379310345e-06, "loss": 0.6112521886825562, "step": 80, "token_acc": 0.8207593471175264 }, { "epoch": 0.05618659498829446, "grad_norm": 0.014765701928163252, "learning_rate": 5.586206896551725e-06, "loss": 0.5791158676147461, "step": 81, "token_acc": 0.8303918317066237 }, { "epoch": 0.056880256654816615, "grad_norm": 0.015280282221277969, "learning_rate": 5.655172413793104e-06, "loss": 0.5852774381637573, "step": 82, "token_acc": 0.8261926351317314 }, { "epoch": 0.05757391832133877, "grad_norm": 0.014500211854317173, "learning_rate": 5.724137931034483e-06, "loss": 0.5779550075531006, "step": 83, "token_acc": 0.8301994161017369 }, { "epoch": 0.05826757998786092, "grad_norm": 0.017051988575040793, "learning_rate": 5.793103448275863e-06, "loss": 0.6115071773529053, "step": 84, "token_acc": 0.8196408285596887 }, { "epoch": 0.05896124165438307, "grad_norm": 0.014831303837451583, "learning_rate": 5.862068965517242e-06, "loss": 0.5790849328041077, "step": 85, "token_acc": 0.8296567484376457 }, { "epoch": 0.05965490332090523, "grad_norm": 0.02377326603038526, "learning_rate": 5.9310344827586205e-06, "loss": 0.5914106369018555, "step": 86, "token_acc": 0.8244405777350547 }, { "epoch": 0.06034856498742738, "grad_norm": 0.015525032252390108, "learning_rate": 6e-06, "loss": 0.5857087969779968, "step": 87, "token_acc": 0.8257571122667482 }, { "epoch": 0.06104222665394954, "grad_norm": 0.014733585995455873, "learning_rate": 6.06896551724138e-06, "loss": 0.5570677518844604, "step": 88, "token_acc": 0.8347208146324985 }, { "epoch": 0.06173588832047169, "grad_norm": 0.01456018184274463, "learning_rate": 6.1379310344827595e-06, "loss": 0.5797313451766968, "step": 89, "token_acc": 0.8293581630385196 }, { "epoch": 0.06242954998699384, "grad_norm": 0.01466591236258203, "learning_rate": 6.206896551724138e-06, "loss": 0.5639247298240662, "step": 90, "token_acc": 0.833207438581346 }, { "epoch": 0.063123211653516, "grad_norm": 0.014574321647642738, "learning_rate": 6.275862068965518e-06, "loss": 0.5485864281654358, "step": 91, "token_acc": 0.8366429417378606 }, { "epoch": 0.06381687332003814, "grad_norm": 0.01532330953709679, "learning_rate": 6.344827586206898e-06, "loss": 0.5889104604721069, "step": 92, "token_acc": 0.8256909321891704 }, { "epoch": 0.0645105349865603, "grad_norm": 0.01568027198558858, "learning_rate": 6.413793103448276e-06, "loss": 0.5846375823020935, "step": 93, "token_acc": 0.82678943925349 }, { "epoch": 0.06520419665308246, "grad_norm": 0.014528693701179145, "learning_rate": 6.482758620689655e-06, "loss": 0.5643830299377441, "step": 94, "token_acc": 0.8330634389664425 }, { "epoch": 0.06589785831960461, "grad_norm": 0.014471469801909373, "learning_rate": 6.551724137931035e-06, "loss": 0.5512454509735107, "step": 95, "token_acc": 0.8363627948952319 }, { "epoch": 0.06659151998612677, "grad_norm": 0.014623840972436165, "learning_rate": 6.620689655172415e-06, "loss": 0.5689704418182373, "step": 96, "token_acc": 0.8302617981398553 }, { "epoch": 0.06728518165264892, "grad_norm": 0.01626453314165516, "learning_rate": 6.6896551724137935e-06, "loss": 0.586560070514679, "step": 97, "token_acc": 0.8262588586348377 }, { "epoch": 0.06797884331917108, "grad_norm": 0.014866483650778532, "learning_rate": 6.758620689655173e-06, "loss": 0.5817029476165771, "step": 98, "token_acc": 0.8270428982625684 }, { "epoch": 0.06867250498569323, "grad_norm": 0.015143057887257701, "learning_rate": 6.827586206896553e-06, "loss": 0.5679138898849487, "step": 99, "token_acc": 0.8301929571246166 }, { "epoch": 0.06936616665221539, "grad_norm": 0.014744519878404569, "learning_rate": 6.896551724137932e-06, "loss": 0.584690272808075, "step": 100, "token_acc": 0.8234322049120454 }, { "epoch": 0.07005982831873754, "grad_norm": 0.016170887475069125, "learning_rate": 6.9655172413793105e-06, "loss": 0.5746439099311829, "step": 101, "token_acc": 0.8308557509761078 }, { "epoch": 0.0707534899852597, "grad_norm": 0.014833392427749692, "learning_rate": 7.03448275862069e-06, "loss": 0.5576360821723938, "step": 102, "token_acc": 0.8338130945188765 }, { "epoch": 0.07144715165178184, "grad_norm": 0.014191401431503724, "learning_rate": 7.103448275862069e-06, "loss": 0.5503104329109192, "step": 103, "token_acc": 0.8371926361916846 }, { "epoch": 0.07214081331830399, "grad_norm": 0.015185345254095447, "learning_rate": 7.172413793103449e-06, "loss": 0.555490255355835, "step": 104, "token_acc": 0.8346358050468293 }, { "epoch": 0.07283447498482615, "grad_norm": 0.014437153469527635, "learning_rate": 7.241379310344828e-06, "loss": 0.573482871055603, "step": 105, "token_acc": 0.8304479783569959 }, { "epoch": 0.0735281366513483, "grad_norm": 0.014792016430559695, "learning_rate": 7.310344827586208e-06, "loss": 0.5661434531211853, "step": 106, "token_acc": 0.829783635550938 }, { "epoch": 0.07422179831787046, "grad_norm": 0.015536092778502004, "learning_rate": 7.379310344827587e-06, "loss": 0.5989271402359009, "step": 107, "token_acc": 0.8216430863750452 }, { "epoch": 0.07491545998439261, "grad_norm": 0.014883654436006945, "learning_rate": 7.4482758620689665e-06, "loss": 0.5775589942932129, "step": 108, "token_acc": 0.8277963026528711 }, { "epoch": 0.07560912165091477, "grad_norm": 0.014852678229778584, "learning_rate": 7.517241379310345e-06, "loss": 0.5865265130996704, "step": 109, "token_acc": 0.8266872480919304 }, { "epoch": 0.07630278331743692, "grad_norm": 0.016636504172056196, "learning_rate": 7.586206896551724e-06, "loss": 0.5872029662132263, "step": 110, "token_acc": 0.8274814724820799 }, { "epoch": 0.07699644498395908, "grad_norm": 0.015468695395158077, "learning_rate": 7.655172413793104e-06, "loss": 0.5733298659324646, "step": 111, "token_acc": 0.8281509759251278 }, { "epoch": 0.07769010665048123, "grad_norm": 0.014922057491545392, "learning_rate": 7.724137931034483e-06, "loss": 0.5727829337120056, "step": 112, "token_acc": 0.8299064306573694 }, { "epoch": 0.07838376831700339, "grad_norm": 0.014960686334983988, "learning_rate": 7.793103448275863e-06, "loss": 0.5382459163665771, "step": 113, "token_acc": 0.8382420560973839 }, { "epoch": 0.07907742998352553, "grad_norm": 0.014904856165396932, "learning_rate": 7.862068965517242e-06, "loss": 0.5520014762878418, "step": 114, "token_acc": 0.8345570649574285 }, { "epoch": 0.07977109165004768, "grad_norm": 0.01507547458366748, "learning_rate": 7.93103448275862e-06, "loss": 0.5655591487884521, "step": 115, "token_acc": 0.8328722636516719 }, { "epoch": 0.08046475331656984, "grad_norm": 0.015117959858449648, "learning_rate": 8.000000000000001e-06, "loss": 0.5893549919128418, "step": 116, "token_acc": 0.8250021053644687 }, { "epoch": 0.081158414983092, "grad_norm": 0.014584082147760195, "learning_rate": 8.06896551724138e-06, "loss": 0.553308367729187, "step": 117, "token_acc": 0.8349688510000468 }, { "epoch": 0.08185207664961415, "grad_norm": 0.01579113228052569, "learning_rate": 8.137931034482759e-06, "loss": 0.5593133568763733, "step": 118, "token_acc": 0.8335965353049554 }, { "epoch": 0.0825457383161363, "grad_norm": 0.015016977305422213, "learning_rate": 8.206896551724138e-06, "loss": 0.5513169765472412, "step": 119, "token_acc": 0.8355140186915888 }, { "epoch": 0.08323939998265846, "grad_norm": 0.015096572864980581, "learning_rate": 8.275862068965518e-06, "loss": 0.5526262521743774, "step": 120, "token_acc": 0.8352916491817037 }, { "epoch": 0.08393306164918062, "grad_norm": 0.014764546267677744, "learning_rate": 8.344827586206897e-06, "loss": 0.5388391017913818, "step": 121, "token_acc": 0.8378613135163827 }, { "epoch": 0.08462672331570277, "grad_norm": 0.016390283756232787, "learning_rate": 8.413793103448276e-06, "loss": 0.5827832818031311, "step": 122, "token_acc": 0.8267365008105749 }, { "epoch": 0.08532038498222493, "grad_norm": 0.015110119852602204, "learning_rate": 8.482758620689656e-06, "loss": 0.5672512054443359, "step": 123, "token_acc": 0.8301324586223302 }, { "epoch": 0.08601404664874707, "grad_norm": 0.015009010416107011, "learning_rate": 8.551724137931035e-06, "loss": 0.5732897520065308, "step": 124, "token_acc": 0.8295777018902409 }, { "epoch": 0.08670770831526922, "grad_norm": 0.01508619093541854, "learning_rate": 8.620689655172414e-06, "loss": 0.5674504041671753, "step": 125, "token_acc": 0.8302461336298392 }, { "epoch": 0.08740136998179138, "grad_norm": 0.01564635718501289, "learning_rate": 8.689655172413793e-06, "loss": 0.5822408199310303, "step": 126, "token_acc": 0.8259374760358886 }, { "epoch": 0.08809503164831353, "grad_norm": 0.03240731752148802, "learning_rate": 8.758620689655173e-06, "loss": 0.5788717269897461, "step": 127, "token_acc": 0.8276144305232009 }, { "epoch": 0.08878869331483569, "grad_norm": 0.014974096319736568, "learning_rate": 8.827586206896552e-06, "loss": 0.5563147068023682, "step": 128, "token_acc": 0.834416314549823 }, { "epoch": 0.08948235498135784, "grad_norm": 0.0146730820740775, "learning_rate": 8.896551724137931e-06, "loss": 0.5310776233673096, "step": 129, "token_acc": 0.8393698020381867 }, { "epoch": 0.09017601664788, "grad_norm": 0.015861544474004795, "learning_rate": 8.965517241379312e-06, "loss": 0.549136221408844, "step": 130, "token_acc": 0.836146289226185 }, { "epoch": 0.09086967831440215, "grad_norm": 0.014303141764787522, "learning_rate": 9.03448275862069e-06, "loss": 0.5449596047401428, "step": 131, "token_acc": 0.8371849986936798 }, { "epoch": 0.09156333998092431, "grad_norm": 0.015629460873208663, "learning_rate": 9.10344827586207e-06, "loss": 0.5642707347869873, "step": 132, "token_acc": 0.8310665526799824 }, { "epoch": 0.09225700164744646, "grad_norm": 0.02119338126586355, "learning_rate": 9.172413793103448e-06, "loss": 0.5683020949363708, "step": 133, "token_acc": 0.8306284058934571 }, { "epoch": 0.09295066331396862, "grad_norm": 0.015352571468338561, "learning_rate": 9.241379310344829e-06, "loss": 0.5644988417625427, "step": 134, "token_acc": 0.8314240723155149 }, { "epoch": 0.09364432498049076, "grad_norm": 0.01524164063718711, "learning_rate": 9.310344827586207e-06, "loss": 0.5565685033798218, "step": 135, "token_acc": 0.8335906996597743 }, { "epoch": 0.09433798664701291, "grad_norm": 0.015527519880016851, "learning_rate": 9.379310344827586e-06, "loss": 0.5415838956832886, "step": 136, "token_acc": 0.8383237874284057 }, { "epoch": 0.09503164831353507, "grad_norm": 0.015780654609999334, "learning_rate": 9.448275862068967e-06, "loss": 0.5441333651542664, "step": 137, "token_acc": 0.8378835438109837 }, { "epoch": 0.09572530998005722, "grad_norm": 0.015035763988213479, "learning_rate": 9.517241379310346e-06, "loss": 0.5384058952331543, "step": 138, "token_acc": 0.8384323852563873 }, { "epoch": 0.09641897164657938, "grad_norm": 0.014967442692600181, "learning_rate": 9.586206896551724e-06, "loss": 0.5410946607589722, "step": 139, "token_acc": 0.8361277776460265 }, { "epoch": 0.09711263331310153, "grad_norm": 0.016061459927873997, "learning_rate": 9.655172413793105e-06, "loss": 0.568100094795227, "step": 140, "token_acc": 0.8314595648578939 }, { "epoch": 0.09780629497962369, "grad_norm": 0.01613227644553098, "learning_rate": 9.724137931034484e-06, "loss": 0.5591145157814026, "step": 141, "token_acc": 0.8329316316988686 }, { "epoch": 0.09849995664614584, "grad_norm": 0.015292578303807631, "learning_rate": 9.793103448275863e-06, "loss": 0.5640596747398376, "step": 142, "token_acc": 0.8315775875230478 }, { "epoch": 0.099193618312668, "grad_norm": 0.016456173201267115, "learning_rate": 9.862068965517241e-06, "loss": 0.5832973122596741, "step": 143, "token_acc": 0.8258518388703903 }, { "epoch": 0.09988727997919015, "grad_norm": 0.01585154111414925, "learning_rate": 9.931034482758622e-06, "loss": 0.5731790065765381, "step": 144, "token_acc": 0.827411105400974 }, { "epoch": 0.10058094164571231, "grad_norm": 0.014859179084303275, "learning_rate": 1e-05, "loss": 0.530449628829956, "step": 145, "token_acc": 0.8405657114919091 }, { "epoch": 0.10127460331223445, "grad_norm": 0.016337370575117482, "learning_rate": 9.999985332382042e-06, "loss": 0.549363374710083, "step": 146, "token_acc": 0.8359442891702092 }, { "epoch": 0.1019682649787566, "grad_norm": 0.015048165499700098, "learning_rate": 9.99994132961422e-06, "loss": 0.5277963876724243, "step": 147, "token_acc": 0.8409129703235875 }, { "epoch": 0.10266192664527876, "grad_norm": 0.016356957135936746, "learning_rate": 9.999867991954703e-06, "loss": 0.5345621705055237, "step": 148, "token_acc": 0.8394555709574112 }, { "epoch": 0.10335558831180092, "grad_norm": 0.01607235565276681, "learning_rate": 9.999765319833765e-06, "loss": 0.5227524042129517, "step": 149, "token_acc": 0.843668307387956 }, { "epoch": 0.10404924997832307, "grad_norm": 0.017356621852177963, "learning_rate": 9.99963331385379e-06, "loss": 0.5751610398292542, "step": 150, "token_acc": 0.8261633465106064 }, { "epoch": 0.10474291164484523, "grad_norm": 0.01619498191117961, "learning_rate": 9.999471974789262e-06, "loss": 0.5422139167785645, "step": 151, "token_acc": 0.8368626479152469 }, { "epoch": 0.10543657331136738, "grad_norm": 0.014665501908450401, "learning_rate": 9.999281303586763e-06, "loss": 0.542507529258728, "step": 152, "token_acc": 0.8371078402627103 }, { "epoch": 0.10613023497788954, "grad_norm": 0.015699291463538524, "learning_rate": 9.999061301364974e-06, "loss": 0.5466927289962769, "step": 153, "token_acc": 0.8357308094270717 }, { "epoch": 0.10682389664441169, "grad_norm": 0.01574789536460029, "learning_rate": 9.998811969414654e-06, "loss": 0.5376659631729126, "step": 154, "token_acc": 0.8396335971939138 }, { "epoch": 0.10751755831093385, "grad_norm": 0.015951194784827087, "learning_rate": 9.99853330919865e-06, "loss": 0.537109375, "step": 155, "token_acc": 0.8363241092155201 }, { "epoch": 0.108211219977456, "grad_norm": 0.016072704204802838, "learning_rate": 9.998225322351871e-06, "loss": 0.5676643252372742, "step": 156, "token_acc": 0.8294109793545411 }, { "epoch": 0.10890488164397814, "grad_norm": 0.015219092637635444, "learning_rate": 9.997888010681292e-06, "loss": 0.5465761423110962, "step": 157, "token_acc": 0.8366449958794983 }, { "epoch": 0.1095985433105003, "grad_norm": 0.016228793014167214, "learning_rate": 9.997521376165935e-06, "loss": 0.5755155682563782, "step": 158, "token_acc": 0.8266715876237928 }, { "epoch": 0.11029220497702245, "grad_norm": 0.01489607323629833, "learning_rate": 9.997125420956865e-06, "loss": 0.5290955901145935, "step": 159, "token_acc": 0.8410274210343631 }, { "epoch": 0.11098586664354461, "grad_norm": 0.015070952506286235, "learning_rate": 9.996700147377167e-06, "loss": 0.5417481660842896, "step": 160, "token_acc": 0.8380707198946725 }, { "epoch": 0.11167952831006676, "grad_norm": 0.015251851339296057, "learning_rate": 9.996245557921941e-06, "loss": 0.5658220052719116, "step": 161, "token_acc": 0.8311436649929314 }, { "epoch": 0.11237318997658892, "grad_norm": 0.015366464223146999, "learning_rate": 9.995761655258288e-06, "loss": 0.5343703031539917, "step": 162, "token_acc": 0.8384410311598102 }, { "epoch": 0.11306685164311107, "grad_norm": 0.015763737132443945, "learning_rate": 9.995248442225285e-06, "loss": 0.5657963752746582, "step": 163, "token_acc": 0.8312839300180154 }, { "epoch": 0.11376051330963323, "grad_norm": 0.01587890017511663, "learning_rate": 9.994705921833978e-06, "loss": 0.5514758229255676, "step": 164, "token_acc": 0.8364231840403533 }, { "epoch": 0.11445417497615538, "grad_norm": 0.015769956691642623, "learning_rate": 9.994134097267358e-06, "loss": 0.5668140649795532, "step": 165, "token_acc": 0.8299224418313735 }, { "epoch": 0.11514783664267754, "grad_norm": 0.015476119160444033, "learning_rate": 9.99353297188035e-06, "loss": 0.53995680809021, "step": 166, "token_acc": 0.8388271413951496 }, { "epoch": 0.1158414983091997, "grad_norm": 0.01508829174608977, "learning_rate": 9.992902549199782e-06, "loss": 0.5530000329017639, "step": 167, "token_acc": 0.8352409967591975 }, { "epoch": 0.11653515997572184, "grad_norm": 0.015746784741714894, "learning_rate": 9.992242832924376e-06, "loss": 0.5586972236633301, "step": 168, "token_acc": 0.8312006887360985 }, { "epoch": 0.11722882164224399, "grad_norm": 0.015102111170711613, "learning_rate": 9.991553826924716e-06, "loss": 0.5411838889122009, "step": 169, "token_acc": 0.8362826336544701 }, { "epoch": 0.11792248330876615, "grad_norm": 0.015784157183655133, "learning_rate": 9.990835535243235e-06, "loss": 0.5581400394439697, "step": 170, "token_acc": 0.8308193496834471 }, { "epoch": 0.1186161449752883, "grad_norm": 0.015958685414021807, "learning_rate": 9.990087962094185e-06, "loss": 0.5499991774559021, "step": 171, "token_acc": 0.8332892699323942 }, { "epoch": 0.11930980664181046, "grad_norm": 0.015508985488036498, "learning_rate": 9.989311111863609e-06, "loss": 0.5577118396759033, "step": 172, "token_acc": 0.8340525417298937 }, { "epoch": 0.12000346830833261, "grad_norm": 0.015632619505918637, "learning_rate": 9.988504989109326e-06, "loss": 0.5416381359100342, "step": 173, "token_acc": 0.8387505123815494 }, { "epoch": 0.12069712997485477, "grad_norm": 0.01463256979909443, "learning_rate": 9.987669598560897e-06, "loss": 0.5320215225219727, "step": 174, "token_acc": 0.8407282456956724 }, { "epoch": 0.12139079164137692, "grad_norm": 0.014889145204067904, "learning_rate": 9.986804945119596e-06, "loss": 0.5378844738006592, "step": 175, "token_acc": 0.8398543075548957 }, { "epoch": 0.12208445330789908, "grad_norm": 0.014850521067158975, "learning_rate": 9.98591103385839e-06, "loss": 0.5355537533760071, "step": 176, "token_acc": 0.8394754885155982 }, { "epoch": 0.12277811497442123, "grad_norm": 0.015357846344308729, "learning_rate": 9.984987870021892e-06, "loss": 0.5337123274803162, "step": 177, "token_acc": 0.8392483843608519 }, { "epoch": 0.12347177664094337, "grad_norm": 0.015882722672595977, "learning_rate": 9.984035459026352e-06, "loss": 0.5610445737838745, "step": 178, "token_acc": 0.832547575199509 }, { "epoch": 0.12416543830746553, "grad_norm": 0.015078755555245302, "learning_rate": 9.983053806459609e-06, "loss": 0.5338558554649353, "step": 179, "token_acc": 0.8384972291772971 }, { "epoch": 0.12485909997398768, "grad_norm": 0.01658926630607117, "learning_rate": 9.982042918081065e-06, "loss": 0.5516771674156189, "step": 180, "token_acc": 0.8349019465339099 }, { "epoch": 0.12555276164050985, "grad_norm": 0.014731468703923159, "learning_rate": 9.98100279982165e-06, "loss": 0.5484277606010437, "step": 181, "token_acc": 0.8359757223299178 }, { "epoch": 0.126246423307032, "grad_norm": 0.015158286039848135, "learning_rate": 9.979933457783789e-06, "loss": 0.5482351779937744, "step": 182, "token_acc": 0.8346498796879177 }, { "epoch": 0.12694008497355416, "grad_norm": 0.016366850771717303, "learning_rate": 9.978834898241359e-06, "loss": 0.553917646408081, "step": 183, "token_acc": 0.834047690914579 }, { "epoch": 0.1276337466400763, "grad_norm": 0.015837160118860633, "learning_rate": 9.97770712763966e-06, "loss": 0.5446297526359558, "step": 184, "token_acc": 0.8366695749719556 }, { "epoch": 0.12832740830659844, "grad_norm": 0.033598260370081186, "learning_rate": 9.97655015259538e-06, "loss": 0.5209952592849731, "step": 185, "token_acc": 0.8429542331546566 }, { "epoch": 0.1290210699731206, "grad_norm": 0.014633440478824247, "learning_rate": 9.975363979896541e-06, "loss": 0.5271025896072388, "step": 186, "token_acc": 0.8421776958783327 }, { "epoch": 0.12971473163964276, "grad_norm": 0.01545763336557415, "learning_rate": 9.974148616502478e-06, "loss": 0.5591514706611633, "step": 187, "token_acc": 0.8331176441861601 }, { "epoch": 0.1304083933061649, "grad_norm": 0.016709476748495013, "learning_rate": 9.972904069543783e-06, "loss": 0.560494601726532, "step": 188, "token_acc": 0.8298843180004051 }, { "epoch": 0.13110205497268707, "grad_norm": 0.015717023495956026, "learning_rate": 9.971630346322272e-06, "loss": 0.540519118309021, "step": 189, "token_acc": 0.8367073823543725 }, { "epoch": 0.13179571663920922, "grad_norm": 0.016039584566142445, "learning_rate": 9.970327454310941e-06, "loss": 0.5602277517318726, "step": 190, "token_acc": 0.8313256106644749 }, { "epoch": 0.13248937830573138, "grad_norm": 0.015514388275666037, "learning_rate": 9.968995401153916e-06, "loss": 0.526347279548645, "step": 191, "token_acc": 0.8399936952884406 }, { "epoch": 0.13318303997225353, "grad_norm": 0.016064635502262052, "learning_rate": 9.96763419466642e-06, "loss": 0.5606065392494202, "step": 192, "token_acc": 0.8322969940027227 }, { "epoch": 0.13387670163877569, "grad_norm": 0.015826240981113893, "learning_rate": 9.966243842834712e-06, "loss": 0.5611197352409363, "step": 193, "token_acc": 0.8334019519992251 }, { "epoch": 0.13457036330529784, "grad_norm": 0.015364519088654712, "learning_rate": 9.964824353816052e-06, "loss": 0.5385665893554688, "step": 194, "token_acc": 0.8394708528951487 }, { "epoch": 0.13526402497182, "grad_norm": 0.015655599376072433, "learning_rate": 9.963375735938652e-06, "loss": 0.5383186340332031, "step": 195, "token_acc": 0.836818761771675 }, { "epoch": 0.13595768663834215, "grad_norm": 0.015568536303406814, "learning_rate": 9.961897997701617e-06, "loss": 0.5532296299934387, "step": 196, "token_acc": 0.8315164542709453 }, { "epoch": 0.1366513483048643, "grad_norm": 0.01556102124183489, "learning_rate": 9.960391147774912e-06, "loss": 0.5429196357727051, "step": 197, "token_acc": 0.8370219420880527 }, { "epoch": 0.13734500997138646, "grad_norm": 0.016565993670870464, "learning_rate": 9.958855194999293e-06, "loss": 0.5403622388839722, "step": 198, "token_acc": 0.8378231670369622 }, { "epoch": 0.13803867163790862, "grad_norm": 0.015196391400369127, "learning_rate": 9.957290148386267e-06, "loss": 0.5283676385879517, "step": 199, "token_acc": 0.841893264664887 }, { "epoch": 0.13873233330443077, "grad_norm": 0.014966134263582115, "learning_rate": 9.955696017118039e-06, "loss": 0.5443913340568542, "step": 200, "token_acc": 0.8350258154501187 }, { "epoch": 0.13942599497095293, "grad_norm": 0.021230159598864085, "learning_rate": 9.95407281054745e-06, "loss": 0.5536177158355713, "step": 201, "token_acc": 0.8349350184809825 }, { "epoch": 0.14011965663747508, "grad_norm": 0.014422299695971951, "learning_rate": 9.952420538197932e-06, "loss": 0.522667646408081, "step": 202, "token_acc": 0.8421149301431423 }, { "epoch": 0.14081331830399724, "grad_norm": 0.015697934290558782, "learning_rate": 9.950739209763445e-06, "loss": 0.5595871210098267, "step": 203, "token_acc": 0.8308972175596369 }, { "epoch": 0.1415069799705194, "grad_norm": 0.015414153339377654, "learning_rate": 9.949028835108418e-06, "loss": 0.5476477742195129, "step": 204, "token_acc": 0.8332333092459 }, { "epoch": 0.14220064163704152, "grad_norm": 0.018011163477907845, "learning_rate": 9.947289424267702e-06, "loss": 0.5385111570358276, "step": 205, "token_acc": 0.8374781741082564 }, { "epoch": 0.14289430330356367, "grad_norm": 0.015209917826985232, "learning_rate": 9.945520987446507e-06, "loss": 0.5437875986099243, "step": 206, "token_acc": 0.8374889436064163 }, { "epoch": 0.14358796497008583, "grad_norm": 0.014994492275939521, "learning_rate": 9.943723535020327e-06, "loss": 0.5322327613830566, "step": 207, "token_acc": 0.8388524965919921 }, { "epoch": 0.14428162663660798, "grad_norm": 0.014868020852867765, "learning_rate": 9.941897077534906e-06, "loss": 0.5320675373077393, "step": 208, "token_acc": 0.8380426960901894 }, { "epoch": 0.14497528830313014, "grad_norm": 0.014956106875764978, "learning_rate": 9.940041625706156e-06, "loss": 0.5243188738822937, "step": 209, "token_acc": 0.8415488304479979 }, { "epoch": 0.1456689499696523, "grad_norm": 0.014650245693570024, "learning_rate": 9.938157190420098e-06, "loss": 0.5145503282546997, "step": 210, "token_acc": 0.8422839031659117 }, { "epoch": 0.14636261163617445, "grad_norm": 0.014865887507856525, "learning_rate": 9.936243782732803e-06, "loss": 0.5067030787467957, "step": 211, "token_acc": 0.8469028974624856 }, { "epoch": 0.1470562733026966, "grad_norm": 0.015394917999194737, "learning_rate": 9.934301413870327e-06, "loss": 0.5352437496185303, "step": 212, "token_acc": 0.837682124292628 }, { "epoch": 0.14774993496921876, "grad_norm": 0.015143119009222952, "learning_rate": 9.932330095228636e-06, "loss": 0.5304079055786133, "step": 213, "token_acc": 0.8390344522751271 }, { "epoch": 0.14844359663574092, "grad_norm": 0.015466996250930223, "learning_rate": 9.930329838373553e-06, "loss": 0.5333006381988525, "step": 214, "token_acc": 0.8389432999362078 }, { "epoch": 0.14913725830226307, "grad_norm": 0.016032312061321313, "learning_rate": 9.928300655040676e-06, "loss": 0.5604925155639648, "step": 215, "token_acc": 0.8318134509590127 }, { "epoch": 0.14983091996878523, "grad_norm": 0.014840488637149511, "learning_rate": 9.926242557135323e-06, "loss": 0.5322625041007996, "step": 216, "token_acc": 0.8393333805777998 }, { "epoch": 0.15052458163530738, "grad_norm": 0.014467602388929872, "learning_rate": 9.924155556732448e-06, "loss": 0.529295027256012, "step": 217, "token_acc": 0.8405948041427993 }, { "epoch": 0.15121824330182954, "grad_norm": 0.014983116654888067, "learning_rate": 9.922039666076583e-06, "loss": 0.5448846817016602, "step": 218, "token_acc": 0.8358387365523237 }, { "epoch": 0.1519119049683517, "grad_norm": 0.01553214874492577, "learning_rate": 9.919894897581758e-06, "loss": 0.5549994707107544, "step": 219, "token_acc": 0.8323089223696375 }, { "epoch": 0.15260556663487385, "grad_norm": 0.015865978230439698, "learning_rate": 9.91772126383143e-06, "loss": 0.5434461832046509, "step": 220, "token_acc": 0.835457359907496 }, { "epoch": 0.153299228301396, "grad_norm": 0.01427205348389222, "learning_rate": 9.915518777578412e-06, "loss": 0.5023703575134277, "step": 221, "token_acc": 0.8484948350511949 }, { "epoch": 0.15399288996791816, "grad_norm": 0.01539404968658148, "learning_rate": 9.913287451744793e-06, "loss": 0.5380567312240601, "step": 222, "token_acc": 0.8372376102347534 }, { "epoch": 0.1546865516344403, "grad_norm": 0.01502703426770042, "learning_rate": 9.911027299421869e-06, "loss": 0.5324046611785889, "step": 223, "token_acc": 0.8388153675741853 }, { "epoch": 0.15538021330096247, "grad_norm": 0.01665047767734628, "learning_rate": 9.90873833387006e-06, "loss": 0.5314779877662659, "step": 224, "token_acc": 0.8391528403207552 }, { "epoch": 0.15607387496748462, "grad_norm": 0.014597452415623728, "learning_rate": 9.906420568518833e-06, "loss": 0.5262778997421265, "step": 225, "token_acc": 0.8422722327206092 }, { "epoch": 0.15676753663400678, "grad_norm": 0.015414445235474968, "learning_rate": 9.90407401696663e-06, "loss": 0.5316914916038513, "step": 226, "token_acc": 0.8366774821544452 }, { "epoch": 0.1574611983005289, "grad_norm": 0.01546085592710086, "learning_rate": 9.901698692980776e-06, "loss": 0.5488492250442505, "step": 227, "token_acc": 0.8336599829137142 }, { "epoch": 0.15815485996705106, "grad_norm": 0.01679062160402724, "learning_rate": 9.899294610497412e-06, "loss": 0.5540822744369507, "step": 228, "token_acc": 0.8315783081314143 }, { "epoch": 0.15884852163357321, "grad_norm": 0.01563626475647191, "learning_rate": 9.896861783621401e-06, "loss": 0.5577372908592224, "step": 229, "token_acc": 0.8315223651372624 }, { "epoch": 0.15954218330009537, "grad_norm": 0.01529653664196656, "learning_rate": 9.894400226626253e-06, "loss": 0.5432524681091309, "step": 230, "token_acc": 0.8354719338715878 }, { "epoch": 0.16023584496661752, "grad_norm": 0.01551723980928376, "learning_rate": 9.891909953954042e-06, "loss": 0.5353432893753052, "step": 231, "token_acc": 0.8370151939224311 }, { "epoch": 0.16092950663313968, "grad_norm": 0.015280655905617887, "learning_rate": 9.889390980215312e-06, "loss": 0.5582616329193115, "step": 232, "token_acc": 0.832537380607091 }, { "epoch": 0.16162316829966183, "grad_norm": 0.016526250916236208, "learning_rate": 9.886843320189004e-06, "loss": 0.546466588973999, "step": 233, "token_acc": 0.8351272077243602 }, { "epoch": 0.162316829966184, "grad_norm": 0.015594698171938036, "learning_rate": 9.884266988822356e-06, "loss": 0.5056731700897217, "step": 234, "token_acc": 0.8493041879291022 }, { "epoch": 0.16301049163270614, "grad_norm": 0.014555895760162061, "learning_rate": 9.881662001230827e-06, "loss": 0.5184738636016846, "step": 235, "token_acc": 0.843655834221872 }, { "epoch": 0.1637041532992283, "grad_norm": 0.01627055000394062, "learning_rate": 9.879028372698005e-06, "loss": 0.5459529161453247, "step": 236, "token_acc": 0.8328477340058272 }, { "epoch": 0.16439781496575046, "grad_norm": 0.014947641797689705, "learning_rate": 9.876366118675507e-06, "loss": 0.524900496006012, "step": 237, "token_acc": 0.839129900454713 }, { "epoch": 0.1650914766322726, "grad_norm": 0.01482641548808544, "learning_rate": 9.873675254782908e-06, "loss": 0.533475935459137, "step": 238, "token_acc": 0.8392091982479528 }, { "epoch": 0.16578513829879477, "grad_norm": 0.015157416479391667, "learning_rate": 9.870955796807633e-06, "loss": 0.5359286069869995, "step": 239, "token_acc": 0.8378997310659887 }, { "epoch": 0.16647879996531692, "grad_norm": 0.0173374341836886, "learning_rate": 9.868207760704868e-06, "loss": 0.5082807540893555, "step": 240, "token_acc": 0.8460027361700635 }, { "epoch": 0.16717246163183908, "grad_norm": 0.01668189406155227, "learning_rate": 9.865431162597471e-06, "loss": 0.5606524348258972, "step": 241, "token_acc": 0.8302631238519055 }, { "epoch": 0.16786612329836123, "grad_norm": 0.01636696537626821, "learning_rate": 9.862626018775877e-06, "loss": 0.5421234369277954, "step": 242, "token_acc": 0.8350762332980932 }, { "epoch": 0.16855978496488339, "grad_norm": 0.014584890340229816, "learning_rate": 9.859792345697991e-06, "loss": 0.5082694888114929, "step": 243, "token_acc": 0.8476263051340961 }, { "epoch": 0.16925344663140554, "grad_norm": 0.015220704282106184, "learning_rate": 9.856930159989112e-06, "loss": 0.5250012278556824, "step": 244, "token_acc": 0.8417700260005225 }, { "epoch": 0.1699471082979277, "grad_norm": 0.01691322554271674, "learning_rate": 9.854039478441818e-06, "loss": 0.545636773109436, "step": 245, "token_acc": 0.8349983963127071 }, { "epoch": 0.17064076996444985, "grad_norm": 0.014827088313243378, "learning_rate": 9.851120318015872e-06, "loss": 0.518938422203064, "step": 246, "token_acc": 0.8434108348503353 }, { "epoch": 0.171334431630972, "grad_norm": 0.026700824281608648, "learning_rate": 9.848172695838127e-06, "loss": 0.5300254225730896, "step": 247, "token_acc": 0.8386072760253955 }, { "epoch": 0.17202809329749413, "grad_norm": 0.015256381581108256, "learning_rate": 9.845196629202422e-06, "loss": 0.5424628257751465, "step": 248, "token_acc": 0.8356689690757919 }, { "epoch": 0.1727217549640163, "grad_norm": 0.016060370379735418, "learning_rate": 9.84219213556948e-06, "loss": 0.5340221524238586, "step": 249, "token_acc": 0.8385717340175326 }, { "epoch": 0.17341541663053844, "grad_norm": 0.021904790942270394, "learning_rate": 9.839159232566806e-06, "loss": 0.5643888711929321, "step": 250, "token_acc": 0.8311936629336247 }, { "epoch": 0.1741090782970606, "grad_norm": 0.014878279039379499, "learning_rate": 9.836097937988584e-06, "loss": 0.5205628275871277, "step": 251, "token_acc": 0.8421450775977212 }, { "epoch": 0.17480273996358275, "grad_norm": 0.015417352594571497, "learning_rate": 9.833008269795578e-06, "loss": 0.538686990737915, "step": 252, "token_acc": 0.8371083929607426 }, { "epoch": 0.1754964016301049, "grad_norm": 0.015713963667316862, "learning_rate": 9.829890246115014e-06, "loss": 0.5252983570098877, "step": 253, "token_acc": 0.8414482489074178 }, { "epoch": 0.17619006329662706, "grad_norm": 0.015219867929408246, "learning_rate": 9.826743885240484e-06, "loss": 0.5080589056015015, "step": 254, "token_acc": 0.8451488272141191 }, { "epoch": 0.17688372496314922, "grad_norm": 0.016328958439011363, "learning_rate": 9.823569205631836e-06, "loss": 0.5332421064376831, "step": 255, "token_acc": 0.8377756209247922 }, { "epoch": 0.17757738662967137, "grad_norm": 0.017290399518106014, "learning_rate": 9.820366225915066e-06, "loss": 0.5292096138000488, "step": 256, "token_acc": 0.8388969703049759 }, { "epoch": 0.17827104829619353, "grad_norm": 0.014824608417857632, "learning_rate": 9.817134964882207e-06, "loss": 0.501818060874939, "step": 257, "token_acc": 0.8466316123795744 }, { "epoch": 0.17896470996271568, "grad_norm": 0.01592194918523882, "learning_rate": 9.813875441491218e-06, "loss": 0.5125102996826172, "step": 258, "token_acc": 0.8444196883123106 }, { "epoch": 0.17965837162923784, "grad_norm": 0.01647626053216573, "learning_rate": 9.810587674865879e-06, "loss": 0.5193585157394409, "step": 259, "token_acc": 0.844352908041121 }, { "epoch": 0.18035203329576, "grad_norm": 0.014896249586589038, "learning_rate": 9.807271684295669e-06, "loss": 0.5071588158607483, "step": 260, "token_acc": 0.845249014032835 }, { "epoch": 0.18104569496228215, "grad_norm": 0.015321531807004102, "learning_rate": 9.803927489235664e-06, "loss": 0.5360854864120483, "step": 261, "token_acc": 0.838599007829777 }, { "epoch": 0.1817393566288043, "grad_norm": 0.01585763296596539, "learning_rate": 9.800555109306416e-06, "loss": 0.5295257568359375, "step": 262, "token_acc": 0.8393081800968345 }, { "epoch": 0.18243301829532646, "grad_norm": 0.014880900260538336, "learning_rate": 9.797154564293831e-06, "loss": 0.5137016177177429, "step": 263, "token_acc": 0.8444305927818987 }, { "epoch": 0.18312667996184862, "grad_norm": 0.016868179462566704, "learning_rate": 9.793725874149071e-06, "loss": 0.5361820459365845, "step": 264, "token_acc": 0.837755525271079 }, { "epoch": 0.18382034162837077, "grad_norm": 0.016402525811100785, "learning_rate": 9.790269058988423e-06, "loss": 0.5364961624145508, "step": 265, "token_acc": 0.8370147464632213 }, { "epoch": 0.18451400329489293, "grad_norm": 0.015542755330546462, "learning_rate": 9.786784139093183e-06, "loss": 0.5535317659378052, "step": 266, "token_acc": 0.8324870505983517 }, { "epoch": 0.18520766496141508, "grad_norm": 0.015442063976340296, "learning_rate": 9.783271134909542e-06, "loss": 0.5325021743774414, "step": 267, "token_acc": 0.8387508285703565 }, { "epoch": 0.18590132662793724, "grad_norm": 0.014948116542735231, "learning_rate": 9.779730067048461e-06, "loss": 0.5177863836288452, "step": 268, "token_acc": 0.842761994087547 }, { "epoch": 0.1865949882944594, "grad_norm": 0.0147808735685992, "learning_rate": 9.77616095628555e-06, "loss": 0.4920388460159302, "step": 269, "token_acc": 0.850644073080306 }, { "epoch": 0.18728864996098152, "grad_norm": 0.015423884573997258, "learning_rate": 9.772563823560955e-06, "loss": 0.5306118130683899, "step": 270, "token_acc": 0.8374951067035196 }, { "epoch": 0.18798231162750367, "grad_norm": 0.01794595073426242, "learning_rate": 9.768938689979218e-06, "loss": 0.5213645696640015, "step": 271, "token_acc": 0.8416690482995142 }, { "epoch": 0.18867597329402583, "grad_norm": 0.0486225635400733, "learning_rate": 9.765285576809173e-06, "loss": 0.5155547857284546, "step": 272, "token_acc": 0.8421971289139031 }, { "epoch": 0.18936963496054798, "grad_norm": 0.01563788069131224, "learning_rate": 9.761604505483804e-06, "loss": 0.532556414604187, "step": 273, "token_acc": 0.8367973428614564 }, { "epoch": 0.19006329662707014, "grad_norm": 0.015428679435089155, "learning_rate": 9.757895497600133e-06, "loss": 0.5450507402420044, "step": 274, "token_acc": 0.8335554773161654 }, { "epoch": 0.1907569582935923, "grad_norm": 0.014481911595066796, "learning_rate": 9.754158574919082e-06, "loss": 0.510329008102417, "step": 275, "token_acc": 0.8451294068514287 }, { "epoch": 0.19145061996011445, "grad_norm": 0.015413931503630122, "learning_rate": 9.750393759365356e-06, "loss": 0.5252354145050049, "step": 276, "token_acc": 0.8397171258590369 }, { "epoch": 0.1921442816266366, "grad_norm": 0.015406264886534385, "learning_rate": 9.746601073027302e-06, "loss": 0.5111818313598633, "step": 277, "token_acc": 0.8437108181475125 }, { "epoch": 0.19283794329315876, "grad_norm": 0.014834751768309644, "learning_rate": 9.74278053815679e-06, "loss": 0.5176700353622437, "step": 278, "token_acc": 0.8434807824752607 }, { "epoch": 0.19353160495968091, "grad_norm": 0.01565018142281663, "learning_rate": 9.73893217716908e-06, "loss": 0.5456033945083618, "step": 279, "token_acc": 0.8334501026340696 }, { "epoch": 0.19422526662620307, "grad_norm": 0.014902425721450056, "learning_rate": 9.735056012642687e-06, "loss": 0.5211932063102722, "step": 280, "token_acc": 0.8423094943476015 }, { "epoch": 0.19491892829272522, "grad_norm": 0.015330182940862764, "learning_rate": 9.731152067319251e-06, "loss": 0.5376917123794556, "step": 281, "token_acc": 0.8381632012364462 }, { "epoch": 0.19561258995924738, "grad_norm": 0.015178386354236935, "learning_rate": 9.727220364103403e-06, "loss": 0.509064257144928, "step": 282, "token_acc": 0.8439874379495492 }, { "epoch": 0.19630625162576953, "grad_norm": 0.015400876206088267, "learning_rate": 9.723260926062632e-06, "loss": 0.5252615809440613, "step": 283, "token_acc": 0.8400122925629994 }, { "epoch": 0.1969999132922917, "grad_norm": 0.015876141906468747, "learning_rate": 9.719273776427147e-06, "loss": 0.5600495338439941, "step": 284, "token_acc": 0.8287693914461115 }, { "epoch": 0.19769357495881384, "grad_norm": 0.015937649217541667, "learning_rate": 9.715258938589743e-06, "loss": 0.5316158533096313, "step": 285, "token_acc": 0.8384752299021062 }, { "epoch": 0.198387236625336, "grad_norm": 0.01564360285233777, "learning_rate": 9.711216436105663e-06, "loss": 0.527535617351532, "step": 286, "token_acc": 0.8395043819885162 }, { "epoch": 0.19908089829185815, "grad_norm": 0.0159130931961062, "learning_rate": 9.707146292692463e-06, "loss": 0.5338237881660461, "step": 287, "token_acc": 0.8374134552580013 }, { "epoch": 0.1997745599583803, "grad_norm": 0.015376336422626428, "learning_rate": 9.703048532229862e-06, "loss": 0.5318648815155029, "step": 288, "token_acc": 0.8377332055158174 }, { "epoch": 0.20046822162490247, "grad_norm": 0.015217000473703076, "learning_rate": 9.698923178759616e-06, "loss": 0.510287880897522, "step": 289, "token_acc": 0.8429922839874824 }, { "epoch": 0.20116188329142462, "grad_norm": 0.015274123873941333, "learning_rate": 9.694770256485368e-06, "loss": 0.5122607946395874, "step": 290, "token_acc": 0.8448572536474238 }, { "epoch": 0.20185554495794678, "grad_norm": 0.015516080801325171, "learning_rate": 9.69058978977251e-06, "loss": 0.5056437253952026, "step": 291, "token_acc": 0.8466564942401547 }, { "epoch": 0.2025492066244689, "grad_norm": 0.015538107082463188, "learning_rate": 9.686381803148037e-06, "loss": 0.5067158341407776, "step": 292, "token_acc": 0.8461829310079992 }, { "epoch": 0.20324286829099106, "grad_norm": 0.01569727808847001, "learning_rate": 9.682146321300403e-06, "loss": 0.5152480602264404, "step": 293, "token_acc": 0.843454939564275 }, { "epoch": 0.2039365299575132, "grad_norm": 0.014748313420319311, "learning_rate": 9.677883369079383e-06, "loss": 0.5109955072402954, "step": 294, "token_acc": 0.8447273960269857 }, { "epoch": 0.20463019162403537, "grad_norm": 0.016985049011560078, "learning_rate": 9.673592971495916e-06, "loss": 0.5457242727279663, "step": 295, "token_acc": 0.8352385045122476 }, { "epoch": 0.20532385329055752, "grad_norm": 0.016091360350159317, "learning_rate": 9.66927515372197e-06, "loss": 0.5254797339439392, "step": 296, "token_acc": 0.8421007581315725 }, { "epoch": 0.20601751495707968, "grad_norm": 0.015323135303011815, "learning_rate": 9.664929941090381e-06, "loss": 0.5365530848503113, "step": 297, "token_acc": 0.8377375544085817 }, { "epoch": 0.20671117662360183, "grad_norm": 0.014454184551548548, "learning_rate": 9.660557359094723e-06, "loss": 0.505844235420227, "step": 298, "token_acc": 0.8455611390284757 }, { "epoch": 0.207404838290124, "grad_norm": 0.0152945411406488, "learning_rate": 9.656157433389135e-06, "loss": 0.5247935056686401, "step": 299, "token_acc": 0.83980588399027 }, { "epoch": 0.20809849995664614, "grad_norm": 0.014786416166733596, "learning_rate": 9.651730189788193e-06, "loss": 0.5098066329956055, "step": 300, "token_acc": 0.8463055630691552 }, { "epoch": 0.2087921616231683, "grad_norm": 0.01551818015455684, "learning_rate": 9.64727565426674e-06, "loss": 0.5791878700256348, "step": 301, "token_acc": 0.8266379664788256 }, { "epoch": 0.20948582328969045, "grad_norm": 0.015188715885544985, "learning_rate": 9.642793852959751e-06, "loss": 0.5115097761154175, "step": 302, "token_acc": 0.8446616664139323 }, { "epoch": 0.2101794849562126, "grad_norm": 0.016198805758335433, "learning_rate": 9.638284812162161e-06, "loss": 0.5664098858833313, "step": 303, "token_acc": 0.8301726218451103 }, { "epoch": 0.21087314662273476, "grad_norm": 0.014547202073788226, "learning_rate": 9.63374855832873e-06, "loss": 0.5140308737754822, "step": 304, "token_acc": 0.8443267905285887 }, { "epoch": 0.21156680828925692, "grad_norm": 0.015302713505284341, "learning_rate": 9.629185118073869e-06, "loss": 0.5288872718811035, "step": 305, "token_acc": 0.8396924466471358 }, { "epoch": 0.21226046995577907, "grad_norm": 0.01531091838548529, "learning_rate": 9.624594518171497e-06, "loss": 0.5370383262634277, "step": 306, "token_acc": 0.8371995665878644 }, { "epoch": 0.21295413162230123, "grad_norm": 0.01537291254152366, "learning_rate": 9.619976785554883e-06, "loss": 0.504914402961731, "step": 307, "token_acc": 0.8458787728085444 }, { "epoch": 0.21364779328882338, "grad_norm": 0.014958837817565452, "learning_rate": 9.615331947316482e-06, "loss": 0.5397136211395264, "step": 308, "token_acc": 0.835873850197109 }, { "epoch": 0.21434145495534554, "grad_norm": 0.01573174079349604, "learning_rate": 9.610660030707777e-06, "loss": 0.529837429523468, "step": 309, "token_acc": 0.8383763128169387 }, { "epoch": 0.2150351166218677, "grad_norm": 0.016854143274524776, "learning_rate": 9.605961063139126e-06, "loss": 0.5255622267723083, "step": 310, "token_acc": 0.8396826204160521 }, { "epoch": 0.21572877828838985, "grad_norm": 0.015545505400938225, "learning_rate": 9.60123507217959e-06, "loss": 0.53956139087677, "step": 311, "token_acc": 0.8358609243646947 }, { "epoch": 0.216422439954912, "grad_norm": 0.015544636331586847, "learning_rate": 9.596482085556785e-06, "loss": 0.531800389289856, "step": 312, "token_acc": 0.8387927029843438 }, { "epoch": 0.21711610162143413, "grad_norm": 0.014087393934559118, "learning_rate": 9.591702131156703e-06, "loss": 0.5243149995803833, "step": 313, "token_acc": 0.8416355069863283 }, { "epoch": 0.2178097632879563, "grad_norm": 0.01493318020165802, "learning_rate": 9.586895237023565e-06, "loss": 0.5200930833816528, "step": 314, "token_acc": 0.840651773853485 }, { "epoch": 0.21850342495447844, "grad_norm": 0.016077612397321174, "learning_rate": 9.582061431359647e-06, "loss": 0.5427871942520142, "step": 315, "token_acc": 0.837413661726028 }, { "epoch": 0.2191970866210006, "grad_norm": 0.0155392456629206, "learning_rate": 9.577200742525113e-06, "loss": 0.5360356569290161, "step": 316, "token_acc": 0.8369474935149571 }, { "epoch": 0.21989074828752275, "grad_norm": 0.015316760333058555, "learning_rate": 9.572313199037852e-06, "loss": 0.5236578583717346, "step": 317, "token_acc": 0.8405670083450825 }, { "epoch": 0.2205844099540449, "grad_norm": 0.015495583655269212, "learning_rate": 9.567398829573315e-06, "loss": 0.5284357070922852, "step": 318, "token_acc": 0.8394628357276702 }, { "epoch": 0.22127807162056706, "grad_norm": 0.016694295339418637, "learning_rate": 9.562457662964339e-06, "loss": 0.5318150520324707, "step": 319, "token_acc": 0.8368765487006798 }, { "epoch": 0.22197173328708922, "grad_norm": 0.014706522802670815, "learning_rate": 9.557489728200982e-06, "loss": 0.5087779760360718, "step": 320, "token_acc": 0.8442394160583941 }, { "epoch": 0.22266539495361137, "grad_norm": 0.016512990715332607, "learning_rate": 9.55249505443035e-06, "loss": 0.5382717251777649, "step": 321, "token_acc": 0.8384069608571634 }, { "epoch": 0.22335905662013353, "grad_norm": 0.01480115682519605, "learning_rate": 9.547473670956432e-06, "loss": 0.5110079050064087, "step": 322, "token_acc": 0.8450581223815424 }, { "epoch": 0.22405271828665568, "grad_norm": 0.015782241350395743, "learning_rate": 9.54242560723992e-06, "loss": 0.5445629358291626, "step": 323, "token_acc": 0.8346074865780945 }, { "epoch": 0.22474637995317784, "grad_norm": 0.01589861702965165, "learning_rate": 9.537350892898041e-06, "loss": 0.5334420204162598, "step": 324, "token_acc": 0.8376123930235078 }, { "epoch": 0.2254400416197, "grad_norm": 0.015211708845940808, "learning_rate": 9.532249557704386e-06, "loss": 0.5218173265457153, "step": 325, "token_acc": 0.8408685269752957 }, { "epoch": 0.22613370328622215, "grad_norm": 0.016394658933476657, "learning_rate": 9.527121631588728e-06, "loss": 0.5295275449752808, "step": 326, "token_acc": 0.8394880533339967 }, { "epoch": 0.2268273649527443, "grad_norm": 0.016202081113127407, "learning_rate": 9.521967144636852e-06, "loss": 0.5334320664405823, "step": 327, "token_acc": 0.8386738816019151 }, { "epoch": 0.22752102661926646, "grad_norm": 0.01492305903548267, "learning_rate": 9.516786127090375e-06, "loss": 0.5039514303207397, "step": 328, "token_acc": 0.8467187289288823 }, { "epoch": 0.2282146882857886, "grad_norm": 0.016766087936008884, "learning_rate": 9.511578609346572e-06, "loss": 0.5171111822128296, "step": 329, "token_acc": 0.8416836640089004 }, { "epoch": 0.22890834995231077, "grad_norm": 0.01721358650661982, "learning_rate": 9.506344621958197e-06, "loss": 0.5223368406295776, "step": 330, "token_acc": 0.8398600580056965 }, { "epoch": 0.22960201161883292, "grad_norm": 0.015477121591006652, "learning_rate": 9.501084195633299e-06, "loss": 0.503993570804596, "step": 331, "token_acc": 0.8480558143762925 }, { "epoch": 0.23029567328535508, "grad_norm": 0.015475701221194722, "learning_rate": 9.495797361235049e-06, "loss": 0.5070116519927979, "step": 332, "token_acc": 0.8453585835406653 }, { "epoch": 0.23098933495187723, "grad_norm": 0.01579658353017576, "learning_rate": 9.490484149781552e-06, "loss": 0.543958842754364, "step": 333, "token_acc": 0.8337185728490076 }, { "epoch": 0.2316829966183994, "grad_norm": 0.01664491088901619, "learning_rate": 9.485144592445669e-06, "loss": 0.5200075507164001, "step": 334, "token_acc": 0.8399713961001647 }, { "epoch": 0.23237665828492152, "grad_norm": 0.015149334554886748, "learning_rate": 9.47977872055484e-06, "loss": 0.5247831344604492, "step": 335, "token_acc": 0.8406001984126984 }, { "epoch": 0.23307031995144367, "grad_norm": 0.015865664265417977, "learning_rate": 9.474386565590882e-06, "loss": 0.5208053588867188, "step": 336, "token_acc": 0.8412903061094232 }, { "epoch": 0.23376398161796583, "grad_norm": 0.015436515663769391, "learning_rate": 9.468968159189832e-06, "loss": 0.5316664576530457, "step": 337, "token_acc": 0.8385586933182142 }, { "epoch": 0.23445764328448798, "grad_norm": 0.014921555631508716, "learning_rate": 9.463523533141725e-06, "loss": 0.4828224778175354, "step": 338, "token_acc": 0.8514941771039332 }, { "epoch": 0.23515130495101014, "grad_norm": 0.015166054852149483, "learning_rate": 9.458052719390444e-06, "loss": 0.5124372243881226, "step": 339, "token_acc": 0.8433891027539236 }, { "epoch": 0.2358449666175323, "grad_norm": 0.015240695035630478, "learning_rate": 9.452555750033514e-06, "loss": 0.5281767249107361, "step": 340, "token_acc": 0.8397743863633528 }, { "epoch": 0.23653862828405445, "grad_norm": 0.015705294736834453, "learning_rate": 9.447032657321911e-06, "loss": 0.5137337446212769, "step": 341, "token_acc": 0.8444738115184481 }, { "epoch": 0.2372322899505766, "grad_norm": 0.015362786286524323, "learning_rate": 9.44148347365988e-06, "loss": 0.5269559621810913, "step": 342, "token_acc": 0.8384749488365042 }, { "epoch": 0.23792595161709876, "grad_norm": 0.01569145364350026, "learning_rate": 9.435908231604743e-06, "loss": 0.5285457372665405, "step": 343, "token_acc": 0.838805739258228 }, { "epoch": 0.2386196132836209, "grad_norm": 0.015729534339120242, "learning_rate": 9.43030696386671e-06, "loss": 0.5052101612091064, "step": 344, "token_acc": 0.8453480443810822 }, { "epoch": 0.23931327495014307, "grad_norm": 0.015156127206428381, "learning_rate": 9.424679703308682e-06, "loss": 0.5546956658363342, "step": 345, "token_acc": 0.8336576112070433 }, { "epoch": 0.24000693661666522, "grad_norm": 0.014585139477232783, "learning_rate": 9.419026482946064e-06, "loss": 0.5120719075202942, "step": 346, "token_acc": 0.8431507427923198 }, { "epoch": 0.24070059828318738, "grad_norm": 0.014445993630856687, "learning_rate": 9.413347335946562e-06, "loss": 0.5229274034500122, "step": 347, "token_acc": 0.8424614231858946 }, { "epoch": 0.24139425994970953, "grad_norm": 0.015106842303586821, "learning_rate": 9.407642295630004e-06, "loss": 0.5001294612884521, "step": 348, "token_acc": 0.8459945333477826 }, { "epoch": 0.2420879216162317, "grad_norm": 0.01507694750675834, "learning_rate": 9.40191139546813e-06, "loss": 0.5336852073669434, "step": 349, "token_acc": 0.837959279194945 }, { "epoch": 0.24278158328275384, "grad_norm": 0.014786960467028636, "learning_rate": 9.3961546690844e-06, "loss": 0.5047065019607544, "step": 350, "token_acc": 0.8451935081148564 }, { "epoch": 0.243475244949276, "grad_norm": 0.016111400386701464, "learning_rate": 9.390372150253801e-06, "loss": 0.5363579392433167, "step": 351, "token_acc": 0.8377377713171071 }, { "epoch": 0.24416890661579815, "grad_norm": 0.014806934355474914, "learning_rate": 9.384563872902642e-06, "loss": 0.5233473777770996, "step": 352, "token_acc": 0.842768884499667 }, { "epoch": 0.2448625682823203, "grad_norm": 0.015536984137785283, "learning_rate": 9.378729871108361e-06, "loss": 0.5158038139343262, "step": 353, "token_acc": 0.8427148314715411 }, { "epoch": 0.24555622994884246, "grad_norm": 0.014270600895631246, "learning_rate": 9.372870179099324e-06, "loss": 0.4872702956199646, "step": 354, "token_acc": 0.8508143244916424 }, { "epoch": 0.24624989161536462, "grad_norm": 0.01756400628220267, "learning_rate": 9.366984831254617e-06, "loss": 0.5320186614990234, "step": 355, "token_acc": 0.8399570980565886 }, { "epoch": 0.24694355328188675, "grad_norm": 0.015501997710211914, "learning_rate": 9.361073862103856e-06, "loss": 0.5150004625320435, "step": 356, "token_acc": 0.8426325727962667 }, { "epoch": 0.2476372149484089, "grad_norm": 0.01516355708130367, "learning_rate": 9.355137306326974e-06, "loss": 0.5081771612167358, "step": 357, "token_acc": 0.8457236296419177 }, { "epoch": 0.24833087661493106, "grad_norm": 0.015162940988178946, "learning_rate": 9.349175198754026e-06, "loss": 0.5251328945159912, "step": 358, "token_acc": 0.8410816221949446 }, { "epoch": 0.2490245382814532, "grad_norm": 0.01610604809791819, "learning_rate": 9.343187574364976e-06, "loss": 0.5501012802124023, "step": 359, "token_acc": 0.8336980588023274 }, { "epoch": 0.24971819994797537, "grad_norm": 0.015014069549698788, "learning_rate": 9.3371744682895e-06, "loss": 0.5144484639167786, "step": 360, "token_acc": 0.8437477399291242 }, { "epoch": 0.2504118616144975, "grad_norm": 0.01536858131591527, "learning_rate": 9.331135915806776e-06, "loss": 0.529436469078064, "step": 361, "token_acc": 0.8398073877608827 }, { "epoch": 0.2511055232810197, "grad_norm": 0.014267477499566906, "learning_rate": 9.325071952345276e-06, "loss": 0.4936036467552185, "step": 362, "token_acc": 0.85001254934173 }, { "epoch": 0.25179918494754183, "grad_norm": 0.015101606089111463, "learning_rate": 9.31898261348256e-06, "loss": 0.5033895969390869, "step": 363, "token_acc": 0.8460487851631763 }, { "epoch": 0.252492846614064, "grad_norm": 0.01481168723993181, "learning_rate": 9.312867934945062e-06, "loss": 0.5156823396682739, "step": 364, "token_acc": 0.8427568232824151 }, { "epoch": 0.25318650828058614, "grad_norm": 0.01539197085930155, "learning_rate": 9.306727952607895e-06, "loss": 0.5506815910339355, "step": 365, "token_acc": 0.8339006954238195 }, { "epoch": 0.2538801699471083, "grad_norm": 0.01464757421410147, "learning_rate": 9.300562702494625e-06, "loss": 0.5148442983627319, "step": 366, "token_acc": 0.8424335441095218 }, { "epoch": 0.25457383161363045, "grad_norm": 0.01604410439885344, "learning_rate": 9.294372220777061e-06, "loss": 0.5515012741088867, "step": 367, "token_acc": 0.8331490729798076 }, { "epoch": 0.2552674932801526, "grad_norm": 0.014516128017340174, "learning_rate": 9.288156543775055e-06, "loss": 0.5175834894180298, "step": 368, "token_acc": 0.8440044132164218 }, { "epoch": 0.25596115494667476, "grad_norm": 0.015891226336616163, "learning_rate": 9.281915707956276e-06, "loss": 0.5368760824203491, "step": 369, "token_acc": 0.8355843495254796 }, { "epoch": 0.2566548166131969, "grad_norm": 0.015014325115193468, "learning_rate": 9.275649749936002e-06, "loss": 0.5097544193267822, "step": 370, "token_acc": 0.8460626185958254 }, { "epoch": 0.2573484782797191, "grad_norm": 0.01586829499574455, "learning_rate": 9.269358706476905e-06, "loss": 0.5155773162841797, "step": 371, "token_acc": 0.8438419140364319 }, { "epoch": 0.2580421399462412, "grad_norm": 0.014633808482407005, "learning_rate": 9.263042614488834e-06, "loss": 0.501129150390625, "step": 372, "token_acc": 0.8473576614892967 }, { "epoch": 0.2587358016127634, "grad_norm": 0.014595644983718459, "learning_rate": 9.256701511028599e-06, "loss": 0.5051275491714478, "step": 373, "token_acc": 0.8458912907843066 }, { "epoch": 0.2594294632792855, "grad_norm": 0.01653247226858408, "learning_rate": 9.250335433299751e-06, "loss": 0.5242918729782104, "step": 374, "token_acc": 0.8399441777649451 }, { "epoch": 0.2601231249458077, "grad_norm": 0.015066697389922447, "learning_rate": 9.243944418652368e-06, "loss": 0.5125404000282288, "step": 375, "token_acc": 0.8429374428823596 }, { "epoch": 0.2608167866123298, "grad_norm": 0.015470048212734164, "learning_rate": 9.23752850458284e-06, "loss": 0.5195497274398804, "step": 376, "token_acc": 0.840081398667345 }, { "epoch": 0.261510448278852, "grad_norm": 0.016527684656450006, "learning_rate": 9.231087728733632e-06, "loss": 0.48937085270881653, "step": 377, "token_acc": 0.8507273562989925 }, { "epoch": 0.26220410994537413, "grad_norm": 0.014857718075451047, "learning_rate": 9.224622128893081e-06, "loss": 0.4972577393054962, "step": 378, "token_acc": 0.8483801554094441 }, { "epoch": 0.2628977716118963, "grad_norm": 0.015307097668694956, "learning_rate": 9.218131742995169e-06, "loss": 0.5009753704071045, "step": 379, "token_acc": 0.8460398675711343 }, { "epoch": 0.26359143327841844, "grad_norm": 0.017137139710326304, "learning_rate": 9.211616609119292e-06, "loss": 0.4914396405220032, "step": 380, "token_acc": 0.8496380558428128 }, { "epoch": 0.2642850949449406, "grad_norm": 0.01688679835895317, "learning_rate": 9.205076765490051e-06, "loss": 0.5112553834915161, "step": 381, "token_acc": 0.8440843317450939 }, { "epoch": 0.26497875661146275, "grad_norm": 0.015498707272508344, "learning_rate": 9.198512250477017e-06, "loss": 0.5260264277458191, "step": 382, "token_acc": 0.8395060174228309 }, { "epoch": 0.26567241827798493, "grad_norm": 0.01601847202833902, "learning_rate": 9.191923102594508e-06, "loss": 0.5409051775932312, "step": 383, "token_acc": 0.8385167019655928 }, { "epoch": 0.26636607994450706, "grad_norm": 0.015779349550218487, "learning_rate": 9.185309360501367e-06, "loss": 0.5109480619430542, "step": 384, "token_acc": 0.8442505453412278 }, { "epoch": 0.26705974161102924, "grad_norm": 0.015237474081437172, "learning_rate": 9.17867106300073e-06, "loss": 0.5144761800765991, "step": 385, "token_acc": 0.8415920896176231 }, { "epoch": 0.26775340327755137, "grad_norm": 0.015170613469061449, "learning_rate": 9.1720082490398e-06, "loss": 0.5383806228637695, "step": 386, "token_acc": 0.8368532072786419 }, { "epoch": 0.26844706494407355, "grad_norm": 0.015870020601193538, "learning_rate": 9.165320957709625e-06, "loss": 0.5396860837936401, "step": 387, "token_acc": 0.8348558255359543 }, { "epoch": 0.2691407266105957, "grad_norm": 0.014835079143397236, "learning_rate": 9.158609228244854e-06, "loss": 0.5230482816696167, "step": 388, "token_acc": 0.8400951012675931 }, { "epoch": 0.2698343882771178, "grad_norm": 0.015554206431437009, "learning_rate": 9.151873100023524e-06, "loss": 0.5493577718734741, "step": 389, "token_acc": 0.8324703015534572 }, { "epoch": 0.27052804994364, "grad_norm": 0.014846461012395548, "learning_rate": 9.145112612566817e-06, "loss": 0.5434577465057373, "step": 390, "token_acc": 0.8357214343251885 }, { "epoch": 0.2712217116101621, "grad_norm": 0.01572551213512566, "learning_rate": 9.138327805538828e-06, "loss": 0.5342142581939697, "step": 391, "token_acc": 0.8370228492148268 }, { "epoch": 0.2719153732766843, "grad_norm": 0.015118844309971487, "learning_rate": 9.131518718746346e-06, "loss": 0.5311535596847534, "step": 392, "token_acc": 0.8364322995081803 }, { "epoch": 0.27260903494320643, "grad_norm": 0.03907869441146085, "learning_rate": 9.1246853921386e-06, "loss": 0.5179340839385986, "step": 393, "token_acc": 0.8438499563826694 }, { "epoch": 0.2733026966097286, "grad_norm": 0.0162873395387697, "learning_rate": 9.117827865807041e-06, "loss": 0.5215647220611572, "step": 394, "token_acc": 0.8385482325651757 }, { "epoch": 0.27399635827625074, "grad_norm": 0.01623205007566315, "learning_rate": 9.1109461799851e-06, "loss": 0.5324516296386719, "step": 395, "token_acc": 0.8382450393228222 }, { "epoch": 0.2746900199427729, "grad_norm": 0.03740891499895564, "learning_rate": 9.104040375047951e-06, "loss": 0.524557888507843, "step": 396, "token_acc": 0.8402539010843693 }, { "epoch": 0.27538368160929505, "grad_norm": 0.01476654155273099, "learning_rate": 9.097110491512278e-06, "loss": 0.49928998947143555, "step": 397, "token_acc": 0.8459154386049541 }, { "epoch": 0.27607734327581723, "grad_norm": 0.014596588737129206, "learning_rate": 9.090156570036036e-06, "loss": 0.5146458148956299, "step": 398, "token_acc": 0.8420197153782288 }, { "epoch": 0.27677100494233936, "grad_norm": 0.015991091976123323, "learning_rate": 9.08317865141821e-06, "loss": 0.5213555097579956, "step": 399, "token_acc": 0.8422244496417567 }, { "epoch": 0.27746466660886154, "grad_norm": 0.0151960245034311, "learning_rate": 9.076176776598574e-06, "loss": 0.519744873046875, "step": 400, "token_acc": 0.8404861052094842 }, { "epoch": 0.27815832827538367, "grad_norm": 0.01603505003785986, "learning_rate": 9.069150986657465e-06, "loss": 0.5275493860244751, "step": 401, "token_acc": 0.8370458346462418 }, { "epoch": 0.27885198994190585, "grad_norm": 0.015412215044839547, "learning_rate": 9.062101322815517e-06, "loss": 0.521660566329956, "step": 402, "token_acc": 0.8404096205340988 }, { "epoch": 0.279545651608428, "grad_norm": 0.01457699300711075, "learning_rate": 9.055027826433444e-06, "loss": 0.53773033618927, "step": 403, "token_acc": 0.8362848726390574 }, { "epoch": 0.28023931327495016, "grad_norm": 0.01555530113966596, "learning_rate": 9.047930539011785e-06, "loss": 0.5251874327659607, "step": 404, "token_acc": 0.8406034213122248 }, { "epoch": 0.2809329749414723, "grad_norm": 0.02037896650261726, "learning_rate": 9.040809502190655e-06, "loss": 0.5325033664703369, "step": 405, "token_acc": 0.837363690832834 }, { "epoch": 0.2816266366079945, "grad_norm": 0.01689631488207834, "learning_rate": 9.033664757749518e-06, "loss": 0.5118398666381836, "step": 406, "token_acc": 0.8438998206726606 }, { "epoch": 0.2823202982745166, "grad_norm": 0.015400803530188235, "learning_rate": 9.026496347606924e-06, "loss": 0.5068721771240234, "step": 407, "token_acc": 0.8444614684829188 }, { "epoch": 0.2830139599410388, "grad_norm": 0.0147724856627017, "learning_rate": 9.019304313820274e-06, "loss": 0.5092871189117432, "step": 408, "token_acc": 0.8448820268019194 }, { "epoch": 0.2837076216075609, "grad_norm": 0.016164114548866515, "learning_rate": 9.012088698585568e-06, "loss": 0.5129624009132385, "step": 409, "token_acc": 0.8434629372726932 }, { "epoch": 0.28440128327408304, "grad_norm": 0.015229387567882067, "learning_rate": 9.004849544237164e-06, "loss": 0.5178954601287842, "step": 410, "token_acc": 0.8421274799493457 }, { "epoch": 0.2850949449406052, "grad_norm": 0.01516520918598633, "learning_rate": 8.997586893247522e-06, "loss": 0.5288331508636475, "step": 411, "token_acc": 0.8386326721464783 }, { "epoch": 0.28578860660712735, "grad_norm": 0.015472198024663273, "learning_rate": 8.990300788226956e-06, "loss": 0.5039794445037842, "step": 412, "token_acc": 0.8459695256819858 }, { "epoch": 0.28648226827364953, "grad_norm": 0.01748734663276324, "learning_rate": 8.982991271923387e-06, "loss": 0.5192071199417114, "step": 413, "token_acc": 0.8423354257004926 }, { "epoch": 0.28717592994017166, "grad_norm": 0.014817663576995368, "learning_rate": 8.975658387222097e-06, "loss": 0.523143470287323, "step": 414, "token_acc": 0.8411820282530251 }, { "epoch": 0.28786959160669384, "grad_norm": 0.017954291143021404, "learning_rate": 8.968302177145461e-06, "loss": 0.5327160954475403, "step": 415, "token_acc": 0.8358237066800602 }, { "epoch": 0.28856325327321597, "grad_norm": 0.017415399248442612, "learning_rate": 8.960922684852715e-06, "loss": 0.5147889852523804, "step": 416, "token_acc": 0.8425771159398268 }, { "epoch": 0.28925691493973815, "grad_norm": 0.017303796913836763, "learning_rate": 8.953519953639684e-06, "loss": 0.5075184106826782, "step": 417, "token_acc": 0.8453761938038667 }, { "epoch": 0.2899505766062603, "grad_norm": 0.01542591766938294, "learning_rate": 8.946094026938545e-06, "loss": 0.525463342666626, "step": 418, "token_acc": 0.8403368640388956 }, { "epoch": 0.29064423827278246, "grad_norm": 0.014649266639539138, "learning_rate": 8.93864494831756e-06, "loss": 0.49865150451660156, "step": 419, "token_acc": 0.8483927019982623 }, { "epoch": 0.2913378999393046, "grad_norm": 0.016173572139256322, "learning_rate": 8.931172761480824e-06, "loss": 0.5305777788162231, "step": 420, "token_acc": 0.8386133603238867 }, { "epoch": 0.2920315616058268, "grad_norm": 0.01626199098828306, "learning_rate": 8.923677510268009e-06, "loss": 0.5181946158409119, "step": 421, "token_acc": 0.8426678768168009 }, { "epoch": 0.2927252232723489, "grad_norm": 0.016245550144932154, "learning_rate": 8.916159238654107e-06, "loss": 0.5103972554206848, "step": 422, "token_acc": 0.8448725376593279 }, { "epoch": 0.2934188849388711, "grad_norm": 0.042144487355583425, "learning_rate": 8.908617990749176e-06, "loss": 0.5151072144508362, "step": 423, "token_acc": 0.843064069606486 }, { "epoch": 0.2941125466053932, "grad_norm": 0.01857503092019543, "learning_rate": 8.901053810798067e-06, "loss": 0.5086730718612671, "step": 424, "token_acc": 0.8437840621893229 }, { "epoch": 0.2948062082719154, "grad_norm": 0.015855951202732168, "learning_rate": 8.893466743180187e-06, "loss": 0.5245629549026489, "step": 425, "token_acc": 0.8393296487787485 }, { "epoch": 0.2954998699384375, "grad_norm": 0.015263356017271449, "learning_rate": 8.885856832409217e-06, "loss": 0.5187796354293823, "step": 426, "token_acc": 0.8403163152053275 }, { "epoch": 0.2961935316049597, "grad_norm": 0.015265044326419034, "learning_rate": 8.87822412313286e-06, "loss": 0.5040236711502075, "step": 427, "token_acc": 0.8475853583512766 }, { "epoch": 0.29688719327148183, "grad_norm": 0.015115233475120563, "learning_rate": 8.870568660132588e-06, "loss": 0.5143234729766846, "step": 428, "token_acc": 0.8413271384322135 }, { "epoch": 0.297580854938004, "grad_norm": 0.015694267951841423, "learning_rate": 8.862890488323358e-06, "loss": 0.5411935448646545, "step": 429, "token_acc": 0.8347218865668604 }, { "epoch": 0.29827451660452614, "grad_norm": 0.015198421187940505, "learning_rate": 8.855189652753367e-06, "loss": 0.5250048041343689, "step": 430, "token_acc": 0.841224182417501 }, { "epoch": 0.2989681782710483, "grad_norm": 0.014733966273646234, "learning_rate": 8.847466198603783e-06, "loss": 0.5064525008201599, "step": 431, "token_acc": 0.8453483445312027 }, { "epoch": 0.29966183993757045, "grad_norm": 0.015520884060781717, "learning_rate": 8.839720171188475e-06, "loss": 0.5219970941543579, "step": 432, "token_acc": 0.8399242274167987 }, { "epoch": 0.3003555016040926, "grad_norm": 0.015490377898411156, "learning_rate": 8.83195161595375e-06, "loss": 0.5266472697257996, "step": 433, "token_acc": 0.8390728222951993 }, { "epoch": 0.30104916327061476, "grad_norm": 0.015471715919566418, "learning_rate": 8.82416057847809e-06, "loss": 0.5178579092025757, "step": 434, "token_acc": 0.8424208158121017 }, { "epoch": 0.3017428249371369, "grad_norm": 0.017342038595316578, "learning_rate": 8.816347104471876e-06, "loss": 0.525598406791687, "step": 435, "token_acc": 0.8395202163829169 }, { "epoch": 0.30243648660365907, "grad_norm": 0.01633426616890085, "learning_rate": 8.808511239777133e-06, "loss": 0.5070786476135254, "step": 436, "token_acc": 0.845313532160595 }, { "epoch": 0.3031301482701812, "grad_norm": 0.015992302104721953, "learning_rate": 8.800653030367248e-06, "loss": 0.5191164016723633, "step": 437, "token_acc": 0.8386458078709362 }, { "epoch": 0.3038238099367034, "grad_norm": 0.01679357812680105, "learning_rate": 8.792772522346704e-06, "loss": 0.5312725305557251, "step": 438, "token_acc": 0.8379193124238342 }, { "epoch": 0.3045174716032255, "grad_norm": 0.014246228157116679, "learning_rate": 8.784869761950813e-06, "loss": 0.49090519547462463, "step": 439, "token_acc": 0.850686299556402 }, { "epoch": 0.3052111332697477, "grad_norm": 0.015218062174844012, "learning_rate": 8.776944795545447e-06, "loss": 0.49736306071281433, "step": 440, "token_acc": 0.8473082334267265 }, { "epoch": 0.3059047949362698, "grad_norm": 0.015535651972883565, "learning_rate": 8.768997669626757e-06, "loss": 0.5122103095054626, "step": 441, "token_acc": 0.8435762311466473 }, { "epoch": 0.306598456602792, "grad_norm": 0.014436174670321014, "learning_rate": 8.761028430820904e-06, "loss": 0.4959847331047058, "step": 442, "token_acc": 0.8485278635390421 }, { "epoch": 0.30729211826931413, "grad_norm": 0.014538566525178398, "learning_rate": 8.753037125883787e-06, "loss": 0.5148453712463379, "step": 443, "token_acc": 0.8436062775591484 }, { "epoch": 0.3079857799358363, "grad_norm": 0.01819159839530199, "learning_rate": 8.745023801700772e-06, "loss": 0.5193444490432739, "step": 444, "token_acc": 0.8444109114729552 }, { "epoch": 0.30867944160235844, "grad_norm": 0.015527837634041903, "learning_rate": 8.73698850528641e-06, "loss": 0.5408623218536377, "step": 445, "token_acc": 0.8349287466788706 }, { "epoch": 0.3093731032688806, "grad_norm": 0.014681173328799646, "learning_rate": 8.72893128378416e-06, "loss": 0.5095841884613037, "step": 446, "token_acc": 0.8445333674549537 }, { "epoch": 0.31006676493540275, "grad_norm": 0.016037016016761607, "learning_rate": 8.720852184466127e-06, "loss": 0.5162643194198608, "step": 447, "token_acc": 0.8432611426178968 }, { "epoch": 0.31076042660192493, "grad_norm": 0.0150975551142059, "learning_rate": 8.712751254732762e-06, "loss": 0.5341596603393555, "step": 448, "token_acc": 0.8383838383838383 }, { "epoch": 0.31145408826844706, "grad_norm": 0.015292075378334085, "learning_rate": 8.704628542112605e-06, "loss": 0.5082976818084717, "step": 449, "token_acc": 0.8454041357935009 }, { "epoch": 0.31214774993496924, "grad_norm": 0.015022010251764747, "learning_rate": 8.696484094261994e-06, "loss": 0.5320202708244324, "step": 450, "token_acc": 0.8396169670139306 }, { "epoch": 0.31284141160149137, "grad_norm": 0.016080130602985576, "learning_rate": 8.68831795896479e-06, "loss": 0.5262729525566101, "step": 451, "token_acc": 0.8388069452471292 }, { "epoch": 0.31353507326801355, "grad_norm": 0.018925975187235794, "learning_rate": 8.680130184132092e-06, "loss": 0.4944055676460266, "step": 452, "token_acc": 0.8475179429420434 }, { "epoch": 0.3142287349345357, "grad_norm": 0.015844473548302117, "learning_rate": 8.67192081780196e-06, "loss": 0.5045117139816284, "step": 453, "token_acc": 0.8458836089180197 }, { "epoch": 0.3149223966010578, "grad_norm": 0.015169671398051332, "learning_rate": 8.663689908139139e-06, "loss": 0.5029076337814331, "step": 454, "token_acc": 0.8455077009104944 }, { "epoch": 0.31561605826758, "grad_norm": 0.01640410259152484, "learning_rate": 8.655437503434759e-06, "loss": 0.533231794834137, "step": 455, "token_acc": 0.8387481228994351 }, { "epoch": 0.3163097199341021, "grad_norm": 0.015849664984432348, "learning_rate": 8.64716365210607e-06, "loss": 0.5376383066177368, "step": 456, "token_acc": 0.8361460263408282 }, { "epoch": 0.3170033816006243, "grad_norm": 0.015613411181080235, "learning_rate": 8.638868402696147e-06, "loss": 0.5056921243667603, "step": 457, "token_acc": 0.8446214157153776 }, { "epoch": 0.31769704326714643, "grad_norm": 0.016281297656185603, "learning_rate": 8.630551803873611e-06, "loss": 0.5238337516784668, "step": 458, "token_acc": 0.839984063149769 }, { "epoch": 0.3183907049336686, "grad_norm": 0.015327350950266725, "learning_rate": 8.622213904432339e-06, "loss": 0.508357584476471, "step": 459, "token_acc": 0.845698881000244 }, { "epoch": 0.31908436660019074, "grad_norm": 0.015479920578196268, "learning_rate": 8.613854753291182e-06, "loss": 0.5006684064865112, "step": 460, "token_acc": 0.8456796212270665 }, { "epoch": 0.3197780282667129, "grad_norm": 0.015375097497476257, "learning_rate": 8.605474399493672e-06, "loss": 0.5011909604072571, "step": 461, "token_acc": 0.8462968327539591 }, { "epoch": 0.32047168993323505, "grad_norm": 0.0159448221217863, "learning_rate": 8.597072892207741e-06, "loss": 0.504135251045227, "step": 462, "token_acc": 0.8447735512812634 }, { "epoch": 0.32116535159975723, "grad_norm": 0.01469855751818441, "learning_rate": 8.58865028072543e-06, "loss": 0.5155189037322998, "step": 463, "token_acc": 0.8423325043238944 }, { "epoch": 0.32185901326627936, "grad_norm": 0.039280710325269405, "learning_rate": 8.580206614462595e-06, "loss": 0.5256026983261108, "step": 464, "token_acc": 0.8403409302383286 }, { "epoch": 0.32255267493280154, "grad_norm": 0.01527916216135021, "learning_rate": 8.571741942958627e-06, "loss": 0.5152538418769836, "step": 465, "token_acc": 0.841803699897225 }, { "epoch": 0.32324633659932367, "grad_norm": 0.014172802137942052, "learning_rate": 8.563256315876151e-06, "loss": 0.49651774764060974, "step": 466, "token_acc": 0.8479223766640563 }, { "epoch": 0.32393999826584585, "grad_norm": 0.014890066606866847, "learning_rate": 8.554749783000743e-06, "loss": 0.498933345079422, "step": 467, "token_acc": 0.8467583733497009 }, { "epoch": 0.324633659932368, "grad_norm": 0.01430783395061116, "learning_rate": 8.546222394240633e-06, "loss": 0.5099368095397949, "step": 468, "token_acc": 0.8441290545715062 }, { "epoch": 0.32532732159889016, "grad_norm": 0.015283127437614133, "learning_rate": 8.537674199626411e-06, "loss": 0.5056677460670471, "step": 469, "token_acc": 0.8444464029411024 }, { "epoch": 0.3260209832654123, "grad_norm": 0.016190399026903713, "learning_rate": 8.529105249310741e-06, "loss": 0.518047571182251, "step": 470, "token_acc": 0.8420991952777329 }, { "epoch": 0.3267146449319345, "grad_norm": 0.015229438277442633, "learning_rate": 8.520515593568057e-06, "loss": 0.5186741352081299, "step": 471, "token_acc": 0.8406854574860797 }, { "epoch": 0.3274083065984566, "grad_norm": 0.01437881043558416, "learning_rate": 8.511905282794275e-06, "loss": 0.49071556329727173, "step": 472, "token_acc": 0.8493257863788066 }, { "epoch": 0.3281019682649788, "grad_norm": 0.015697640163358017, "learning_rate": 8.503274367506495e-06, "loss": 0.49917885661125183, "step": 473, "token_acc": 0.8469001511491209 }, { "epoch": 0.3287956299315009, "grad_norm": 0.014924447343735261, "learning_rate": 8.494622898342703e-06, "loss": 0.5410422086715698, "step": 474, "token_acc": 0.8355143221037384 }, { "epoch": 0.32948929159802304, "grad_norm": 0.01472278755380871, "learning_rate": 8.48595092606148e-06, "loss": 0.5060070753097534, "step": 475, "token_acc": 0.8441943055163007 }, { "epoch": 0.3301829532645452, "grad_norm": 0.016211358035186307, "learning_rate": 8.477258501541692e-06, "loss": 0.5011782646179199, "step": 476, "token_acc": 0.8468153434433542 }, { "epoch": 0.33087661493106735, "grad_norm": 0.0159707448113501, "learning_rate": 8.468545675782206e-06, "loss": 0.5062922239303589, "step": 477, "token_acc": 0.8465792440515042 }, { "epoch": 0.33157027659758953, "grad_norm": 0.01438100696860169, "learning_rate": 8.459812499901582e-06, "loss": 0.5181795954704285, "step": 478, "token_acc": 0.8419339733083587 }, { "epoch": 0.33226393826411166, "grad_norm": 0.014888109009631865, "learning_rate": 8.451059025137775e-06, "loss": 0.5212734937667847, "step": 479, "token_acc": 0.8417836775770355 }, { "epoch": 0.33295759993063384, "grad_norm": 0.014752264866062395, "learning_rate": 8.442285302847834e-06, "loss": 0.5245628356933594, "step": 480, "token_acc": 0.8394935101407865 }, { "epoch": 0.33365126159715597, "grad_norm": 0.015533574654980844, "learning_rate": 8.433491384507603e-06, "loss": 0.5237957239151001, "step": 481, "token_acc": 0.8402925597742686 }, { "epoch": 0.33434492326367815, "grad_norm": 0.014782181001629592, "learning_rate": 8.424677321711414e-06, "loss": 0.5072699189186096, "step": 482, "token_acc": 0.8459030967550748 }, { "epoch": 0.3350385849302003, "grad_norm": 0.014901208254187023, "learning_rate": 8.415843166171789e-06, "loss": 0.5318183898925781, "step": 483, "token_acc": 0.8377705328248499 }, { "epoch": 0.33573224659672246, "grad_norm": 0.015089244464116603, "learning_rate": 8.406988969719137e-06, "loss": 0.509574294090271, "step": 484, "token_acc": 0.8452379495588541 }, { "epoch": 0.3364259082632446, "grad_norm": 0.015241047571159422, "learning_rate": 8.398114784301445e-06, "loss": 0.510574460029602, "step": 485, "token_acc": 0.8447450019907581 }, { "epoch": 0.33711956992976677, "grad_norm": 0.01605660543597658, "learning_rate": 8.38922066198398e-06, "loss": 0.5048902630805969, "step": 486, "token_acc": 0.8450277135684637 }, { "epoch": 0.3378132315962889, "grad_norm": 0.015067886972918882, "learning_rate": 8.380306654948975e-06, "loss": 0.5201930403709412, "step": 487, "token_acc": 0.840404237660499 }, { "epoch": 0.3385068932628111, "grad_norm": 0.014698047397789505, "learning_rate": 8.37137281549533e-06, "loss": 0.5247317552566528, "step": 488, "token_acc": 0.8401180303682302 }, { "epoch": 0.3392005549293332, "grad_norm": 0.01547225108165908, "learning_rate": 8.362419196038303e-06, "loss": 0.5395357608795166, "step": 489, "token_acc": 0.8365361832670316 }, { "epoch": 0.3398942165958554, "grad_norm": 0.016592095350948615, "learning_rate": 8.353445849109201e-06, "loss": 0.5161144137382507, "step": 490, "token_acc": 0.8423169245076724 }, { "epoch": 0.3405878782623775, "grad_norm": 0.01565361979028549, "learning_rate": 8.344452827355077e-06, "loss": 0.49641135334968567, "step": 491, "token_acc": 0.8491467576791809 }, { "epoch": 0.3412815399288997, "grad_norm": 0.016046102509797076, "learning_rate": 8.33544018353841e-06, "loss": 0.5233786106109619, "step": 492, "token_acc": 0.8401366291700865 }, { "epoch": 0.34197520159542183, "grad_norm": 0.015055077625562647, "learning_rate": 8.32640797053681e-06, "loss": 0.514272928237915, "step": 493, "token_acc": 0.8433081106644597 }, { "epoch": 0.342668863261944, "grad_norm": 0.015196469722049017, "learning_rate": 8.317356241342693e-06, "loss": 0.5114027857780457, "step": 494, "token_acc": 0.8429557251908397 }, { "epoch": 0.34336252492846614, "grad_norm": 0.015205736991015855, "learning_rate": 8.308285049062983e-06, "loss": 0.5154869556427002, "step": 495, "token_acc": 0.8414052005373249 }, { "epoch": 0.34405618659498827, "grad_norm": 0.01573164641782511, "learning_rate": 8.299194446918795e-06, "loss": 0.5267399549484253, "step": 496, "token_acc": 0.8391584873698371 }, { "epoch": 0.34474984826151045, "grad_norm": 0.015106113454988985, "learning_rate": 8.290084488245119e-06, "loss": 0.5137426257133484, "step": 497, "token_acc": 0.8434313212157478 }, { "epoch": 0.3454435099280326, "grad_norm": 0.016043742136838998, "learning_rate": 8.280955226490513e-06, "loss": 0.5140025615692139, "step": 498, "token_acc": 0.8426554897143133 }, { "epoch": 0.34613717159455476, "grad_norm": 0.01512099091343486, "learning_rate": 8.271806715216785e-06, "loss": 0.4873787760734558, "step": 499, "token_acc": 0.8508402332075842 }, { "epoch": 0.3468308332610769, "grad_norm": 0.015337253564296958, "learning_rate": 8.262639008098683e-06, "loss": 0.5433511137962341, "step": 500, "token_acc": 0.8338701731782182 }, { "epoch": 0.34752449492759907, "grad_norm": 0.014767011280467526, "learning_rate": 8.253452158923578e-06, "loss": 0.5211372375488281, "step": 501, "token_acc": 0.8411593362099182 }, { "epoch": 0.3482181565941212, "grad_norm": 0.015057936775405978, "learning_rate": 8.244246221591146e-06, "loss": 0.5089165568351746, "step": 502, "token_acc": 0.8449070004570866 }, { "epoch": 0.3489118182606434, "grad_norm": 0.017312507250060734, "learning_rate": 8.235021250113058e-06, "loss": 0.49309051036834717, "step": 503, "token_acc": 0.8477640576573031 }, { "epoch": 0.3496054799271655, "grad_norm": 0.015234434977666102, "learning_rate": 8.225777298612655e-06, "loss": 0.5010501146316528, "step": 504, "token_acc": 0.8465338873081822 }, { "epoch": 0.3502991415936877, "grad_norm": 0.018668465614805525, "learning_rate": 8.21651442132464e-06, "loss": 0.5293244123458862, "step": 505, "token_acc": 0.8389824486605557 }, { "epoch": 0.3509928032602098, "grad_norm": 0.015044229633925946, "learning_rate": 8.207232672594744e-06, "loss": 0.5010188221931458, "step": 506, "token_acc": 0.8466042012630456 }, { "epoch": 0.351686464926732, "grad_norm": 0.016028619793912523, "learning_rate": 8.197932106879433e-06, "loss": 0.5274627804756165, "step": 507, "token_acc": 0.8375374256306138 }, { "epoch": 0.35238012659325413, "grad_norm": 0.01530508358390178, "learning_rate": 8.188612778745559e-06, "loss": 0.5176675319671631, "step": 508, "token_acc": 0.8419521165776273 }, { "epoch": 0.3530737882597763, "grad_norm": 0.01983842999621884, "learning_rate": 8.179274742870062e-06, "loss": 0.5248233079910278, "step": 509, "token_acc": 0.8406174867290342 }, { "epoch": 0.35376744992629844, "grad_norm": 0.014921336840486051, "learning_rate": 8.16991805403964e-06, "loss": 0.5148507356643677, "step": 510, "token_acc": 0.8431997698697127 }, { "epoch": 0.3544611115928206, "grad_norm": 0.015242312671167433, "learning_rate": 8.160542767150426e-06, "loss": 0.5068631768226624, "step": 511, "token_acc": 0.8436426725206245 }, { "epoch": 0.35515477325934275, "grad_norm": 0.01604486887118131, "learning_rate": 8.151148937207672e-06, "loss": 0.49771061539649963, "step": 512, "token_acc": 0.8488374932838373 }, { "epoch": 0.35584843492586493, "grad_norm": 0.015221824748281283, "learning_rate": 8.141736619325421e-06, "loss": 0.5147275328636169, "step": 513, "token_acc": 0.8441062824977341 }, { "epoch": 0.35654209659238706, "grad_norm": 0.031081505810062766, "learning_rate": 8.132305868726185e-06, "loss": 0.5092084407806396, "step": 514, "token_acc": 0.8438091054410563 }, { "epoch": 0.35723575825890924, "grad_norm": 0.018281442943547502, "learning_rate": 8.122856740740625e-06, "loss": 0.5070136189460754, "step": 515, "token_acc": 0.8447560264963668 }, { "epoch": 0.35792941992543137, "grad_norm": 0.015492926707064345, "learning_rate": 8.113389290807218e-06, "loss": 0.5224616527557373, "step": 516, "token_acc": 0.8396173454774589 }, { "epoch": 0.35862308159195355, "grad_norm": 0.014889673406533882, "learning_rate": 8.103903574471943e-06, "loss": 0.5071095824241638, "step": 517, "token_acc": 0.8456110575078758 }, { "epoch": 0.3593167432584757, "grad_norm": 0.01579182762034589, "learning_rate": 8.094399647387943e-06, "loss": 0.5151270627975464, "step": 518, "token_acc": 0.8428417783470895 }, { "epoch": 0.3600104049249978, "grad_norm": 0.015170324528466457, "learning_rate": 8.084877565315206e-06, "loss": 0.5037021636962891, "step": 519, "token_acc": 0.8458199318033421 }, { "epoch": 0.36070406659152, "grad_norm": 0.016394029414049695, "learning_rate": 8.07533738412024e-06, "loss": 0.5156444311141968, "step": 520, "token_acc": 0.841447970609256 }, { "epoch": 0.3613977282580421, "grad_norm": 0.01551944839191558, "learning_rate": 8.065779159775732e-06, "loss": 0.513932466506958, "step": 521, "token_acc": 0.8429502177747381 }, { "epoch": 0.3620913899245643, "grad_norm": 0.016164469432509796, "learning_rate": 8.05620294836024e-06, "loss": 0.5044457912445068, "step": 522, "token_acc": 0.8452438069745714 }, { "epoch": 0.3627850515910864, "grad_norm": 0.01467591965859679, "learning_rate": 8.04660880605785e-06, "loss": 0.49637460708618164, "step": 523, "token_acc": 0.848197493938776 }, { "epoch": 0.3634787132576086, "grad_norm": 0.014432347061017056, "learning_rate": 8.036996789157844e-06, "loss": 0.5002917647361755, "step": 524, "token_acc": 0.8447456455455566 }, { "epoch": 0.36417237492413074, "grad_norm": 0.015340352121860277, "learning_rate": 8.02736695405438e-06, "loss": 0.48765647411346436, "step": 525, "token_acc": 0.8511527963296359 }, { "epoch": 0.3648660365906529, "grad_norm": 0.014869872840173612, "learning_rate": 8.017719357246152e-06, "loss": 0.48543187975883484, "step": 526, "token_acc": 0.8518869152574465 }, { "epoch": 0.36555969825717505, "grad_norm": 0.0157664713234682, "learning_rate": 8.00805405533607e-06, "loss": 0.5169358253479004, "step": 527, "token_acc": 0.840858613901368 }, { "epoch": 0.36625335992369723, "grad_norm": 0.018077047318306658, "learning_rate": 7.998371105030915e-06, "loss": 0.510743260383606, "step": 528, "token_acc": 0.842917558472369 }, { "epoch": 0.36694702159021936, "grad_norm": 0.014398964890116222, "learning_rate": 7.988670563141014e-06, "loss": 0.48773178458213806, "step": 529, "token_acc": 0.8512391890635171 }, { "epoch": 0.36764068325674154, "grad_norm": 0.015858021003966053, "learning_rate": 7.9789524865799e-06, "loss": 0.48927903175354004, "step": 530, "token_acc": 0.8485064440928143 }, { "epoch": 0.36833434492326367, "grad_norm": 0.016050408581791166, "learning_rate": 7.969216932363992e-06, "loss": 0.5010135173797607, "step": 531, "token_acc": 0.8450528256571366 }, { "epoch": 0.36902800658978585, "grad_norm": 0.015122861869794628, "learning_rate": 7.959463957612244e-06, "loss": 0.5088565349578857, "step": 532, "token_acc": 0.8441640453722238 }, { "epoch": 0.369721668256308, "grad_norm": 0.014911865917614859, "learning_rate": 7.949693619545815e-06, "loss": 0.5193741321563721, "step": 533, "token_acc": 0.8415699984109328 }, { "epoch": 0.37041532992283016, "grad_norm": 0.015394286683363782, "learning_rate": 7.939905975487745e-06, "loss": 0.4901498258113861, "step": 534, "token_acc": 0.8495233347872142 }, { "epoch": 0.3711089915893523, "grad_norm": 0.01420213994849795, "learning_rate": 7.930101082862602e-06, "loss": 0.4910246729850769, "step": 535, "token_acc": 0.8481871510354089 }, { "epoch": 0.37180265325587447, "grad_norm": 0.015956301544136353, "learning_rate": 7.920278999196154e-06, "loss": 0.502273440361023, "step": 536, "token_acc": 0.8446495992373594 }, { "epoch": 0.3724963149223966, "grad_norm": 0.01642554753590517, "learning_rate": 7.910439782115027e-06, "loss": 0.5083401799201965, "step": 537, "token_acc": 0.8437343557308392 }, { "epoch": 0.3731899765889188, "grad_norm": 0.014411355251447306, "learning_rate": 7.900583489346374e-06, "loss": 0.48945099115371704, "step": 538, "token_acc": 0.8519457922854482 }, { "epoch": 0.3738836382554409, "grad_norm": 0.014658379214605286, "learning_rate": 7.89071017871753e-06, "loss": 0.5036424398422241, "step": 539, "token_acc": 0.8457474322027636 }, { "epoch": 0.37457729992196304, "grad_norm": 0.015294871956818026, "learning_rate": 7.880819908155671e-06, "loss": 0.5075095891952515, "step": 540, "token_acc": 0.8466974556650545 }, { "epoch": 0.3752709615884852, "grad_norm": 0.015609114971496588, "learning_rate": 7.870912735687485e-06, "loss": 0.5383604168891907, "step": 541, "token_acc": 0.835771899003629 }, { "epoch": 0.37596462325500735, "grad_norm": 0.014930629598757714, "learning_rate": 7.860988719438818e-06, "loss": 0.5068605542182922, "step": 542, "token_acc": 0.8439461778562327 }, { "epoch": 0.37665828492152953, "grad_norm": 0.015616618153280581, "learning_rate": 7.851047917634345e-06, "loss": 0.5028555989265442, "step": 543, "token_acc": 0.8447262080480074 }, { "epoch": 0.37735194658805166, "grad_norm": 0.014738517543084702, "learning_rate": 7.841090388597213e-06, "loss": 0.4893748164176941, "step": 544, "token_acc": 0.8502333525789069 }, { "epoch": 0.37804560825457384, "grad_norm": 0.016124923908498772, "learning_rate": 7.83111619074872e-06, "loss": 0.5217632055282593, "step": 545, "token_acc": 0.8415282989770215 }, { "epoch": 0.37873926992109597, "grad_norm": 0.015112404823630535, "learning_rate": 7.821125382607952e-06, "loss": 0.49577969312667847, "step": 546, "token_acc": 0.8473670109119251 }, { "epoch": 0.37943293158761815, "grad_norm": 0.016704553707368532, "learning_rate": 7.811118022791457e-06, "loss": 0.5073549151420593, "step": 547, "token_acc": 0.8434277960187552 }, { "epoch": 0.3801265932541403, "grad_norm": 0.01489509368863791, "learning_rate": 7.801094170012881e-06, "loss": 0.5067884922027588, "step": 548, "token_acc": 0.8441761628993087 }, { "epoch": 0.38082025492066246, "grad_norm": 0.01577337611694308, "learning_rate": 7.791053883082646e-06, "loss": 0.5137611627578735, "step": 549, "token_acc": 0.84219469282495 }, { "epoch": 0.3815139165871846, "grad_norm": 0.01575687423458691, "learning_rate": 7.780997220907584e-06, "loss": 0.5321401357650757, "step": 550, "token_acc": 0.8381193971037336 }, { "epoch": 0.38220757825370677, "grad_norm": 0.01496396244346265, "learning_rate": 7.770924242490613e-06, "loss": 0.5236809253692627, "step": 551, "token_acc": 0.8398203967281187 }, { "epoch": 0.3829012399202289, "grad_norm": 0.014360744449394731, "learning_rate": 7.760835006930365e-06, "loss": 0.49986565113067627, "step": 552, "token_acc": 0.8469931371408631 }, { "epoch": 0.3835949015867511, "grad_norm": 0.016088836435728376, "learning_rate": 7.75072957342087e-06, "loss": 0.505104660987854, "step": 553, "token_acc": 0.8448271450139461 }, { "epoch": 0.3842885632532732, "grad_norm": 0.015532584111386492, "learning_rate": 7.740608001251175e-06, "loss": 0.5222662687301636, "step": 554, "token_acc": 0.8396502192429817 }, { "epoch": 0.3849822249197954, "grad_norm": 0.014715363885695935, "learning_rate": 7.730470349805027e-06, "loss": 0.5129363536834717, "step": 555, "token_acc": 0.844133225175359 }, { "epoch": 0.3856758865863175, "grad_norm": 0.01574371748404612, "learning_rate": 7.720316678560501e-06, "loss": 0.5008957982063293, "step": 556, "token_acc": 0.8474052948105896 }, { "epoch": 0.3863695482528397, "grad_norm": 0.015123261299390416, "learning_rate": 7.710147047089671e-06, "loss": 0.4960119128227234, "step": 557, "token_acc": 0.8487981234281292 }, { "epoch": 0.38706320991936183, "grad_norm": 0.016687588666084573, "learning_rate": 7.699961515058239e-06, "loss": 0.5221847295761108, "step": 558, "token_acc": 0.840855284471802 }, { "epoch": 0.387756871585884, "grad_norm": 0.015248504084549152, "learning_rate": 7.689760142225205e-06, "loss": 0.5074684023857117, "step": 559, "token_acc": 0.844795719844358 }, { "epoch": 0.38845053325240614, "grad_norm": 0.015144467699204878, "learning_rate": 7.679542988442504e-06, "loss": 0.5165252089500427, "step": 560, "token_acc": 0.8413378966239171 }, { "epoch": 0.38914419491892827, "grad_norm": 0.015347878903393824, "learning_rate": 7.66931011365466e-06, "loss": 0.5050373077392578, "step": 561, "token_acc": 0.8466537791007958 }, { "epoch": 0.38983785658545045, "grad_norm": 0.015392075645116247, "learning_rate": 7.659061577898429e-06, "loss": 0.5137825012207031, "step": 562, "token_acc": 0.8416443141413511 }, { "epoch": 0.3905315182519726, "grad_norm": 0.015212651404956236, "learning_rate": 7.648797441302458e-06, "loss": 0.5008153319358826, "step": 563, "token_acc": 0.8476000143604226 }, { "epoch": 0.39122517991849476, "grad_norm": 0.014430418884031099, "learning_rate": 7.638517764086919e-06, "loss": 0.4883427619934082, "step": 564, "token_acc": 0.8491169710200168 }, { "epoch": 0.3919188415850169, "grad_norm": 0.06265184932182767, "learning_rate": 7.628222606563165e-06, "loss": 0.5409214496612549, "step": 565, "token_acc": 0.833978222586006 }, { "epoch": 0.39261250325153907, "grad_norm": 0.015171982482754493, "learning_rate": 7.617912029133368e-06, "loss": 0.5289785861968994, "step": 566, "token_acc": 0.837873754152824 }, { "epoch": 0.3933061649180612, "grad_norm": 0.016811068612674653, "learning_rate": 7.607586092290174e-06, "loss": 0.5144176483154297, "step": 567, "token_acc": 0.8435974814649231 }, { "epoch": 0.3939998265845834, "grad_norm": 0.01550754777718078, "learning_rate": 7.59724485661634e-06, "loss": 0.5146657228469849, "step": 568, "token_acc": 0.8436567119082438 }, { "epoch": 0.3946934882511055, "grad_norm": 0.015220754882641463, "learning_rate": 7.586888382784388e-06, "loss": 0.5221496820449829, "step": 569, "token_acc": 0.8411169047079271 }, { "epoch": 0.3953871499176277, "grad_norm": 0.015935416449661493, "learning_rate": 7.576516731556231e-06, "loss": 0.522467851638794, "step": 570, "token_acc": 0.8390240770465489 }, { "epoch": 0.3960808115841498, "grad_norm": 0.016270870838372926, "learning_rate": 7.566129963782845e-06, "loss": 0.4988764524459839, "step": 571, "token_acc": 0.8474220400049696 }, { "epoch": 0.396774473250672, "grad_norm": 0.01587640754621707, "learning_rate": 7.555728140403879e-06, "loss": 0.5069674253463745, "step": 572, "token_acc": 0.8470396315132904 }, { "epoch": 0.3974681349171941, "grad_norm": 0.01590833975205312, "learning_rate": 7.545311322447327e-06, "loss": 0.5282710790634155, "step": 573, "token_acc": 0.8386743227466584 }, { "epoch": 0.3981617965837163, "grad_norm": 0.01490192427843546, "learning_rate": 7.53487957102915e-06, "loss": 0.49151182174682617, "step": 574, "token_acc": 0.8498991241880371 }, { "epoch": 0.39885545825023844, "grad_norm": 0.015113868595483637, "learning_rate": 7.524432947352922e-06, "loss": 0.5144431591033936, "step": 575, "token_acc": 0.8426929904524301 }, { "epoch": 0.3995491199167606, "grad_norm": 0.014985935532956358, "learning_rate": 7.513971512709484e-06, "loss": 0.5148171186447144, "step": 576, "token_acc": 0.8410379280959379 }, { "epoch": 0.40024278158328275, "grad_norm": 0.016087381632468086, "learning_rate": 7.503495328476559e-06, "loss": 0.502284586429596, "step": 577, "token_acc": 0.8461963882618511 }, { "epoch": 0.40093644324980493, "grad_norm": 0.015371634006335573, "learning_rate": 7.4930044561184204e-06, "loss": 0.5128270983695984, "step": 578, "token_acc": 0.8438990577786132 }, { "epoch": 0.40163010491632706, "grad_norm": 0.01647387368015411, "learning_rate": 7.482498957185508e-06, "loss": 0.5118023753166199, "step": 579, "token_acc": 0.842189382888311 }, { "epoch": 0.40232376658284924, "grad_norm": 0.014601633990668801, "learning_rate": 7.47197889331408e-06, "loss": 0.508476197719574, "step": 580, "token_acc": 0.845393900504861 }, { "epoch": 0.40301742824937137, "grad_norm": 0.015942022641233997, "learning_rate": 7.46144432622585e-06, "loss": 0.4980703890323639, "step": 581, "token_acc": 0.8476588398653974 }, { "epoch": 0.40371108991589355, "grad_norm": 0.021344423563229277, "learning_rate": 7.4508953177276165e-06, "loss": 0.5112653970718384, "step": 582, "token_acc": 0.8438414825926015 }, { "epoch": 0.4044047515824157, "grad_norm": 0.015201252953788914, "learning_rate": 7.440331929710912e-06, "loss": 0.5032830238342285, "step": 583, "token_acc": 0.8472929199444702 }, { "epoch": 0.4050984132489378, "grad_norm": 0.01632738587842647, "learning_rate": 7.429754224151634e-06, "loss": 0.524311900138855, "step": 584, "token_acc": 0.838997419830446 }, { "epoch": 0.40579207491546, "grad_norm": 0.015795051262302733, "learning_rate": 7.4191622631096764e-06, "loss": 0.5278071165084839, "step": 585, "token_acc": 0.8384806442710555 }, { "epoch": 0.4064857365819821, "grad_norm": 0.014896175713154214, "learning_rate": 7.408556108728577e-06, "loss": 0.5048895478248596, "step": 586, "token_acc": 0.8466933378869563 }, { "epoch": 0.4071793982485043, "grad_norm": 0.016172611616148445, "learning_rate": 7.397935823235145e-06, "loss": 0.4993014931678772, "step": 587, "token_acc": 0.8491087482519035 }, { "epoch": 0.4078730599150264, "grad_norm": 0.015684765945550204, "learning_rate": 7.387301468939092e-06, "loss": 0.5176827311515808, "step": 588, "token_acc": 0.8422648901896456 }, { "epoch": 0.4085667215815486, "grad_norm": 0.014579124390475283, "learning_rate": 7.3766531082326806e-06, "loss": 0.5150560140609741, "step": 589, "token_acc": 0.8425293458384947 }, { "epoch": 0.40926038324807074, "grad_norm": 0.016369392934038596, "learning_rate": 7.365990803590345e-06, "loss": 0.5151157379150391, "step": 590, "token_acc": 0.8421297202113089 }, { "epoch": 0.4099540449145929, "grad_norm": 0.014595351743842383, "learning_rate": 7.355314617568329e-06, "loss": 0.49290746450424194, "step": 591, "token_acc": 0.8494864366605215 }, { "epoch": 0.41064770658111505, "grad_norm": 0.01759244576862526, "learning_rate": 7.344624612804319e-06, "loss": 0.5050020217895508, "step": 592, "token_acc": 0.8464161787677522 }, { "epoch": 0.41134136824763723, "grad_norm": 0.01592971568442015, "learning_rate": 7.333920852017078e-06, "loss": 0.5158231258392334, "step": 593, "token_acc": 0.8433704957196895 }, { "epoch": 0.41203502991415936, "grad_norm": 0.014914574371074628, "learning_rate": 7.323203398006076e-06, "loss": 0.4748554229736328, "step": 594, "token_acc": 0.8528600420469321 }, { "epoch": 0.41272869158068154, "grad_norm": 0.015095061874448746, "learning_rate": 7.31247231365112e-06, "loss": 0.49743223190307617, "step": 595, "token_acc": 0.84577738132549 }, { "epoch": 0.41342235324720367, "grad_norm": 0.01585386478346133, "learning_rate": 7.301727661911988e-06, "loss": 0.5447564721107483, "step": 596, "token_acc": 0.8358414010939627 }, { "epoch": 0.41411601491372585, "grad_norm": 0.015529904568464214, "learning_rate": 7.290969505828062e-06, "loss": 0.4849455952644348, "step": 597, "token_acc": 0.850695918848785 }, { "epoch": 0.414809676580248, "grad_norm": 0.015729581129334, "learning_rate": 7.280197908517948e-06, "loss": 0.5125923156738281, "step": 598, "token_acc": 0.8434386635805575 }, { "epoch": 0.41550333824677016, "grad_norm": 0.015942302703669354, "learning_rate": 7.269412933179117e-06, "loss": 0.5028844475746155, "step": 599, "token_acc": 0.8454600423259095 }, { "epoch": 0.4161969999132923, "grad_norm": 0.014654830476192468, "learning_rate": 7.258614643087528e-06, "loss": 0.5083805322647095, "step": 600, "token_acc": 0.8442212262847393 }, { "epoch": 0.41689066157981447, "grad_norm": 0.01411652077589708, "learning_rate": 7.247803101597259e-06, "loss": 0.4753497838973999, "step": 601, "token_acc": 0.8523341728636739 }, { "epoch": 0.4175843232463366, "grad_norm": 0.015351771824825512, "learning_rate": 7.236978372140131e-06, "loss": 0.48558640480041504, "step": 602, "token_acc": 0.8520386426704434 }, { "epoch": 0.4182779849128588, "grad_norm": 0.014245821419926422, "learning_rate": 7.226140518225349e-06, "loss": 0.4876019358634949, "step": 603, "token_acc": 0.8497593872469659 }, { "epoch": 0.4189716465793809, "grad_norm": 0.015274096664000791, "learning_rate": 7.215289603439108e-06, "loss": 0.5049581527709961, "step": 604, "token_acc": 0.8430962882715571 }, { "epoch": 0.41966530824590303, "grad_norm": 0.014643554985779223, "learning_rate": 7.204425691444237e-06, "loss": 0.499723345041275, "step": 605, "token_acc": 0.8474803507951931 }, { "epoch": 0.4203589699124252, "grad_norm": 0.014350208805118161, "learning_rate": 7.193548845979822e-06, "loss": 0.48519137501716614, "step": 606, "token_acc": 0.8508365248941746 }, { "epoch": 0.42105263157894735, "grad_norm": 0.015354276667815804, "learning_rate": 7.182659130860828e-06, "loss": 0.49860090017318726, "step": 607, "token_acc": 0.8465155578413724 }, { "epoch": 0.42174629324546953, "grad_norm": 0.015382527156851504, "learning_rate": 7.171756609977728e-06, "loss": 0.49276769161224365, "step": 608, "token_acc": 0.8499460068547819 }, { "epoch": 0.42243995491199166, "grad_norm": 0.014432865737421802, "learning_rate": 7.160841347296127e-06, "loss": 0.4919179081916809, "step": 609, "token_acc": 0.8486722744515917 }, { "epoch": 0.42313361657851384, "grad_norm": 0.01568894212484043, "learning_rate": 7.149913406856385e-06, "loss": 0.4855741262435913, "step": 610, "token_acc": 0.8507316275647838 }, { "epoch": 0.42382727824503597, "grad_norm": 0.0148265389502243, "learning_rate": 7.138972852773243e-06, "loss": 0.49698615074157715, "step": 611, "token_acc": 0.8475810268046742 }, { "epoch": 0.42452093991155815, "grad_norm": 0.01587437605767841, "learning_rate": 7.128019749235451e-06, "loss": 0.5152075290679932, "step": 612, "token_acc": 0.8421046407114812 }, { "epoch": 0.4252146015780803, "grad_norm": 0.016772791374097245, "learning_rate": 7.117054160505381e-06, "loss": 0.5322698950767517, "step": 613, "token_acc": 0.8373853016913089 }, { "epoch": 0.42590826324460246, "grad_norm": 0.015231069680902183, "learning_rate": 7.106076150918661e-06, "loss": 0.4948210120201111, "step": 614, "token_acc": 0.8471771262418222 }, { "epoch": 0.4266019249111246, "grad_norm": 0.016879307258017784, "learning_rate": 7.095085784883793e-06, "loss": 0.5225117206573486, "step": 615, "token_acc": 0.8408453242039692 }, { "epoch": 0.42729558657764677, "grad_norm": 0.0161866199361681, "learning_rate": 7.08408312688177e-06, "loss": 0.5055728554725647, "step": 616, "token_acc": 0.8436002039775624 }, { "epoch": 0.4279892482441689, "grad_norm": 0.01520470496736027, "learning_rate": 7.0730682414657084e-06, "loss": 0.4887295663356781, "step": 617, "token_acc": 0.8500861643026717 }, { "epoch": 0.4286829099106911, "grad_norm": 0.015083082386209392, "learning_rate": 7.062041193260459e-06, "loss": 0.5187944173812866, "step": 618, "token_acc": 0.8407549171940728 }, { "epoch": 0.4293765715772132, "grad_norm": 0.01962446864648047, "learning_rate": 7.0510020469622344e-06, "loss": 0.47335535287857056, "step": 619, "token_acc": 0.8544254566882734 }, { "epoch": 0.4300702332437354, "grad_norm": 0.014793829921417069, "learning_rate": 7.0399508673382255e-06, "loss": 0.5001312494277954, "step": 620, "token_acc": 0.8463477885030976 }, { "epoch": 0.4307638949102575, "grad_norm": 0.016647106807648592, "learning_rate": 7.028887719226226e-06, "loss": 0.5153214335441589, "step": 621, "token_acc": 0.8426855335691696 }, { "epoch": 0.4314575565767797, "grad_norm": 0.015095763122291259, "learning_rate": 7.0178126675342485e-06, "loss": 0.5037901401519775, "step": 622, "token_acc": 0.8446621784082712 }, { "epoch": 0.4321512182433018, "grad_norm": 0.014597692387143814, "learning_rate": 7.006725777240143e-06, "loss": 0.49295979738235474, "step": 623, "token_acc": 0.8468579136257437 }, { "epoch": 0.432844879909824, "grad_norm": 0.014301141973326655, "learning_rate": 6.9956271133912165e-06, "loss": 0.4909595251083374, "step": 624, "token_acc": 0.8503226373819652 }, { "epoch": 0.43353854157634614, "grad_norm": 0.015492031208285639, "learning_rate": 6.9845167411038575e-06, "loss": 0.5187467336654663, "step": 625, "token_acc": 0.8398680881594018 }, { "epoch": 0.43423220324286826, "grad_norm": 0.01416069429779636, "learning_rate": 6.973394725563141e-06, "loss": 0.46628591418266296, "step": 626, "token_acc": 0.856103310443784 }, { "epoch": 0.43492586490939045, "grad_norm": 0.015443798175496846, "learning_rate": 6.962261132022455e-06, "loss": 0.5220483541488647, "step": 627, "token_acc": 0.8404459259801946 }, { "epoch": 0.4356195265759126, "grad_norm": 0.015582231633810876, "learning_rate": 6.951116025803123e-06, "loss": 0.5344078540802002, "step": 628, "token_acc": 0.8365674015709497 }, { "epoch": 0.43631318824243476, "grad_norm": 0.015026213017322683, "learning_rate": 6.939959472294008e-06, "loss": 0.49049192667007446, "step": 629, "token_acc": 0.8503702846563296 }, { "epoch": 0.4370068499089569, "grad_norm": 0.015780788809308926, "learning_rate": 6.9287915369511336e-06, "loss": 0.4976133704185486, "step": 630, "token_acc": 0.8456091668062101 }, { "epoch": 0.43770051157547907, "grad_norm": 0.016724049297545094, "learning_rate": 6.917612285297306e-06, "loss": 0.520248293876648, "step": 631, "token_acc": 0.8414153508443712 }, { "epoch": 0.4383941732420012, "grad_norm": 0.015678839149555934, "learning_rate": 6.9064217829217185e-06, "loss": 0.5187111496925354, "step": 632, "token_acc": 0.8410435718128025 }, { "epoch": 0.4390878349085234, "grad_norm": 0.01626909309701958, "learning_rate": 6.895220095479579e-06, "loss": 0.5246286988258362, "step": 633, "token_acc": 0.8393796137122055 }, { "epoch": 0.4397814965750455, "grad_norm": 0.01487563377104816, "learning_rate": 6.8840072886917174e-06, "loss": 0.5001422166824341, "step": 634, "token_acc": 0.8448846088373337 }, { "epoch": 0.4404751582415677, "grad_norm": 0.015146918286453844, "learning_rate": 6.872783428344199e-06, "loss": 0.51044762134552, "step": 635, "token_acc": 0.8446748304027086 }, { "epoch": 0.4411688199080898, "grad_norm": 0.01581206830208639, "learning_rate": 6.861548580287942e-06, "loss": 0.4889146089553833, "step": 636, "token_acc": 0.8484430259639437 }, { "epoch": 0.441862481574612, "grad_norm": 0.016700033759981088, "learning_rate": 6.850302810438329e-06, "loss": 0.5134849548339844, "step": 637, "token_acc": 0.8426547259710131 }, { "epoch": 0.4425561432411341, "grad_norm": 0.01767181355352533, "learning_rate": 6.839046184774823e-06, "loss": 0.4934767484664917, "step": 638, "token_acc": 0.8495335979931319 }, { "epoch": 0.4432498049076563, "grad_norm": 0.014665981635773057, "learning_rate": 6.82777876934058e-06, "loss": 0.5013669729232788, "step": 639, "token_acc": 0.8462632846129526 }, { "epoch": 0.44394346657417844, "grad_norm": 0.014690604976370833, "learning_rate": 6.816500630242053e-06, "loss": 0.4832534193992615, "step": 640, "token_acc": 0.8513090866011136 }, { "epoch": 0.4446371282407006, "grad_norm": 0.01474110948801533, "learning_rate": 6.80521183364862e-06, "loss": 0.5055937767028809, "step": 641, "token_acc": 0.8444152587905199 }, { "epoch": 0.44533078990722275, "grad_norm": 0.0158865661426465, "learning_rate": 6.7939124457921855e-06, "loss": 0.4801962077617645, "step": 642, "token_acc": 0.8532410170705503 }, { "epoch": 0.44602445157374493, "grad_norm": 0.01675112392447675, "learning_rate": 6.782602532966787e-06, "loss": 0.4969950318336487, "step": 643, "token_acc": 0.8460468952515612 }, { "epoch": 0.44671811324026706, "grad_norm": 0.015325349774594996, "learning_rate": 6.771282161528217e-06, "loss": 0.5147734880447388, "step": 644, "token_acc": 0.8427586737047097 }, { "epoch": 0.44741177490678924, "grad_norm": 0.015516255956213578, "learning_rate": 6.75995139789363e-06, "loss": 0.5213358402252197, "step": 645, "token_acc": 0.8405693171597346 }, { "epoch": 0.44810543657331137, "grad_norm": 0.0154154286852692, "learning_rate": 6.748610308541151e-06, "loss": 0.4938429892063141, "step": 646, "token_acc": 0.8476492903632419 }, { "epoch": 0.4487990982398335, "grad_norm": 0.015481032148350338, "learning_rate": 6.737258960009488e-06, "loss": 0.5092982053756714, "step": 647, "token_acc": 0.8429945189390232 }, { "epoch": 0.4494927599063557, "grad_norm": 0.014660619171545585, "learning_rate": 6.725897418897535e-06, "loss": 0.49372732639312744, "step": 648, "token_acc": 0.8490638760088298 }, { "epoch": 0.4501864215728778, "grad_norm": 0.014668154001201241, "learning_rate": 6.714525751863992e-06, "loss": 0.5152393579483032, "step": 649, "token_acc": 0.8430133326737081 }, { "epoch": 0.4508800832394, "grad_norm": 0.015256538373710837, "learning_rate": 6.703144025626963e-06, "loss": 0.5141582489013672, "step": 650, "token_acc": 0.8415374573578492 }, { "epoch": 0.4515737449059221, "grad_norm": 0.016471149364377314, "learning_rate": 6.691752306963577e-06, "loss": 0.5107378363609314, "step": 651, "token_acc": 0.843435840434729 }, { "epoch": 0.4522674065724443, "grad_norm": 0.015018460839740514, "learning_rate": 6.680350662709583e-06, "loss": 0.520153820514679, "step": 652, "token_acc": 0.8408050178935622 }, { "epoch": 0.4529610682389664, "grad_norm": 0.015240368734682498, "learning_rate": 6.668939159758966e-06, "loss": 0.5291426181793213, "step": 653, "token_acc": 0.8368269365057197 }, { "epoch": 0.4536547299054886, "grad_norm": 0.01496876799040513, "learning_rate": 6.657517865063552e-06, "loss": 0.4902517795562744, "step": 654, "token_acc": 0.8500736245593682 }, { "epoch": 0.45434839157201073, "grad_norm": 0.014622132116903228, "learning_rate": 6.646086845632615e-06, "loss": 0.5274654030799866, "step": 655, "token_acc": 0.8393647178830256 }, { "epoch": 0.4550420532385329, "grad_norm": 0.016017862564660673, "learning_rate": 6.6346461685324895e-06, "loss": 0.5209780335426331, "step": 656, "token_acc": 0.842681978587236 }, { "epoch": 0.45573571490505504, "grad_norm": 0.0148592501492658, "learning_rate": 6.623195900886162e-06, "loss": 0.5236986875534058, "step": 657, "token_acc": 0.8395814421354793 }, { "epoch": 0.4564293765715772, "grad_norm": 0.01486870302301284, "learning_rate": 6.611736109872897e-06, "loss": 0.5096068382263184, "step": 658, "token_acc": 0.8412554404462699 }, { "epoch": 0.45712303823809936, "grad_norm": 0.015476074440374132, "learning_rate": 6.600266862727826e-06, "loss": 0.500363290309906, "step": 659, "token_acc": 0.8464202244166269 }, { "epoch": 0.45781669990462154, "grad_norm": 0.017313671279678363, "learning_rate": 6.588788226741567e-06, "loss": 0.4940526485443115, "step": 660, "token_acc": 0.8468642381343915 }, { "epoch": 0.45851036157114367, "grad_norm": 0.015251556111081602, "learning_rate": 6.577300269259818e-06, "loss": 0.514190673828125, "step": 661, "token_acc": 0.8423183095453252 }, { "epoch": 0.45920402323766585, "grad_norm": 0.014811975341780907, "learning_rate": 6.565803057682965e-06, "loss": 0.48044323921203613, "step": 662, "token_acc": 0.8525970121640997 }, { "epoch": 0.459897684904188, "grad_norm": 0.01769403219866028, "learning_rate": 6.554296659465692e-06, "loss": 0.4968039393424988, "step": 663, "token_acc": 0.8482896544954849 }, { "epoch": 0.46059134657071016, "grad_norm": 0.015509960834765451, "learning_rate": 6.542781142116582e-06, "loss": 0.5000349879264832, "step": 664, "token_acc": 0.8474969636479515 }, { "epoch": 0.4612850082372323, "grad_norm": 0.017028683336886407, "learning_rate": 6.531256573197715e-06, "loss": 0.49343976378440857, "step": 665, "token_acc": 0.8467936091048369 }, { "epoch": 0.46197866990375447, "grad_norm": 0.015106833802020929, "learning_rate": 6.519723020324285e-06, "loss": 0.49529850482940674, "step": 666, "token_acc": 0.8498797583435979 }, { "epoch": 0.4626723315702766, "grad_norm": 0.015788452057611348, "learning_rate": 6.508180551164188e-06, "loss": 0.4929038882255554, "step": 667, "token_acc": 0.8492230693126928 }, { "epoch": 0.4633659932367988, "grad_norm": 0.014672434376472058, "learning_rate": 6.496629233437637e-06, "loss": 0.4966810643672943, "step": 668, "token_acc": 0.8478131802889373 }, { "epoch": 0.4640596549033209, "grad_norm": 0.015565407144482568, "learning_rate": 6.485069134916756e-06, "loss": 0.4902818500995636, "step": 669, "token_acc": 0.8485557388756187 }, { "epoch": 0.46475331656984303, "grad_norm": 0.014757964624491594, "learning_rate": 6.473500323425191e-06, "loss": 0.5027964115142822, "step": 670, "token_acc": 0.8450289527922269 }, { "epoch": 0.4654469782363652, "grad_norm": 0.015565276840284444, "learning_rate": 6.461922866837701e-06, "loss": 0.5300242900848389, "step": 671, "token_acc": 0.837773440470457 }, { "epoch": 0.46614063990288734, "grad_norm": 0.0159156632334003, "learning_rate": 6.4503368330797736e-06, "loss": 0.5081501007080078, "step": 672, "token_acc": 0.8440878542904451 }, { "epoch": 0.4668343015694095, "grad_norm": 0.015126319434380217, "learning_rate": 6.438742290127215e-06, "loss": 0.4911293685436249, "step": 673, "token_acc": 0.8482481177107426 }, { "epoch": 0.46752796323593165, "grad_norm": 0.016768907829922946, "learning_rate": 6.427139306005754e-06, "loss": 0.5147792100906372, "step": 674, "token_acc": 0.8420371906876861 }, { "epoch": 0.46822162490245384, "grad_norm": 0.01703267761225986, "learning_rate": 6.415527948790649e-06, "loss": 0.5052201747894287, "step": 675, "token_acc": 0.8457657787204468 }, { "epoch": 0.46891528656897596, "grad_norm": 0.01508332302007471, "learning_rate": 6.403908286606278e-06, "loss": 0.5011932849884033, "step": 676, "token_acc": 0.8470649313017652 }, { "epoch": 0.46960894823549815, "grad_norm": 0.01442533636646134, "learning_rate": 6.392280387625748e-06, "loss": 0.49350035190582275, "step": 677, "token_acc": 0.8478157813648107 }, { "epoch": 0.4703026099020203, "grad_norm": 0.014782337966230515, "learning_rate": 6.3806443200704894e-06, "loss": 0.5067170262336731, "step": 678, "token_acc": 0.8436904732454296 }, { "epoch": 0.47099627156854246, "grad_norm": 0.014417788256689944, "learning_rate": 6.3690001522098635e-06, "loss": 0.49942344427108765, "step": 679, "token_acc": 0.8464717478564473 }, { "epoch": 0.4716899332350646, "grad_norm": 0.014628521210758456, "learning_rate": 6.357347952360748e-06, "loss": 0.47368913888931274, "step": 680, "token_acc": 0.8534503864852576 }, { "epoch": 0.47238359490158677, "grad_norm": 0.015259212364710624, "learning_rate": 6.345687788887154e-06, "loss": 0.487079918384552, "step": 681, "token_acc": 0.849131862975129 }, { "epoch": 0.4730772565681089, "grad_norm": 0.014751128257365558, "learning_rate": 6.3340197301998075e-06, "loss": 0.4886798858642578, "step": 682, "token_acc": 0.8496417469785529 }, { "epoch": 0.4737709182346311, "grad_norm": 0.015302114971936212, "learning_rate": 6.322343844755761e-06, "loss": 0.4939102232456207, "step": 683, "token_acc": 0.8474902759770327 }, { "epoch": 0.4744645799011532, "grad_norm": 0.017090770061684125, "learning_rate": 6.310660201057981e-06, "loss": 0.4869672656059265, "step": 684, "token_acc": 0.851120694891575 }, { "epoch": 0.4751582415676754, "grad_norm": 0.014396738421783125, "learning_rate": 6.298968867654962e-06, "loss": 0.49502626061439514, "step": 685, "token_acc": 0.8476843111775811 }, { "epoch": 0.4758519032341975, "grad_norm": 0.01520175602512072, "learning_rate": 6.287269913140306e-06, "loss": 0.48431143164634705, "step": 686, "token_acc": 0.8499350633438703 }, { "epoch": 0.4765455649007197, "grad_norm": 0.015486138254521756, "learning_rate": 6.275563406152333e-06, "loss": 0.4980413615703583, "step": 687, "token_acc": 0.8467499735004181 }, { "epoch": 0.4772392265672418, "grad_norm": 0.015170084377432213, "learning_rate": 6.26384941537367e-06, "loss": 0.5188277959823608, "step": 688, "token_acc": 0.8398275120645994 }, { "epoch": 0.477932888233764, "grad_norm": 0.015514627076366206, "learning_rate": 6.252128009530853e-06, "loss": 0.4931778311729431, "step": 689, "token_acc": 0.848748495788207 }, { "epoch": 0.47862654990028614, "grad_norm": 0.01579167652203062, "learning_rate": 6.2403992573939234e-06, "loss": 0.49214962124824524, "step": 690, "token_acc": 0.8487206362907701 }, { "epoch": 0.47932021156680826, "grad_norm": 0.014102528715939523, "learning_rate": 6.228663227776026e-06, "loss": 0.4668927490711212, "step": 691, "token_acc": 0.8548228934969266 }, { "epoch": 0.48001387323333045, "grad_norm": 0.014547806834821368, "learning_rate": 6.216919989532999e-06, "loss": 0.5045500993728638, "step": 692, "token_acc": 0.84413365166701 }, { "epoch": 0.4807075348998526, "grad_norm": 0.015697235992642413, "learning_rate": 6.205169611562973e-06, "loss": 0.5013386011123657, "step": 693, "token_acc": 0.8465240577046326 }, { "epoch": 0.48140119656637476, "grad_norm": 0.015336711422335432, "learning_rate": 6.193412162805973e-06, "loss": 0.49659475684165955, "step": 694, "token_acc": 0.8484456415624743 }, { "epoch": 0.4820948582328969, "grad_norm": 0.016194087699506823, "learning_rate": 6.181647712243504e-06, "loss": 0.5185307264328003, "step": 695, "token_acc": 0.8409701201054585 }, { "epoch": 0.48278851989941907, "grad_norm": 0.015040003038327933, "learning_rate": 6.169876328898152e-06, "loss": 0.5118576884269714, "step": 696, "token_acc": 0.8427271157593548 }, { "epoch": 0.4834821815659412, "grad_norm": 0.017099499606268557, "learning_rate": 6.15809808183318e-06, "loss": 0.48285025358200073, "step": 697, "token_acc": 0.8510741882731846 }, { "epoch": 0.4841758432324634, "grad_norm": 0.01687564195562754, "learning_rate": 6.1463130401521184e-06, "loss": 0.5011248588562012, "step": 698, "token_acc": 0.8472699927265555 }, { "epoch": 0.4848695048989855, "grad_norm": 0.015639724723427395, "learning_rate": 6.134521272998365e-06, "loss": 0.5285558700561523, "step": 699, "token_acc": 0.8386686479951642 }, { "epoch": 0.4855631665655077, "grad_norm": 0.015754338421147752, "learning_rate": 6.122722849554771e-06, "loss": 0.5217918753623962, "step": 700, "token_acc": 0.8393565475805931 }, { "epoch": 0.4862568282320298, "grad_norm": 0.015081383336598477, "learning_rate": 6.110917839043245e-06, "loss": 0.5020524263381958, "step": 701, "token_acc": 0.8451674688755395 }, { "epoch": 0.486950489898552, "grad_norm": 0.014796060957395102, "learning_rate": 6.0991063107243385e-06, "loss": 0.5073859095573425, "step": 702, "token_acc": 0.8442136675619218 }, { "epoch": 0.4876441515650741, "grad_norm": 0.014401866757381181, "learning_rate": 6.087288333896847e-06, "loss": 0.49525192379951477, "step": 703, "token_acc": 0.8498308205502217 }, { "epoch": 0.4883378132315963, "grad_norm": 0.014650020239198876, "learning_rate": 6.0754639778974e-06, "loss": 0.5157489776611328, "step": 704, "token_acc": 0.8410396289442695 }, { "epoch": 0.48903147489811843, "grad_norm": 0.014569622358210743, "learning_rate": 6.063633312100051e-06, "loss": 0.4892430901527405, "step": 705, "token_acc": 0.8492494410731396 }, { "epoch": 0.4897251365646406, "grad_norm": 0.015280598783060238, "learning_rate": 6.051796405915873e-06, "loss": 0.5018570423126221, "step": 706, "token_acc": 0.845664591411133 }, { "epoch": 0.49041879823116274, "grad_norm": 0.015299095975907756, "learning_rate": 6.039953328792554e-06, "loss": 0.48665446043014526, "step": 707, "token_acc": 0.8519121236777868 }, { "epoch": 0.4911124598976849, "grad_norm": 0.018768464778860416, "learning_rate": 6.028104150213986e-06, "loss": 0.49814218282699585, "step": 708, "token_acc": 0.847243591260918 }, { "epoch": 0.49180612156420705, "grad_norm": 0.017829446393557356, "learning_rate": 6.016248939699859e-06, "loss": 0.4978293180465698, "step": 709, "token_acc": 0.8473909263914667 }, { "epoch": 0.49249978323072924, "grad_norm": 0.015423430084044034, "learning_rate": 6.004387766805253e-06, "loss": 0.5167397260665894, "step": 710, "token_acc": 0.8418712851992595 }, { "epoch": 0.49319344489725137, "grad_norm": 0.01675725226559549, "learning_rate": 5.992520701120228e-06, "loss": 0.4957720935344696, "step": 711, "token_acc": 0.8487051286597402 }, { "epoch": 0.4938871065637735, "grad_norm": 0.01475781911416343, "learning_rate": 5.98064781226942e-06, "loss": 0.4954817295074463, "step": 712, "token_acc": 0.8477892733093534 }, { "epoch": 0.4945807682302957, "grad_norm": 0.015047139303780573, "learning_rate": 5.968769169911626e-06, "loss": 0.5021920800209045, "step": 713, "token_acc": 0.8460122394148962 }, { "epoch": 0.4952744298968178, "grad_norm": 0.01492732325366602, "learning_rate": 5.956884843739404e-06, "loss": 0.5014461278915405, "step": 714, "token_acc": 0.8451870894030277 }, { "epoch": 0.49596809156334, "grad_norm": 0.01445339622488776, "learning_rate": 5.944994903478651e-06, "loss": 0.49499326944351196, "step": 715, "token_acc": 0.846825488608766 }, { "epoch": 0.4966617532298621, "grad_norm": 0.014358755038962567, "learning_rate": 5.933099418888212e-06, "loss": 0.4730246067047119, "step": 716, "token_acc": 0.8544866122834874 }, { "epoch": 0.4973554148963843, "grad_norm": 0.014751109611916483, "learning_rate": 5.921198459759456e-06, "loss": 0.5087065696716309, "step": 717, "token_acc": 0.8449281711900753 }, { "epoch": 0.4980490765629064, "grad_norm": 0.014476644192428669, "learning_rate": 5.909292095915873e-06, "loss": 0.48804765939712524, "step": 718, "token_acc": 0.8494984463059891 }, { "epoch": 0.4987427382294286, "grad_norm": 0.015738724075766524, "learning_rate": 5.897380397212657e-06, "loss": 0.5069707632064819, "step": 719, "token_acc": 0.8455204137569416 }, { "epoch": 0.49943639989595073, "grad_norm": 0.016830729747537392, "learning_rate": 5.88546343353631e-06, "loss": 0.512635350227356, "step": 720, "token_acc": 0.8423995451099318 }, { "epoch": 0.5001300615624729, "grad_norm": 0.01572921809498002, "learning_rate": 5.87354127480422e-06, "loss": 0.4841790199279785, "step": 721, "token_acc": 0.8504658870614721 }, { "epoch": 0.500823723228995, "grad_norm": 0.015418844111624029, "learning_rate": 5.861613990964252e-06, "loss": 0.48561617732048035, "step": 722, "token_acc": 0.8506494252213558 }, { "epoch": 0.5015173848955172, "grad_norm": 0.014514459009852335, "learning_rate": 5.849681651994347e-06, "loss": 0.48457562923431396, "step": 723, "token_acc": 0.8502209595959596 }, { "epoch": 0.5022110465620394, "grad_norm": 0.015190088220543812, "learning_rate": 5.837744327902097e-06, "loss": 0.4996800422668457, "step": 724, "token_acc": 0.8461376055638351 }, { "epoch": 0.5029047082285615, "grad_norm": 0.015325587904289825, "learning_rate": 5.825802088724348e-06, "loss": 0.4859788417816162, "step": 725, "token_acc": 0.8508919473057421 }, { "epoch": 0.5035983698950837, "grad_norm": 0.015607693519784175, "learning_rate": 5.81385500452678e-06, "loss": 0.501683235168457, "step": 726, "token_acc": 0.8460120481927711 }, { "epoch": 0.5042920315616058, "grad_norm": 0.0176826359733667, "learning_rate": 5.801903145403501e-06, "loss": 0.5105983018875122, "step": 727, "token_acc": 0.8441104723145004 }, { "epoch": 0.504985693228128, "grad_norm": 0.014534074928930127, "learning_rate": 5.789946581476631e-06, "loss": 0.49362805485725403, "step": 728, "token_acc": 0.8482781255128115 }, { "epoch": 0.5056793548946501, "grad_norm": 0.014898989063064288, "learning_rate": 5.777985382895893e-06, "loss": 0.5054072141647339, "step": 729, "token_acc": 0.8446229822363087 }, { "epoch": 0.5063730165611723, "grad_norm": 0.016856223517821126, "learning_rate": 5.766019619838208e-06, "loss": 0.4931100308895111, "step": 730, "token_acc": 0.8464648754865284 }, { "epoch": 0.5070666782276945, "grad_norm": 0.017095740969811927, "learning_rate": 5.754049362507269e-06, "loss": 0.49818065762519836, "step": 731, "token_acc": 0.8467558284369849 }, { "epoch": 0.5077603398942167, "grad_norm": 0.015298628979571184, "learning_rate": 5.7420746811331404e-06, "loss": 0.516757607460022, "step": 732, "token_acc": 0.8422221946406186 }, { "epoch": 0.5084540015607387, "grad_norm": 0.014602699694629448, "learning_rate": 5.730095645971845e-06, "loss": 0.487848162651062, "step": 733, "token_acc": 0.8495060586889873 }, { "epoch": 0.5091476632272609, "grad_norm": 0.015084054047211146, "learning_rate": 5.7181123273049444e-06, "loss": 0.5014725923538208, "step": 734, "token_acc": 0.8457305963804203 }, { "epoch": 0.5098413248937831, "grad_norm": 0.016184216148455194, "learning_rate": 5.706124795439139e-06, "loss": 0.4882371127605438, "step": 735, "token_acc": 0.8489412709768924 }, { "epoch": 0.5105349865603052, "grad_norm": 0.015161123102227721, "learning_rate": 5.69413312070584e-06, "loss": 0.48968952894210815, "step": 736, "token_acc": 0.8494366030126913 }, { "epoch": 0.5112286482268273, "grad_norm": 0.015336469710521319, "learning_rate": 5.68213737346077e-06, "loss": 0.5125553607940674, "step": 737, "token_acc": 0.8421072207895259 }, { "epoch": 0.5119223098933495, "grad_norm": 0.015497290614009802, "learning_rate": 5.670137624083544e-06, "loss": 0.5050754547119141, "step": 738, "token_acc": 0.8458207831325302 }, { "epoch": 0.5126159715598717, "grad_norm": 0.014452203659372685, "learning_rate": 5.658133942977259e-06, "loss": 0.4921966791152954, "step": 739, "token_acc": 0.8498231732901756 }, { "epoch": 0.5133096332263938, "grad_norm": 0.015523479326821611, "learning_rate": 5.646126400568076e-06, "loss": 0.5123386383056641, "step": 740, "token_acc": 0.8427349799977979 }, { "epoch": 0.514003294892916, "grad_norm": 0.015151568780163587, "learning_rate": 5.634115067304815e-06, "loss": 0.5149898529052734, "step": 741, "token_acc": 0.8404042675461609 }, { "epoch": 0.5146969565594381, "grad_norm": 0.015449767477646833, "learning_rate": 5.622100013658535e-06, "loss": 0.499658465385437, "step": 742, "token_acc": 0.8447573314610264 }, { "epoch": 0.5153906182259603, "grad_norm": 0.016256167263827247, "learning_rate": 5.610081310122121e-06, "loss": 0.4935149550437927, "step": 743, "token_acc": 0.8483036398230744 }, { "epoch": 0.5160842798924824, "grad_norm": 0.015604440920880275, "learning_rate": 5.598059027209876e-06, "loss": 0.5187499523162842, "step": 744, "token_acc": 0.842974301567215 }, { "epoch": 0.5167779415590046, "grad_norm": 0.015467557816253835, "learning_rate": 5.586033235457099e-06, "loss": 0.48286837339401245, "step": 745, "token_acc": 0.8527682596934175 }, { "epoch": 0.5174716032255268, "grad_norm": 0.016392082768837298, "learning_rate": 5.574004005419677e-06, "loss": 0.5204586982727051, "step": 746, "token_acc": 0.8419840237785622 }, { "epoch": 0.518165264892049, "grad_norm": 0.016985749646012228, "learning_rate": 5.561971407673673e-06, "loss": 0.48645615577697754, "step": 747, "token_acc": 0.8506366194584122 }, { "epoch": 0.518858926558571, "grad_norm": 0.014999663975402769, "learning_rate": 5.549935512814904e-06, "loss": 0.5076377987861633, "step": 748, "token_acc": 0.8436739116578494 }, { "epoch": 0.5195525882250932, "grad_norm": 0.01456614451874514, "learning_rate": 5.537896391458535e-06, "loss": 0.4954346716403961, "step": 749, "token_acc": 0.8474823448355056 }, { "epoch": 0.5202462498916154, "grad_norm": 0.015274297847170926, "learning_rate": 5.525854114238656e-06, "loss": 0.4871617555618286, "step": 750, "token_acc": 0.8515209352260528 }, { "epoch": 0.5209399115581376, "grad_norm": 0.014608599336235827, "learning_rate": 5.513808751807877e-06, "loss": 0.4884047508239746, "step": 751, "token_acc": 0.8507710754692341 }, { "epoch": 0.5216335732246596, "grad_norm": 0.014993125765896688, "learning_rate": 5.501760374836908e-06, "loss": 0.47843021154403687, "step": 752, "token_acc": 0.8526602672326468 }, { "epoch": 0.5223272348911818, "grad_norm": 0.014899851423767351, "learning_rate": 5.489709054014145e-06, "loss": 0.4828718602657318, "step": 753, "token_acc": 0.8510986241070221 }, { "epoch": 0.523020896557704, "grad_norm": 0.015656197782707975, "learning_rate": 5.477654860045256e-06, "loss": 0.4849317669868469, "step": 754, "token_acc": 0.8499406112920866 }, { "epoch": 0.5237145582242261, "grad_norm": 0.015608344798152731, "learning_rate": 5.465597863652765e-06, "loss": 0.5119458436965942, "step": 755, "token_acc": 0.8438615814764159 }, { "epoch": 0.5244082198907483, "grad_norm": 0.015406497694169762, "learning_rate": 5.45353813557564e-06, "loss": 0.4912222623825073, "step": 756, "token_acc": 0.8500976795158908 }, { "epoch": 0.5251018815572704, "grad_norm": 0.015002105587376437, "learning_rate": 5.441475746568875e-06, "loss": 0.49809640645980835, "step": 757, "token_acc": 0.8456534739257252 }, { "epoch": 0.5257955432237926, "grad_norm": 0.014596522835667954, "learning_rate": 5.429410767403074e-06, "loss": 0.48345646262168884, "step": 758, "token_acc": 0.8507677486113919 }, { "epoch": 0.5264892048903147, "grad_norm": 0.015897761117042143, "learning_rate": 5.4173432688640384e-06, "loss": 0.49711257219314575, "step": 759, "token_acc": 0.8468898350707189 }, { "epoch": 0.5271828665568369, "grad_norm": 0.019708396452012913, "learning_rate": 5.4052733217523514e-06, "loss": 0.4839705228805542, "step": 760, "token_acc": 0.8510253744743184 }, { "epoch": 0.5278765282233591, "grad_norm": 0.015329182020960607, "learning_rate": 5.393200996882966e-06, "loss": 0.47920316457748413, "step": 761, "token_acc": 0.8512017576686747 }, { "epoch": 0.5285701898898812, "grad_norm": 0.01485747424226582, "learning_rate": 5.3811263650847785e-06, "loss": 0.5058380365371704, "step": 762, "token_acc": 0.8455418099686874 }, { "epoch": 0.5292638515564033, "grad_norm": 0.014417229232445402, "learning_rate": 5.369049497200224e-06, "loss": 0.4932381212711334, "step": 763, "token_acc": 0.8483640733836947 }, { "epoch": 0.5299575132229255, "grad_norm": 0.015522250028641311, "learning_rate": 5.356970464084857e-06, "loss": 0.49768921732902527, "step": 764, "token_acc": 0.8457577545101509 }, { "epoch": 0.5306511748894477, "grad_norm": 0.04786147812630508, "learning_rate": 5.3448893366069335e-06, "loss": 0.4939868450164795, "step": 765, "token_acc": 0.8473806325558676 }, { "epoch": 0.5313448365559699, "grad_norm": 0.01599538471667582, "learning_rate": 5.332806185646998e-06, "loss": 0.48593980073928833, "step": 766, "token_acc": 0.850311377245509 }, { "epoch": 0.5320384982224919, "grad_norm": 0.020814212211196573, "learning_rate": 5.320721082097471e-06, "loss": 0.48359960317611694, "step": 767, "token_acc": 0.850473321858864 }, { "epoch": 0.5327321598890141, "grad_norm": 0.014799990124033428, "learning_rate": 5.30863409686222e-06, "loss": 0.5003825426101685, "step": 768, "token_acc": 0.8467842274010624 }, { "epoch": 0.5334258215555363, "grad_norm": 0.01601651073590232, "learning_rate": 5.296545300856161e-06, "loss": 0.520452618598938, "step": 769, "token_acc": 0.8401669314370729 }, { "epoch": 0.5341194832220585, "grad_norm": 0.0148698296505831, "learning_rate": 5.284454765004831e-06, "loss": 0.4938196539878845, "step": 770, "token_acc": 0.8494778453006077 }, { "epoch": 0.5348131448885806, "grad_norm": 0.015219638544862213, "learning_rate": 5.272362560243972e-06, "loss": 0.4728080630302429, "step": 771, "token_acc": 0.8540849261485668 }, { "epoch": 0.5355068065551027, "grad_norm": 0.016755018710186597, "learning_rate": 5.2602687575191206e-06, "loss": 0.49077826738357544, "step": 772, "token_acc": 0.8497015534052571 }, { "epoch": 0.5362004682216249, "grad_norm": 0.014906665135223658, "learning_rate": 5.248173427785188e-06, "loss": 0.4850311279296875, "step": 773, "token_acc": 0.8498493153935597 }, { "epoch": 0.5368941298881471, "grad_norm": 0.015428599105718713, "learning_rate": 5.2360766420060465e-06, "loss": 0.4952850937843323, "step": 774, "token_acc": 0.8471444920956321 }, { "epoch": 0.5375877915546692, "grad_norm": 0.01639061177896881, "learning_rate": 5.223978471154107e-06, "loss": 0.508001446723938, "step": 775, "token_acc": 0.8443404424361298 }, { "epoch": 0.5382814532211914, "grad_norm": 0.015316661333004513, "learning_rate": 5.211878986209908e-06, "loss": 0.5293304324150085, "step": 776, "token_acc": 0.8365824129666691 }, { "epoch": 0.5389751148877135, "grad_norm": 0.01980918675548803, "learning_rate": 5.199778258161701e-06, "loss": 0.47230517864227295, "step": 777, "token_acc": 0.853466796875 }, { "epoch": 0.5396687765542356, "grad_norm": 0.01846948329304602, "learning_rate": 5.187676358005026e-06, "loss": 0.48726534843444824, "step": 778, "token_acc": 0.849745290272972 }, { "epoch": 0.5403624382207578, "grad_norm": 0.01652940399165877, "learning_rate": 5.175573356742305e-06, "loss": 0.4713689982891083, "step": 779, "token_acc": 0.8547187708594381 }, { "epoch": 0.54105609988728, "grad_norm": 0.016205065608522064, "learning_rate": 5.163469325382415e-06, "loss": 0.4985469579696655, "step": 780, "token_acc": 0.8467826477635783 }, { "epoch": 0.5417497615538022, "grad_norm": 0.015215102385882457, "learning_rate": 5.1513643349402785e-06, "loss": 0.5066708922386169, "step": 781, "token_acc": 0.8462194112633257 }, { "epoch": 0.5424434232203242, "grad_norm": 0.01553342552720053, "learning_rate": 5.139258456436449e-06, "loss": 0.4966863989830017, "step": 782, "token_acc": 0.8488544065947388 }, { "epoch": 0.5431370848868464, "grad_norm": 0.02004742385696474, "learning_rate": 5.1271517608966845e-06, "loss": 0.5129349827766418, "step": 783, "token_acc": 0.8432920090336841 }, { "epoch": 0.5438307465533686, "grad_norm": 0.018825134227604356, "learning_rate": 5.1150443193515395e-06, "loss": 0.48599696159362793, "step": 784, "token_acc": 0.8502731294955137 }, { "epoch": 0.5445244082198908, "grad_norm": 0.014758622332613484, "learning_rate": 5.102936202835943e-06, "loss": 0.5147727727890015, "step": 785, "token_acc": 0.8429486407743613 }, { "epoch": 0.5452180698864129, "grad_norm": 0.014378638325061002, "learning_rate": 5.090827482388788e-06, "loss": 0.47670978307724, "step": 786, "token_acc": 0.8522540269786549 }, { "epoch": 0.545911731552935, "grad_norm": 0.015033161325905708, "learning_rate": 5.078718229052509e-06, "loss": 0.5182158350944519, "step": 787, "token_acc": 0.8411970556292989 }, { "epoch": 0.5466053932194572, "grad_norm": 0.014603019186588715, "learning_rate": 5.066608513872667e-06, "loss": 0.4859468936920166, "step": 788, "token_acc": 0.8500635928157948 }, { "epoch": 0.5472990548859794, "grad_norm": 0.015107486024869216, "learning_rate": 5.05449840789753e-06, "loss": 0.5039726495742798, "step": 789, "token_acc": 0.844683004861467 }, { "epoch": 0.5479927165525015, "grad_norm": 0.014852541136183013, "learning_rate": 5.0423879821776635e-06, "loss": 0.4983818531036377, "step": 790, "token_acc": 0.8456070618644688 }, { "epoch": 0.5486863782190237, "grad_norm": 0.018823998690198455, "learning_rate": 5.030277307765503e-06, "loss": 0.49894052743911743, "step": 791, "token_acc": 0.8470123928508585 }, { "epoch": 0.5493800398855458, "grad_norm": 0.01492589006091114, "learning_rate": 5.018166455714951e-06, "loss": 0.48149222135543823, "step": 792, "token_acc": 0.8510270244114871 }, { "epoch": 0.550073701552068, "grad_norm": 0.014640979849559418, "learning_rate": 5.006055497080948e-06, "loss": 0.49629834294319153, "step": 793, "token_acc": 0.8480954793115804 }, { "epoch": 0.5507673632185901, "grad_norm": 0.01471539259832078, "learning_rate": 4.993944502919054e-06, "loss": 0.5021896362304688, "step": 794, "token_acc": 0.8459827938174395 }, { "epoch": 0.5514610248851123, "grad_norm": 0.015203127274872084, "learning_rate": 4.981833544285049e-06, "loss": 0.49272620677948, "step": 795, "token_acc": 0.8480961273037265 }, { "epoch": 0.5521546865516345, "grad_norm": 0.01663695518047619, "learning_rate": 4.9697226922344975e-06, "loss": 0.5033783912658691, "step": 796, "token_acc": 0.8442296975530675 }, { "epoch": 0.5528483482181566, "grad_norm": 0.015235824443353078, "learning_rate": 4.957612017822338e-06, "loss": 0.5019034147262573, "step": 797, "token_acc": 0.8448267593813691 }, { "epoch": 0.5535420098846787, "grad_norm": 0.015532785602754447, "learning_rate": 4.945501592102472e-06, "loss": 0.5012336373329163, "step": 798, "token_acc": 0.8443447391775959 }, { "epoch": 0.5542356715512009, "grad_norm": 0.015063736968095612, "learning_rate": 4.933391486127334e-06, "loss": 0.4894632399082184, "step": 799, "token_acc": 0.8489451058610736 }, { "epoch": 0.5549293332177231, "grad_norm": 0.01566843439532972, "learning_rate": 4.921281770947491e-06, "loss": 0.49012261629104614, "step": 800, "token_acc": 0.849549668397635 }, { "epoch": 0.5556229948842452, "grad_norm": 0.014808401887603689, "learning_rate": 4.909172517611213e-06, "loss": 0.49737852811813354, "step": 801, "token_acc": 0.8467532467532467 }, { "epoch": 0.5563166565507673, "grad_norm": 0.01588924014981688, "learning_rate": 4.8970637971640575e-06, "loss": 0.4952949285507202, "step": 802, "token_acc": 0.8476766122226084 }, { "epoch": 0.5570103182172895, "grad_norm": 0.014100400358560706, "learning_rate": 4.884955680648463e-06, "loss": 0.48182910680770874, "step": 803, "token_acc": 0.8508358841535202 }, { "epoch": 0.5577039798838117, "grad_norm": 0.015541501121276169, "learning_rate": 4.872848239103318e-06, "loss": 0.4956710636615753, "step": 804, "token_acc": 0.846876373006543 }, { "epoch": 0.5583976415503338, "grad_norm": 0.01545709286077484, "learning_rate": 4.860741543563553e-06, "loss": 0.5071697235107422, "step": 805, "token_acc": 0.8439290415595813 }, { "epoch": 0.559091303216856, "grad_norm": 0.016507940886465868, "learning_rate": 4.848635665059722e-06, "loss": 0.5047606229782104, "step": 806, "token_acc": 0.8446972056460152 }, { "epoch": 0.5597849648833781, "grad_norm": 0.014969378886536096, "learning_rate": 4.836530674617586e-06, "loss": 0.49264928698539734, "step": 807, "token_acc": 0.8493419833178869 }, { "epoch": 0.5604786265499003, "grad_norm": 0.014779545130566807, "learning_rate": 4.8244266432576955e-06, "loss": 0.5037711262702942, "step": 808, "token_acc": 0.8468744677227048 }, { "epoch": 0.5611722882164224, "grad_norm": 0.014955019841238199, "learning_rate": 4.812323641994974e-06, "loss": 0.46963977813720703, "step": 809, "token_acc": 0.854982333842628 }, { "epoch": 0.5618659498829446, "grad_norm": 0.01622324579082327, "learning_rate": 4.8002217418383e-06, "loss": 0.4882949888706207, "step": 810, "token_acc": 0.8494755499441558 }, { "epoch": 0.5625596115494668, "grad_norm": 0.014589332530751637, "learning_rate": 4.788121013790093e-06, "loss": 0.501279890537262, "step": 811, "token_acc": 0.8451252663287048 }, { "epoch": 0.563253273215989, "grad_norm": 0.015302533461822403, "learning_rate": 4.776021528845894e-06, "loss": 0.49754539132118225, "step": 812, "token_acc": 0.8470234586820701 }, { "epoch": 0.563946934882511, "grad_norm": 0.015032745739559683, "learning_rate": 4.763923357993954e-06, "loss": 0.492852121591568, "step": 813, "token_acc": 0.846788990825688 }, { "epoch": 0.5646405965490332, "grad_norm": 0.015964930436819482, "learning_rate": 4.751826572214813e-06, "loss": 0.514976441860199, "step": 814, "token_acc": 0.8430399321577333 }, { "epoch": 0.5653342582155554, "grad_norm": 0.014269973808071067, "learning_rate": 4.739731242480881e-06, "loss": 0.46085265278816223, "step": 815, "token_acc": 0.857262482953322 }, { "epoch": 0.5660279198820776, "grad_norm": 0.01489896594146087, "learning_rate": 4.7276374397560305e-06, "loss": 0.4856981635093689, "step": 816, "token_acc": 0.849693660903852 }, { "epoch": 0.5667215815485996, "grad_norm": 0.014701380921232106, "learning_rate": 4.715545234995172e-06, "loss": 0.5005993843078613, "step": 817, "token_acc": 0.84463266010041 }, { "epoch": 0.5674152432151218, "grad_norm": 0.015831265439846223, "learning_rate": 4.7034546991438415e-06, "loss": 0.48617294430732727, "step": 818, "token_acc": 0.8499345243101025 }, { "epoch": 0.568108904881644, "grad_norm": 0.014399079984931194, "learning_rate": 4.69136590313778e-06, "loss": 0.46696561574935913, "step": 819, "token_acc": 0.8543810186371792 }, { "epoch": 0.5688025665481661, "grad_norm": 0.014074817201441006, "learning_rate": 4.679278917902531e-06, "loss": 0.47481101751327515, "step": 820, "token_acc": 0.8518518518518519 }, { "epoch": 0.5694962282146883, "grad_norm": 0.01581952567696786, "learning_rate": 4.667193814353002e-06, "loss": 0.475786030292511, "step": 821, "token_acc": 0.8526874744707946 }, { "epoch": 0.5701898898812104, "grad_norm": 0.01573419568901493, "learning_rate": 4.655110663393067e-06, "loss": 0.4977423846721649, "step": 822, "token_acc": 0.8455005331415677 }, { "epoch": 0.5708835515477326, "grad_norm": 0.015683384164355715, "learning_rate": 4.643029535915144e-06, "loss": 0.493772029876709, "step": 823, "token_acc": 0.8478704457261479 }, { "epoch": 0.5715772132142547, "grad_norm": 0.016397396687360163, "learning_rate": 4.630950502799777e-06, "loss": 0.5034977793693542, "step": 824, "token_acc": 0.8444587645148655 }, { "epoch": 0.5722708748807769, "grad_norm": 0.015540975707923442, "learning_rate": 4.618873634915222e-06, "loss": 0.47363126277923584, "step": 825, "token_acc": 0.8540614116839162 }, { "epoch": 0.5729645365472991, "grad_norm": 0.014966955830401224, "learning_rate": 4.606799003117036e-06, "loss": 0.47656041383743286, "step": 826, "token_acc": 0.8518159454186308 }, { "epoch": 0.5736581982138212, "grad_norm": 0.016321789640703212, "learning_rate": 4.594726678247649e-06, "loss": 0.5116324424743652, "step": 827, "token_acc": 0.8438762603003844 }, { "epoch": 0.5743518598803433, "grad_norm": 0.01645290753231071, "learning_rate": 4.582656731135964e-06, "loss": 0.5072988271713257, "step": 828, "token_acc": 0.8454046792790776 }, { "epoch": 0.5750455215468655, "grad_norm": 0.015477852914709258, "learning_rate": 4.570589232596929e-06, "loss": 0.5126480460166931, "step": 829, "token_acc": 0.8436128077877457 }, { "epoch": 0.5757391832133877, "grad_norm": 0.015455341258988438, "learning_rate": 4.558524253431128e-06, "loss": 0.4797298312187195, "step": 830, "token_acc": 0.8527792108073089 }, { "epoch": 0.5764328448799099, "grad_norm": 0.015522528747309155, "learning_rate": 4.546461864424363e-06, "loss": 0.4938841462135315, "step": 831, "token_acc": 0.8479320531757755 }, { "epoch": 0.5771265065464319, "grad_norm": 0.015631413527561047, "learning_rate": 4.5344021363472355e-06, "loss": 0.5060588717460632, "step": 832, "token_acc": 0.8442983591283193 }, { "epoch": 0.5778201682129541, "grad_norm": 0.014627862541459102, "learning_rate": 4.522345139954745e-06, "loss": 0.49909281730651855, "step": 833, "token_acc": 0.8448315351696704 }, { "epoch": 0.5785138298794763, "grad_norm": 0.015543583713417245, "learning_rate": 4.510290945985856e-06, "loss": 0.5254332423210144, "step": 834, "token_acc": 0.8391225991211179 }, { "epoch": 0.5792074915459985, "grad_norm": 0.01582672756811511, "learning_rate": 4.498239625163093e-06, "loss": 0.5005804300308228, "step": 835, "token_acc": 0.8476391774736691 }, { "epoch": 0.5799011532125206, "grad_norm": 0.016988636821180368, "learning_rate": 4.486191248192124e-06, "loss": 0.4965519309043884, "step": 836, "token_acc": 0.8451983730728951 }, { "epoch": 0.5805948148790427, "grad_norm": 0.016071105253592725, "learning_rate": 4.474145885761347e-06, "loss": 0.4855723977088928, "step": 837, "token_acc": 0.8495718979394916 }, { "epoch": 0.5812884765455649, "grad_norm": 0.014019414007532342, "learning_rate": 4.462103608541466e-06, "loss": 0.4619828164577484, "step": 838, "token_acc": 0.8555189190435565 }, { "epoch": 0.5819821382120871, "grad_norm": 0.015321193253709923, "learning_rate": 4.450064487185097e-06, "loss": 0.5093334913253784, "step": 839, "token_acc": 0.8441036738551849 }, { "epoch": 0.5826757998786092, "grad_norm": 0.01543360362339474, "learning_rate": 4.438028592326328e-06, "loss": 0.49215710163116455, "step": 840, "token_acc": 0.8483357628765792 }, { "epoch": 0.5833694615451314, "grad_norm": 0.014510854344065634, "learning_rate": 4.425995994580325e-06, "loss": 0.47662052512168884, "step": 841, "token_acc": 0.8531535924945427 }, { "epoch": 0.5840631232116535, "grad_norm": 0.01540971305008988, "learning_rate": 4.413966764542904e-06, "loss": 0.49259769916534424, "step": 842, "token_acc": 0.846911835793701 }, { "epoch": 0.5847567848781756, "grad_norm": 0.015134085712999791, "learning_rate": 4.401940972790127e-06, "loss": 0.48211029171943665, "step": 843, "token_acc": 0.8517339811721835 }, { "epoch": 0.5854504465446978, "grad_norm": 0.014995733345538085, "learning_rate": 4.389918689877879e-06, "loss": 0.4943150281906128, "step": 844, "token_acc": 0.8484321639242471 }, { "epoch": 0.58614410821122, "grad_norm": 0.015060702228327732, "learning_rate": 4.377899986341466e-06, "loss": 0.4812565743923187, "step": 845, "token_acc": 0.8523548175250961 }, { "epoch": 0.5868377698777422, "grad_norm": 0.015162259263708712, "learning_rate": 4.365884932695186e-06, "loss": 0.5004775524139404, "step": 846, "token_acc": 0.846257305651334 }, { "epoch": 0.5875314315442642, "grad_norm": 0.015140529437377134, "learning_rate": 4.3538735994319245e-06, "loss": 0.4732825756072998, "step": 847, "token_acc": 0.8531692997947592 }, { "epoch": 0.5882250932107864, "grad_norm": 0.015129833682381348, "learning_rate": 4.341866057022742e-06, "loss": 0.5001990795135498, "step": 848, "token_acc": 0.8459410689270366 }, { "epoch": 0.5889187548773086, "grad_norm": 0.016261335581005225, "learning_rate": 4.329862375916457e-06, "loss": 0.5035703182220459, "step": 849, "token_acc": 0.8456403988112358 }, { "epoch": 0.5896124165438308, "grad_norm": 0.014530678019048403, "learning_rate": 4.317862626539232e-06, "loss": 0.5021465420722961, "step": 850, "token_acc": 0.8466696622999551 }, { "epoch": 0.5903060782103529, "grad_norm": 0.015577355427710089, "learning_rate": 4.305866879294161e-06, "loss": 0.484871506690979, "step": 851, "token_acc": 0.8493975903614458 }, { "epoch": 0.590999739876875, "grad_norm": 0.015021081824838687, "learning_rate": 4.293875204560863e-06, "loss": 0.4966820180416107, "step": 852, "token_acc": 0.8481653771877619 }, { "epoch": 0.5916934015433972, "grad_norm": 0.01482486536153248, "learning_rate": 4.281887672695056e-06, "loss": 0.49290555715560913, "step": 853, "token_acc": 0.8491981630197205 }, { "epoch": 0.5923870632099194, "grad_norm": 0.015877469244885596, "learning_rate": 4.269904354028158e-06, "loss": 0.477644681930542, "step": 854, "token_acc": 0.8523274478330658 }, { "epoch": 0.5930807248764415, "grad_norm": 0.015094056960714319, "learning_rate": 4.257925318866861e-06, "loss": 0.48601630330085754, "step": 855, "token_acc": 0.8490465642903988 }, { "epoch": 0.5937743865429637, "grad_norm": 0.01521931583670503, "learning_rate": 4.2459506374927335e-06, "loss": 0.47192510962486267, "step": 856, "token_acc": 0.8530481541900792 }, { "epoch": 0.5944680482094858, "grad_norm": 0.015137121625348905, "learning_rate": 4.233980380161793e-06, "loss": 0.48421090841293335, "step": 857, "token_acc": 0.8494075974105288 }, { "epoch": 0.595161709876008, "grad_norm": 0.01553436733719721, "learning_rate": 4.2220146171041075e-06, "loss": 0.4918314814567566, "step": 858, "token_acc": 0.8471659538174019 }, { "epoch": 0.5958553715425301, "grad_norm": 0.015229279767625068, "learning_rate": 4.2100534185233714e-06, "loss": 0.5001651048660278, "step": 859, "token_acc": 0.8462337218109409 }, { "epoch": 0.5965490332090523, "grad_norm": 0.015440227448274182, "learning_rate": 4.1980968545964996e-06, "loss": 0.4966930150985718, "step": 860, "token_acc": 0.8473913462001251 }, { "epoch": 0.5972426948755745, "grad_norm": 0.015132626060857197, "learning_rate": 4.186144995473221e-06, "loss": 0.4969600439071655, "step": 861, "token_acc": 0.8472282425468382 }, { "epoch": 0.5979363565420966, "grad_norm": 0.014999276367045657, "learning_rate": 4.174197911275653e-06, "loss": 0.4889525771141052, "step": 862, "token_acc": 0.8484863233719458 }, { "epoch": 0.5986300182086187, "grad_norm": 0.015623309601161628, "learning_rate": 4.162255672097904e-06, "loss": 0.4973915219306946, "step": 863, "token_acc": 0.845727384853053 }, { "epoch": 0.5993236798751409, "grad_norm": 0.014810836746084442, "learning_rate": 4.150318348005655e-06, "loss": 0.4866749942302704, "step": 864, "token_acc": 0.8506755243711278 }, { "epoch": 0.6000173415416631, "grad_norm": 0.015239815632014797, "learning_rate": 4.1383860090357495e-06, "loss": 0.5048948526382446, "step": 865, "token_acc": 0.8437711250603573 }, { "epoch": 0.6007110032081852, "grad_norm": 0.015101738136110328, "learning_rate": 4.126458725195783e-06, "loss": 0.4887281060218811, "step": 866, "token_acc": 0.8511861700800758 }, { "epoch": 0.6014046648747073, "grad_norm": 0.015655011213541924, "learning_rate": 4.114536566463692e-06, "loss": 0.46441251039505005, "step": 867, "token_acc": 0.8577877586562479 }, { "epoch": 0.6020983265412295, "grad_norm": 0.015562714680442163, "learning_rate": 4.102619602787344e-06, "loss": 0.49050360918045044, "step": 868, "token_acc": 0.8478337657275794 }, { "epoch": 0.6027919882077517, "grad_norm": 0.014349326056501965, "learning_rate": 4.090707904084129e-06, "loss": 0.48049595952033997, "step": 869, "token_acc": 0.8518354445535308 }, { "epoch": 0.6034856498742738, "grad_norm": 0.015207588739548472, "learning_rate": 4.078801540240544e-06, "loss": 0.4849977195262909, "step": 870, "token_acc": 0.8499884858254451 }, { "epoch": 0.604179311540796, "grad_norm": 0.014883917602572684, "learning_rate": 4.066900581111788e-06, "loss": 0.48615092039108276, "step": 871, "token_acc": 0.8492379835873388 }, { "epoch": 0.6048729732073181, "grad_norm": 0.014855816867796794, "learning_rate": 4.0550050965213505e-06, "loss": 0.4778837561607361, "step": 872, "token_acc": 0.8537387883187747 }, { "epoch": 0.6055666348738403, "grad_norm": 0.015249422054512601, "learning_rate": 4.043115156260599e-06, "loss": 0.4928008019924164, "step": 873, "token_acc": 0.8489767453045716 }, { "epoch": 0.6062602965403624, "grad_norm": 0.017142845538430977, "learning_rate": 4.031230830088375e-06, "loss": 0.4881742000579834, "step": 874, "token_acc": 0.8488795010561312 }, { "epoch": 0.6069539582068846, "grad_norm": 0.015105046253946331, "learning_rate": 4.0193521877305815e-06, "loss": 0.4939943552017212, "step": 875, "token_acc": 0.8475995511105082 }, { "epoch": 0.6076476198734068, "grad_norm": 0.015108481803090643, "learning_rate": 4.0074792988797736e-06, "loss": 0.4823575019836426, "step": 876, "token_acc": 0.8504561729055895 }, { "epoch": 0.608341281539929, "grad_norm": 0.01505771650589443, "learning_rate": 3.995612233194748e-06, "loss": 0.5121563673019409, "step": 877, "token_acc": 0.8429208472686733 }, { "epoch": 0.609034943206451, "grad_norm": 0.014632623582438341, "learning_rate": 3.9837510603001425e-06, "loss": 0.5040811896324158, "step": 878, "token_acc": 0.843546052163182 }, { "epoch": 0.6097286048729732, "grad_norm": 0.016142571385198118, "learning_rate": 3.971895849786016e-06, "loss": 0.4934614896774292, "step": 879, "token_acc": 0.8475087158628705 }, { "epoch": 0.6104222665394954, "grad_norm": 0.014966421880598098, "learning_rate": 3.960046671207448e-06, "loss": 0.49729830026626587, "step": 880, "token_acc": 0.8474645130313812 }, { "epoch": 0.6111159282060176, "grad_norm": 0.01591358226307336, "learning_rate": 3.948203594084129e-06, "loss": 0.4680469036102295, "step": 881, "token_acc": 0.8548326301326556 }, { "epoch": 0.6118095898725396, "grad_norm": 0.015317821756843276, "learning_rate": 3.93636668789995e-06, "loss": 0.49105167388916016, "step": 882, "token_acc": 0.8485793696066414 }, { "epoch": 0.6125032515390618, "grad_norm": 0.015321460949993421, "learning_rate": 3.924536022102599e-06, "loss": 0.49517279863357544, "step": 883, "token_acc": 0.8465825274545157 }, { "epoch": 0.613196913205584, "grad_norm": 0.014824810557445688, "learning_rate": 3.912711666103153e-06, "loss": 0.4817257225513458, "step": 884, "token_acc": 0.8497641075914466 }, { "epoch": 0.6138905748721061, "grad_norm": 0.014999920681169104, "learning_rate": 3.900893689275663e-06, "loss": 0.4997515082359314, "step": 885, "token_acc": 0.8447212721871333 }, { "epoch": 0.6145842365386283, "grad_norm": 0.015226143892599594, "learning_rate": 3.889082160956757e-06, "loss": 0.49345144629478455, "step": 886, "token_acc": 0.8473566550385693 }, { "epoch": 0.6152778982051504, "grad_norm": 0.014591600383167862, "learning_rate": 3.877277150445231e-06, "loss": 0.4704713821411133, "step": 887, "token_acc": 0.8543993798231087 }, { "epoch": 0.6159715598716726, "grad_norm": 0.016776905432904134, "learning_rate": 3.8654787270016366e-06, "loss": 0.48805978894233704, "step": 888, "token_acc": 0.849785615073374 }, { "epoch": 0.6166652215381947, "grad_norm": 0.015255761467510374, "learning_rate": 3.853686959847882e-06, "loss": 0.4859652519226074, "step": 889, "token_acc": 0.8508233506274561 }, { "epoch": 0.6173588832047169, "grad_norm": 0.016326788935177833, "learning_rate": 3.841901918166821e-06, "loss": 0.5049853324890137, "step": 890, "token_acc": 0.8446220225178075 }, { "epoch": 0.6180525448712391, "grad_norm": 0.01475825018156711, "learning_rate": 3.83012367110185e-06, "loss": 0.5044801235198975, "step": 891, "token_acc": 0.8450520356746337 }, { "epoch": 0.6187462065377612, "grad_norm": 0.015507902705757455, "learning_rate": 3.818352287756499e-06, "loss": 0.5024208426475525, "step": 892, "token_acc": 0.8450663661407463 }, { "epoch": 0.6194398682042833, "grad_norm": 0.014753658883390476, "learning_rate": 3.80658783719403e-06, "loss": 0.48398375511169434, "step": 893, "token_acc": 0.850313703122772 }, { "epoch": 0.6201335298708055, "grad_norm": 0.015453206931053223, "learning_rate": 3.7948303884370285e-06, "loss": 0.501223087310791, "step": 894, "token_acc": 0.8444077845722578 }, { "epoch": 0.6208271915373277, "grad_norm": 0.015488152117303306, "learning_rate": 3.783080010467002e-06, "loss": 0.5007642507553101, "step": 895, "token_acc": 0.8472560470999929 }, { "epoch": 0.6215208532038499, "grad_norm": 0.014285302562708754, "learning_rate": 3.771336772223974e-06, "loss": 0.46079689264297485, "step": 896, "token_acc": 0.8585418682888613 }, { "epoch": 0.6222145148703719, "grad_norm": 0.01585729113338968, "learning_rate": 3.7596007426060765e-06, "loss": 0.49099329113960266, "step": 897, "token_acc": 0.8482857880946231 }, { "epoch": 0.6229081765368941, "grad_norm": 0.014834969397495491, "learning_rate": 3.7478719904691487e-06, "loss": 0.48972219228744507, "step": 898, "token_acc": 0.8481589010960902 }, { "epoch": 0.6236018382034163, "grad_norm": 0.015176642207635522, "learning_rate": 3.736150584626332e-06, "loss": 0.49735331535339355, "step": 899, "token_acc": 0.8460488360774445 }, { "epoch": 0.6242954998699385, "grad_norm": 0.014961124040551327, "learning_rate": 3.724436593847669e-06, "loss": 0.48516207933425903, "step": 900, "token_acc": 0.8512428251176312 }, { "epoch": 0.6249891615364606, "grad_norm": 0.01530162026412359, "learning_rate": 3.712730086859695e-06, "loss": 0.5055670738220215, "step": 901, "token_acc": 0.8444476942575802 }, { "epoch": 0.6256828232029827, "grad_norm": 0.0172355445024491, "learning_rate": 3.70103113234504e-06, "loss": 0.490145206451416, "step": 902, "token_acc": 0.8498469955489614 }, { "epoch": 0.6263764848695049, "grad_norm": 0.018533677628237023, "learning_rate": 3.6893397989420198e-06, "loss": 0.48516157269477844, "step": 903, "token_acc": 0.8494030649719273 }, { "epoch": 0.6270701465360271, "grad_norm": 0.01615789446629589, "learning_rate": 3.6776561552442426e-06, "loss": 0.4976872503757477, "step": 904, "token_acc": 0.846374344402859 }, { "epoch": 0.6277638082025492, "grad_norm": 0.015032370428912312, "learning_rate": 3.665980269800195e-06, "loss": 0.5043931007385254, "step": 905, "token_acc": 0.8443847714524406 }, { "epoch": 0.6284574698690714, "grad_norm": 0.015078733024798303, "learning_rate": 3.6543122111128488e-06, "loss": 0.4985184967517853, "step": 906, "token_acc": 0.8475696948216228 }, { "epoch": 0.6291511315355935, "grad_norm": 0.014811994315131277, "learning_rate": 3.642652047639252e-06, "loss": 0.48002707958221436, "step": 907, "token_acc": 0.8522419059254734 }, { "epoch": 0.6298447932021156, "grad_norm": 0.014706003019027754, "learning_rate": 3.630999847790137e-06, "loss": 0.495941162109375, "step": 908, "token_acc": 0.8461816429775958 }, { "epoch": 0.6305384548686378, "grad_norm": 0.0160171771212841, "learning_rate": 3.6193556799295105e-06, "loss": 0.5165108442306519, "step": 909, "token_acc": 0.841016407066497 }, { "epoch": 0.63123211653516, "grad_norm": 0.014458476533662336, "learning_rate": 3.607719612374254e-06, "loss": 0.4845355749130249, "step": 910, "token_acc": 0.8496343200507523 }, { "epoch": 0.6319257782016822, "grad_norm": 0.01578548259279897, "learning_rate": 3.5960917133937247e-06, "loss": 0.48611682653427124, "step": 911, "token_acc": 0.850259824817877 }, { "epoch": 0.6326194398682042, "grad_norm": 0.015977071803767395, "learning_rate": 3.5844720512093523e-06, "loss": 0.4806007742881775, "step": 912, "token_acc": 0.8535363610169115 }, { "epoch": 0.6333131015347264, "grad_norm": 0.015190205513348279, "learning_rate": 3.5728606939942466e-06, "loss": 0.49322739243507385, "step": 913, "token_acc": 0.8478964401294499 }, { "epoch": 0.6340067632012486, "grad_norm": 0.014716569025888545, "learning_rate": 3.561257709872787e-06, "loss": 0.49636828899383545, "step": 914, "token_acc": 0.847743552568529 }, { "epoch": 0.6347004248677708, "grad_norm": 0.015152436491806327, "learning_rate": 3.5496631669202285e-06, "loss": 0.48552361130714417, "step": 915, "token_acc": 0.8513528184219732 }, { "epoch": 0.6353940865342929, "grad_norm": 0.014931062026353045, "learning_rate": 3.538077133162301e-06, "loss": 0.48698028922080994, "step": 916, "token_acc": 0.8500822925743053 }, { "epoch": 0.636087748200815, "grad_norm": 0.015240344567751819, "learning_rate": 3.5264996765748124e-06, "loss": 0.49006688594818115, "step": 917, "token_acc": 0.8491153261775229 }, { "epoch": 0.6367814098673372, "grad_norm": 0.016565692895279826, "learning_rate": 3.5149308650832465e-06, "loss": 0.4937397241592407, "step": 918, "token_acc": 0.8485686644175432 }, { "epoch": 0.6374750715338594, "grad_norm": 0.017844504240850954, "learning_rate": 3.503370766562366e-06, "loss": 0.49533379077911377, "step": 919, "token_acc": 0.847604203328473 }, { "epoch": 0.6381687332003815, "grad_norm": 0.015029219834690466, "learning_rate": 3.4918194488358127e-06, "loss": 0.4909043312072754, "step": 920, "token_acc": 0.8475158688493295 }, { "epoch": 0.6388623948669037, "grad_norm": 0.015022825277781858, "learning_rate": 3.480276979675715e-06, "loss": 0.5000736713409424, "step": 921, "token_acc": 0.8473815802993458 }, { "epoch": 0.6395560565334258, "grad_norm": 0.0168521321659657, "learning_rate": 3.4687434268022847e-06, "loss": 0.4974159598350525, "step": 922, "token_acc": 0.8461902508282063 }, { "epoch": 0.640249718199948, "grad_norm": 0.016851336316916078, "learning_rate": 3.45721885788342e-06, "loss": 0.5042355060577393, "step": 923, "token_acc": 0.8433121958413213 }, { "epoch": 0.6409433798664701, "grad_norm": 0.015429522599384677, "learning_rate": 3.4457033405343094e-06, "loss": 0.5076022148132324, "step": 924, "token_acc": 0.8444006163328197 }, { "epoch": 0.6416370415329923, "grad_norm": 0.014261338806098527, "learning_rate": 3.434196942317036e-06, "loss": 0.47981321811676025, "step": 925, "token_acc": 0.8541189470412873 }, { "epoch": 0.6423307031995145, "grad_norm": 0.015006928749262351, "learning_rate": 3.422699730740184e-06, "loss": 0.4753767251968384, "step": 926, "token_acc": 0.8532774618377731 }, { "epoch": 0.6430243648660365, "grad_norm": 0.01555525967072167, "learning_rate": 3.411211773258434e-06, "loss": 0.4935673475265503, "step": 927, "token_acc": 0.8465847488254428 }, { "epoch": 0.6437180265325587, "grad_norm": 0.014538330504080982, "learning_rate": 3.3997331372721746e-06, "loss": 0.48743247985839844, "step": 928, "token_acc": 0.8475498633188215 }, { "epoch": 0.6444116881990809, "grad_norm": 0.015567655614611504, "learning_rate": 3.388263890127105e-06, "loss": 0.5029175281524658, "step": 929, "token_acc": 0.8450351359608922 }, { "epoch": 0.6451053498656031, "grad_norm": 0.017301488460091528, "learning_rate": 3.3768040991138397e-06, "loss": 0.49109798669815063, "step": 930, "token_acc": 0.8483051160955399 }, { "epoch": 0.6457990115321252, "grad_norm": 0.016758204473145584, "learning_rate": 3.365353831467514e-06, "loss": 0.500935971736908, "step": 931, "token_acc": 0.8470307878381453 }, { "epoch": 0.6464926731986473, "grad_norm": 0.014808568865743546, "learning_rate": 3.3539131543673864e-06, "loss": 0.5140148401260376, "step": 932, "token_acc": 0.8421001029462228 }, { "epoch": 0.6471863348651695, "grad_norm": 0.015597035584100959, "learning_rate": 3.342482134936449e-06, "loss": 0.497142493724823, "step": 933, "token_acc": 0.8462344773156084 }, { "epoch": 0.6478799965316917, "grad_norm": 0.014804961504620592, "learning_rate": 3.3310608402410342e-06, "loss": 0.4920557737350464, "step": 934, "token_acc": 0.8482796838858905 }, { "epoch": 0.6485736581982138, "grad_norm": 0.014568436016102318, "learning_rate": 3.3196493372904176e-06, "loss": 0.4896330237388611, "step": 935, "token_acc": 0.8499762602341552 }, { "epoch": 0.649267319864736, "grad_norm": 0.014594978183235291, "learning_rate": 3.3082476930364237e-06, "loss": 0.5043489336967468, "step": 936, "token_acc": 0.8451702375215956 }, { "epoch": 0.6499609815312581, "grad_norm": 0.015333088735434607, "learning_rate": 3.2968559743730383e-06, "loss": 0.5086060762405396, "step": 937, "token_acc": 0.8445313155780462 }, { "epoch": 0.6506546431977803, "grad_norm": 0.01614379526718392, "learning_rate": 3.28547424813601e-06, "loss": 0.5031305551528931, "step": 938, "token_acc": 0.8449546449796684 }, { "epoch": 0.6513483048643024, "grad_norm": 0.016752943458698596, "learning_rate": 3.274102581102467e-06, "loss": 0.47857776284217834, "step": 939, "token_acc": 0.851010069578422 }, { "epoch": 0.6520419665308246, "grad_norm": 0.014962952620701676, "learning_rate": 3.2627410399905144e-06, "loss": 0.4991922676563263, "step": 940, "token_acc": 0.8462161210841254 }, { "epoch": 0.6527356281973468, "grad_norm": 0.014849727103913371, "learning_rate": 3.25138969145885e-06, "loss": 0.48438096046447754, "step": 941, "token_acc": 0.8503399723208376 }, { "epoch": 0.653429289863869, "grad_norm": 0.014481644835743561, "learning_rate": 3.24004860210637e-06, "loss": 0.49086934328079224, "step": 942, "token_acc": 0.8505546843236336 }, { "epoch": 0.654122951530391, "grad_norm": 0.015718712703469893, "learning_rate": 3.2287178384717844e-06, "loss": 0.4995993375778198, "step": 943, "token_acc": 0.8465129188534517 }, { "epoch": 0.6548166131969132, "grad_norm": 0.01587965758965927, "learning_rate": 3.2173974670332157e-06, "loss": 0.5013362765312195, "step": 944, "token_acc": 0.8443231468382638 }, { "epoch": 0.6555102748634354, "grad_norm": 0.042972393627359336, "learning_rate": 3.206087554207815e-06, "loss": 0.4939986765384674, "step": 945, "token_acc": 0.8480765622067931 }, { "epoch": 0.6562039365299576, "grad_norm": 0.015144535750987802, "learning_rate": 3.1947881663513793e-06, "loss": 0.49576205015182495, "step": 946, "token_acc": 0.8468081341708479 }, { "epoch": 0.6568975981964796, "grad_norm": 0.015832684074109815, "learning_rate": 3.183499369757947e-06, "loss": 0.5044418573379517, "step": 947, "token_acc": 0.843318545029763 }, { "epoch": 0.6575912598630018, "grad_norm": 0.015215635909961198, "learning_rate": 3.1722212306594224e-06, "loss": 0.4800597131252289, "step": 948, "token_acc": 0.851195824768282 }, { "epoch": 0.658284921529524, "grad_norm": 0.017825321338945568, "learning_rate": 3.1609538152251784e-06, "loss": 0.4921591877937317, "step": 949, "token_acc": 0.8460351544920343 }, { "epoch": 0.6589785831960461, "grad_norm": 0.014946857965203864, "learning_rate": 3.1496971895616734e-06, "loss": 0.482985258102417, "step": 950, "token_acc": 0.8515028669515358 }, { "epoch": 0.6596722448625683, "grad_norm": 0.0149542880075417, "learning_rate": 3.1384514197120597e-06, "loss": 0.5038579702377319, "step": 951, "token_acc": 0.8463198015173852 }, { "epoch": 0.6603659065290904, "grad_norm": 0.014438488386531539, "learning_rate": 3.127216571655802e-06, "loss": 0.4815616309642792, "step": 952, "token_acc": 0.8509333236928045 }, { "epoch": 0.6610595681956126, "grad_norm": 0.01582603025449107, "learning_rate": 3.115992711308284e-06, "loss": 0.4937228560447693, "step": 953, "token_acc": 0.8482856592506982 }, { "epoch": 0.6617532298621347, "grad_norm": 0.014706588451927675, "learning_rate": 3.104779904520422e-06, "loss": 0.5019439458847046, "step": 954, "token_acc": 0.8442252412361788 }, { "epoch": 0.6624468915286569, "grad_norm": 0.015251630871863459, "learning_rate": 3.093578217078283e-06, "loss": 0.4943371117115021, "step": 955, "token_acc": 0.847044233154196 }, { "epoch": 0.6631405531951791, "grad_norm": 0.01595485653571547, "learning_rate": 3.082387714702697e-06, "loss": 0.5176520943641663, "step": 956, "token_acc": 0.8412878274685618 }, { "epoch": 0.6638342148617012, "grad_norm": 0.016598675152444065, "learning_rate": 3.0712084630488685e-06, "loss": 0.49506503343582153, "step": 957, "token_acc": 0.8473991997537704 }, { "epoch": 0.6645278765282233, "grad_norm": 0.015575266903900379, "learning_rate": 3.060040527705993e-06, "loss": 0.5339555740356445, "step": 958, "token_acc": 0.8355817766359845 }, { "epoch": 0.6652215381947455, "grad_norm": 0.014375129365406459, "learning_rate": 3.0488839741968774e-06, "loss": 0.48663631081581116, "step": 959, "token_acc": 0.85017232206879 }, { "epoch": 0.6659151998612677, "grad_norm": 0.01429256308067043, "learning_rate": 3.0377388679775444e-06, "loss": 0.486642450094223, "step": 960, "token_acc": 0.8504108931677764 }, { "epoch": 0.6666088615277899, "grad_norm": 0.015173923649325434, "learning_rate": 3.0266052744368613e-06, "loss": 0.5068844556808472, "step": 961, "token_acc": 0.8440263951449083 }, { "epoch": 0.6673025231943119, "grad_norm": 0.015570900671318267, "learning_rate": 3.0154832588961446e-06, "loss": 0.5243799686431885, "step": 962, "token_acc": 0.8389335916157552 }, { "epoch": 0.6679961848608341, "grad_norm": 0.014965382613541977, "learning_rate": 3.004372886608784e-06, "loss": 0.5018934011459351, "step": 963, "token_acc": 0.8455075591333971 }, { "epoch": 0.6686898465273563, "grad_norm": 0.0162653338547892, "learning_rate": 2.993274222759858e-06, "loss": 0.5132642388343811, "step": 964, "token_acc": 0.8404700258912567 }, { "epoch": 0.6693835081938785, "grad_norm": 0.015396357083104816, "learning_rate": 2.9821873324657523e-06, "loss": 0.48679178953170776, "step": 965, "token_acc": 0.8504774067992459 }, { "epoch": 0.6700771698604006, "grad_norm": 0.01565034423328303, "learning_rate": 2.971112280773775e-06, "loss": 0.5040479898452759, "step": 966, "token_acc": 0.8449136133452487 }, { "epoch": 0.6707708315269227, "grad_norm": 0.014552897390457651, "learning_rate": 2.9600491326617762e-06, "loss": 0.4871087670326233, "step": 967, "token_acc": 0.8503269332963657 }, { "epoch": 0.6714644931934449, "grad_norm": 0.01488757094853707, "learning_rate": 2.9489979530377677e-06, "loss": 0.4733317196369171, "step": 968, "token_acc": 0.8542294952291491 }, { "epoch": 0.6721581548599671, "grad_norm": 0.015285540014713724, "learning_rate": 2.937958806739543e-06, "loss": 0.4950611889362335, "step": 969, "token_acc": 0.8477079796264856 }, { "epoch": 0.6728518165264892, "grad_norm": 0.01537560252059356, "learning_rate": 2.9269317585342915e-06, "loss": 0.4919055700302124, "step": 970, "token_acc": 0.8482943342881589 }, { "epoch": 0.6735454781930114, "grad_norm": 0.015221029992988624, "learning_rate": 2.915916873118231e-06, "loss": 0.48189809918403625, "step": 971, "token_acc": 0.8509236628909594 }, { "epoch": 0.6742391398595335, "grad_norm": 0.0185068758387545, "learning_rate": 2.9049142151162078e-06, "loss": 0.49174290895462036, "step": 972, "token_acc": 0.8485012705179619 }, { "epoch": 0.6749328015260556, "grad_norm": 0.015613496706283345, "learning_rate": 2.893923849081339e-06, "loss": 0.4870654344558716, "step": 973, "token_acc": 0.8499565635932474 }, { "epoch": 0.6756264631925778, "grad_norm": 0.015684501833731768, "learning_rate": 2.882945839494621e-06, "loss": 0.4994729161262512, "step": 974, "token_acc": 0.8459600899931526 }, { "epoch": 0.6763201248591, "grad_norm": 0.014904780660702546, "learning_rate": 2.871980250764551e-06, "loss": 0.4800878167152405, "step": 975, "token_acc": 0.8523436654277776 }, { "epoch": 0.6770137865256222, "grad_norm": 0.014885299894755646, "learning_rate": 2.8610271472267576e-06, "loss": 0.49558717012405396, "step": 976, "token_acc": 0.8471039976141066 }, { "epoch": 0.6777074481921442, "grad_norm": 0.015144272950989208, "learning_rate": 2.850086593143618e-06, "loss": 0.5038456320762634, "step": 977, "token_acc": 0.8459469888816535 }, { "epoch": 0.6784011098586664, "grad_norm": 0.014737662161841202, "learning_rate": 2.8391586527038744e-06, "loss": 0.4936475157737732, "step": 978, "token_acc": 0.8464599916212819 }, { "epoch": 0.6790947715251886, "grad_norm": 0.015033889078507445, "learning_rate": 2.828243390022272e-06, "loss": 0.5040707588195801, "step": 979, "token_acc": 0.8463881472567969 }, { "epoch": 0.6797884331917108, "grad_norm": 0.014753484169105349, "learning_rate": 2.8173408691391735e-06, "loss": 0.4851222634315491, "step": 980, "token_acc": 0.8512949734349186 }, { "epoch": 0.6804820948582329, "grad_norm": 0.014310934328373945, "learning_rate": 2.8064511540201795e-06, "loss": 0.4920514225959778, "step": 981, "token_acc": 0.8495578437310721 }, { "epoch": 0.681175756524755, "grad_norm": 0.015056660845291867, "learning_rate": 2.795574308555766e-06, "loss": 0.494901567697525, "step": 982, "token_acc": 0.8471882640586798 }, { "epoch": 0.6818694181912772, "grad_norm": 0.015745102233625248, "learning_rate": 2.7847103965608936e-06, "loss": 0.5010907649993896, "step": 983, "token_acc": 0.847149892782242 }, { "epoch": 0.6825630798577994, "grad_norm": 0.01505991569589265, "learning_rate": 2.7738594817746532e-06, "loss": 0.4925991892814636, "step": 984, "token_acc": 0.8487770573090493 }, { "epoch": 0.6832567415243215, "grad_norm": 0.014944036757971672, "learning_rate": 2.7630216278598687e-06, "loss": 0.4843701720237732, "step": 985, "token_acc": 0.8488729570447779 }, { "epoch": 0.6839504031908437, "grad_norm": 0.015757957023274313, "learning_rate": 2.7521968984027426e-06, "loss": 0.5036523342132568, "step": 986, "token_acc": 0.843979708044311 }, { "epoch": 0.6846440648573658, "grad_norm": 0.014704579903109766, "learning_rate": 2.7413853569124742e-06, "loss": 0.4909956455230713, "step": 987, "token_acc": 0.8495161584809202 }, { "epoch": 0.685337726523888, "grad_norm": 0.0145627112875404, "learning_rate": 2.7305870668208847e-06, "loss": 0.4782446026802063, "step": 988, "token_acc": 0.8511444506281749 }, { "epoch": 0.6860313881904101, "grad_norm": 0.014851410984132016, "learning_rate": 2.7198020914820535e-06, "loss": 0.4968253970146179, "step": 989, "token_acc": 0.8475660212308201 }, { "epoch": 0.6867250498569323, "grad_norm": 0.015105828155882635, "learning_rate": 2.709030494171941e-06, "loss": 0.5059019923210144, "step": 990, "token_acc": 0.8438434373884903 }, { "epoch": 0.6874187115234545, "grad_norm": 0.01545541894164361, "learning_rate": 2.6982723380880134e-06, "loss": 0.4964044690132141, "step": 991, "token_acc": 0.8465911943370651 }, { "epoch": 0.6881123731899765, "grad_norm": 0.015092014315186265, "learning_rate": 2.687527686348882e-06, "loss": 0.4943210780620575, "step": 992, "token_acc": 0.8477784467188441 }, { "epoch": 0.6888060348564987, "grad_norm": 0.014906605441169373, "learning_rate": 2.6767966019939273e-06, "loss": 0.4822850227355957, "step": 993, "token_acc": 0.8513847981585526 }, { "epoch": 0.6894996965230209, "grad_norm": 0.015228099127904767, "learning_rate": 2.666079147982924e-06, "loss": 0.48847487568855286, "step": 994, "token_acc": 0.8490375612543927 }, { "epoch": 0.6901933581895431, "grad_norm": 0.016434789554447608, "learning_rate": 2.655375387195682e-06, "loss": 0.49146491289138794, "step": 995, "token_acc": 0.8492905442185099 }, { "epoch": 0.6908870198560652, "grad_norm": 0.01417757864660456, "learning_rate": 2.6446853824316717e-06, "loss": 0.48456496000289917, "step": 996, "token_acc": 0.8502969959589074 }, { "epoch": 0.6915806815225873, "grad_norm": 0.0156821625860381, "learning_rate": 2.634009196409656e-06, "loss": 0.4988805651664734, "step": 997, "token_acc": 0.8452383858604107 }, { "epoch": 0.6922743431891095, "grad_norm": 0.015036207472338567, "learning_rate": 2.6233468917673194e-06, "loss": 0.5086586475372314, "step": 998, "token_acc": 0.8427516863942981 }, { "epoch": 0.6929680048556317, "grad_norm": 0.014944335425452274, "learning_rate": 2.6126985310609074e-06, "loss": 0.4819408059120178, "step": 999, "token_acc": 0.8510630282176539 }, { "epoch": 0.6936616665221538, "grad_norm": 0.015115711540892203, "learning_rate": 2.602064176764858e-06, "loss": 0.5056493282318115, "step": 1000, "token_acc": 0.8445964220857 }, { "epoch": 0.694355328188676, "grad_norm": 0.01563772675587782, "learning_rate": 2.5914438912714236e-06, "loss": 0.49605444073677063, "step": 1001, "token_acc": 0.8455090722011694 }, { "epoch": 0.6950489898551981, "grad_norm": 0.014544807304277868, "learning_rate": 2.5808377368903235e-06, "loss": 0.4721240699291229, "step": 1002, "token_acc": 0.8537916388868311 }, { "epoch": 0.6957426515217203, "grad_norm": 0.021179436867960078, "learning_rate": 2.570245775848369e-06, "loss": 0.48827654123306274, "step": 1003, "token_acc": 0.8470386439309858 }, { "epoch": 0.6964363131882424, "grad_norm": 0.015792035609187828, "learning_rate": 2.5596680702890888e-06, "loss": 0.47810348868370056, "step": 1004, "token_acc": 0.8525996971226654 }, { "epoch": 0.6971299748547646, "grad_norm": 0.015007828164609795, "learning_rate": 2.5491046822723843e-06, "loss": 0.4858604669570923, "step": 1005, "token_acc": 0.8507555322145568 }, { "epoch": 0.6978236365212868, "grad_norm": 0.014604180302724833, "learning_rate": 2.5385556737741527e-06, "loss": 0.48501473665237427, "step": 1006, "token_acc": 0.8495973145956237 }, { "epoch": 0.6985172981878089, "grad_norm": 0.014728769385186292, "learning_rate": 2.5280211066859212e-06, "loss": 0.47741907835006714, "step": 1007, "token_acc": 0.8527443105756358 }, { "epoch": 0.699210959854331, "grad_norm": 0.01588237207923155, "learning_rate": 2.517501042814493e-06, "loss": 0.509096086025238, "step": 1008, "token_acc": 0.8431280937468432 }, { "epoch": 0.6999046215208532, "grad_norm": 0.014967018362491157, "learning_rate": 2.506995543881581e-06, "loss": 0.5144431591033936, "step": 1009, "token_acc": 0.8414242219215156 }, { "epoch": 0.7005982831873754, "grad_norm": 0.015810522676244828, "learning_rate": 2.4965046715234425e-06, "loss": 0.4765987992286682, "step": 1010, "token_acc": 0.8532092371950409 }, { "epoch": 0.7012919448538976, "grad_norm": 0.0145904195441866, "learning_rate": 2.4860284872905184e-06, "loss": 0.461880624294281, "step": 1011, "token_acc": 0.8576604309732417 }, { "epoch": 0.7019856065204196, "grad_norm": 0.014810177380409312, "learning_rate": 2.4755670526470777e-06, "loss": 0.4977007508277893, "step": 1012, "token_acc": 0.8469075992355727 }, { "epoch": 0.7026792681869418, "grad_norm": 0.01585134322962679, "learning_rate": 2.465120428970853e-06, "loss": 0.4859347343444824, "step": 1013, "token_acc": 0.8494155652002368 }, { "epoch": 0.703372929853464, "grad_norm": 0.015126121810374244, "learning_rate": 2.454688677552674e-06, "loss": 0.48555877804756165, "step": 1014, "token_acc": 0.8505802469135803 }, { "epoch": 0.7040665915199861, "grad_norm": 0.014774692533618381, "learning_rate": 2.4442718595961207e-06, "loss": 0.48450538516044617, "step": 1015, "token_acc": 0.8517632714312512 }, { "epoch": 0.7047602531865083, "grad_norm": 0.015037776910171254, "learning_rate": 2.433870036217158e-06, "loss": 0.48211878538131714, "step": 1016, "token_acc": 0.8518038112786694 }, { "epoch": 0.7054539148530304, "grad_norm": 0.01739685325526002, "learning_rate": 2.423483268443769e-06, "loss": 0.4897506833076477, "step": 1017, "token_acc": 0.8487738586417561 }, { "epoch": 0.7061475765195526, "grad_norm": 0.015291271869550028, "learning_rate": 2.4131116172156143e-06, "loss": 0.4834560751914978, "step": 1018, "token_acc": 0.8504437383587159 }, { "epoch": 0.7068412381860747, "grad_norm": 0.014819231381669053, "learning_rate": 2.4027551433836616e-06, "loss": 0.4766578674316406, "step": 1019, "token_acc": 0.8523614282128213 }, { "epoch": 0.7075348998525969, "grad_norm": 0.016948557092073293, "learning_rate": 2.392413907709828e-06, "loss": 0.4620873034000397, "step": 1020, "token_acc": 0.8554705245134011 }, { "epoch": 0.7082285615191191, "grad_norm": 0.01426445507467062, "learning_rate": 2.3820879708666335e-06, "loss": 0.4805910289287567, "step": 1021, "token_acc": 0.8519102353585112 }, { "epoch": 0.7089222231856412, "grad_norm": 0.014669906803883992, "learning_rate": 2.371777393436836e-06, "loss": 0.4865392744541168, "step": 1022, "token_acc": 0.8495514957967407 }, { "epoch": 0.7096158848521633, "grad_norm": 0.01727914003328102, "learning_rate": 2.361482235913082e-06, "loss": 0.4928773045539856, "step": 1023, "token_acc": 0.8465934544926202 }, { "epoch": 0.7103095465186855, "grad_norm": 0.014561680813780002, "learning_rate": 2.351202558697543e-06, "loss": 0.485928475856781, "step": 1024, "token_acc": 0.8499585234342596 }, { "epoch": 0.7110032081852077, "grad_norm": 0.01492710796166681, "learning_rate": 2.3409384221015713e-06, "loss": 0.4990689754486084, "step": 1025, "token_acc": 0.8471149083704804 }, { "epoch": 0.7116968698517299, "grad_norm": 0.016082593899117637, "learning_rate": 2.3306898863453437e-06, "loss": 0.5078955888748169, "step": 1026, "token_acc": 0.8443071979772973 }, { "epoch": 0.7123905315182519, "grad_norm": 0.016271283193642044, "learning_rate": 2.320457011557498e-06, "loss": 0.4973866045475006, "step": 1027, "token_acc": 0.8464028603179204 }, { "epoch": 0.7130841931847741, "grad_norm": 0.01523887801758737, "learning_rate": 2.3102398577747963e-06, "loss": 0.4757079482078552, "step": 1028, "token_acc": 0.8521429419446753 }, { "epoch": 0.7137778548512963, "grad_norm": 0.01529246590921894, "learning_rate": 2.3000384849417633e-06, "loss": 0.4849313795566559, "step": 1029, "token_acc": 0.8503841641118807 }, { "epoch": 0.7144715165178185, "grad_norm": 0.014349205271470438, "learning_rate": 2.289852952910331e-06, "loss": 0.46652036905288696, "step": 1030, "token_acc": 0.8549098148894314 }, { "epoch": 0.7151651781843406, "grad_norm": 0.014783161597349213, "learning_rate": 2.2796833214394986e-06, "loss": 0.48262202739715576, "step": 1031, "token_acc": 0.8512696055348281 }, { "epoch": 0.7158588398508627, "grad_norm": 0.01566697483986332, "learning_rate": 2.2695296501949754e-06, "loss": 0.49339598417282104, "step": 1032, "token_acc": 0.8476891475522081 }, { "epoch": 0.7165525015173849, "grad_norm": 0.028411983305986398, "learning_rate": 2.2593919987488245e-06, "loss": 0.49643173813819885, "step": 1033, "token_acc": 0.8475223116399796 }, { "epoch": 0.7172461631839071, "grad_norm": 0.01607211657942205, "learning_rate": 2.249270426579132e-06, "loss": 0.4862135350704193, "step": 1034, "token_acc": 0.8481868313384666 }, { "epoch": 0.7179398248504292, "grad_norm": 0.015426807590427876, "learning_rate": 2.239164993069634e-06, "loss": 0.4932153820991516, "step": 1035, "token_acc": 0.8475959362269275 }, { "epoch": 0.7186334865169514, "grad_norm": 0.015621173711595729, "learning_rate": 2.2290757575093895e-06, "loss": 0.4836636185646057, "step": 1036, "token_acc": 0.8502628333313377 }, { "epoch": 0.7193271481834735, "grad_norm": 0.014813341940891114, "learning_rate": 2.2190027790924162e-06, "loss": 0.48033607006073, "step": 1037, "token_acc": 0.8507781109622975 }, { "epoch": 0.7200208098499956, "grad_norm": 0.016503648015777014, "learning_rate": 2.2089461169173555e-06, "loss": 0.4789632558822632, "step": 1038, "token_acc": 0.8526886848694408 }, { "epoch": 0.7207144715165178, "grad_norm": 0.019393336107971868, "learning_rate": 2.198905829987121e-06, "loss": 0.4759595990180969, "step": 1039, "token_acc": 0.8535192593970972 }, { "epoch": 0.72140813318304, "grad_norm": 0.01537059578575822, "learning_rate": 2.1888819772085447e-06, "loss": 0.49502551555633545, "step": 1040, "token_acc": 0.847054163065399 }, { "epoch": 0.7221017948495622, "grad_norm": 0.014650720056424357, "learning_rate": 2.1788746173920472e-06, "loss": 0.49381712079048157, "step": 1041, "token_acc": 0.8468312980979513 }, { "epoch": 0.7227954565160842, "grad_norm": 0.015307141744407709, "learning_rate": 2.168883809251282e-06, "loss": 0.4845913350582123, "step": 1042, "token_acc": 0.8482517565655725 }, { "epoch": 0.7234891181826064, "grad_norm": 0.014882722427806416, "learning_rate": 2.1589096114027887e-06, "loss": 0.48117008805274963, "step": 1043, "token_acc": 0.8508790047260931 }, { "epoch": 0.7241827798491286, "grad_norm": 0.014616900038304804, "learning_rate": 2.1489520823656575e-06, "loss": 0.49325162172317505, "step": 1044, "token_acc": 0.8470485743213015 }, { "epoch": 0.7248764415156508, "grad_norm": 0.015373073256868677, "learning_rate": 2.139011280561184e-06, "loss": 0.48385536670684814, "step": 1045, "token_acc": 0.8515298762432617 }, { "epoch": 0.7255701031821729, "grad_norm": 0.014369337074878195, "learning_rate": 2.129087264312515e-06, "loss": 0.4767202138900757, "step": 1046, "token_acc": 0.8524680207011034 }, { "epoch": 0.726263764848695, "grad_norm": 0.016127771358255178, "learning_rate": 2.11918009184433e-06, "loss": 0.5082826614379883, "step": 1047, "token_acc": 0.8441837956241562 }, { "epoch": 0.7269574265152172, "grad_norm": 0.01487587493527573, "learning_rate": 2.109289821282472e-06, "loss": 0.4868817925453186, "step": 1048, "token_acc": 0.8492424889822129 }, { "epoch": 0.7276510881817394, "grad_norm": 0.015605254785692687, "learning_rate": 2.099416510653628e-06, "loss": 0.5194337368011475, "step": 1049, "token_acc": 0.842254757347832 }, { "epoch": 0.7283447498482615, "grad_norm": 0.015124496642371938, "learning_rate": 2.0895602178849744e-06, "loss": 0.48941701650619507, "step": 1050, "token_acc": 0.8480621460094517 }, { "epoch": 0.7290384115147837, "grad_norm": 0.014159991252724286, "learning_rate": 2.0797210008038475e-06, "loss": 0.4827421009540558, "step": 1051, "token_acc": 0.8518626820281888 }, { "epoch": 0.7297320731813058, "grad_norm": 0.014554301446084958, "learning_rate": 2.0698989171373994e-06, "loss": 0.4842878580093384, "step": 1052, "token_acc": 0.8510346636151243 }, { "epoch": 0.730425734847828, "grad_norm": 0.02243146380955053, "learning_rate": 2.0600940245122558e-06, "loss": 0.49361759424209595, "step": 1053, "token_acc": 0.8469353157610539 }, { "epoch": 0.7311193965143501, "grad_norm": 0.014681778512559234, "learning_rate": 2.0503063804541858e-06, "loss": 0.47838741540908813, "step": 1054, "token_acc": 0.8524671310772797 }, { "epoch": 0.7318130581808723, "grad_norm": 0.014598917365664743, "learning_rate": 2.04053604238776e-06, "loss": 0.48771247267723083, "step": 1055, "token_acc": 0.8502119303623171 }, { "epoch": 0.7325067198473945, "grad_norm": 0.01535795122236516, "learning_rate": 2.03078306763601e-06, "loss": 0.49432411789894104, "step": 1056, "token_acc": 0.8462104305110526 }, { "epoch": 0.7332003815139165, "grad_norm": 0.01487428365563731, "learning_rate": 2.021047513420101e-06, "loss": 0.47908201813697815, "step": 1057, "token_acc": 0.8502274554991418 }, { "epoch": 0.7338940431804387, "grad_norm": 0.01555780548840077, "learning_rate": 2.0113294368589877e-06, "loss": 0.4920053780078888, "step": 1058, "token_acc": 0.8482869963674351 }, { "epoch": 0.7345877048469609, "grad_norm": 0.017121801145560233, "learning_rate": 2.0016288949690845e-06, "loss": 0.48199859261512756, "step": 1059, "token_acc": 0.8498627521183912 }, { "epoch": 0.7352813665134831, "grad_norm": 0.01485193340718201, "learning_rate": 1.99194594466393e-06, "loss": 0.4814346432685852, "step": 1060, "token_acc": 0.8517426305337884 }, { "epoch": 0.7359750281800052, "grad_norm": 0.014753003898077637, "learning_rate": 1.982280642753848e-06, "loss": 0.4857046902179718, "step": 1061, "token_acc": 0.8504559345123403 }, { "epoch": 0.7366686898465273, "grad_norm": 0.015182424095882606, "learning_rate": 1.972633045945623e-06, "loss": 0.4836413562297821, "step": 1062, "token_acc": 0.8521635475026821 }, { "epoch": 0.7373623515130495, "grad_norm": 0.014803716206238477, "learning_rate": 1.9630032108421575e-06, "loss": 0.4742114841938019, "step": 1063, "token_acc": 0.8542002507612395 }, { "epoch": 0.7380560131795717, "grad_norm": 0.014803289896367273, "learning_rate": 1.953391193942151e-06, "loss": 0.48096615076065063, "step": 1064, "token_acc": 0.8504672897196262 }, { "epoch": 0.7387496748460938, "grad_norm": 0.01808690172606116, "learning_rate": 1.94379705163976e-06, "loss": 0.499566912651062, "step": 1065, "token_acc": 0.8468128651491534 }, { "epoch": 0.739443336512616, "grad_norm": 0.015528923608981206, "learning_rate": 1.934220840224269e-06, "loss": 0.5057448148727417, "step": 1066, "token_acc": 0.8434688646529628 }, { "epoch": 0.7401369981791381, "grad_norm": 0.015531018422006614, "learning_rate": 1.9246626158797627e-06, "loss": 0.49275335669517517, "step": 1067, "token_acc": 0.8482313279294018 }, { "epoch": 0.7408306598456603, "grad_norm": 0.015026142751390956, "learning_rate": 1.9151224346847956e-06, "loss": 0.4699188470840454, "step": 1068, "token_acc": 0.8553137463918267 }, { "epoch": 0.7415243215121824, "grad_norm": 0.01684193740386647, "learning_rate": 1.9056003526120586e-06, "loss": 0.48632144927978516, "step": 1069, "token_acc": 0.8504808101438176 }, { "epoch": 0.7422179831787046, "grad_norm": 0.014467969721966928, "learning_rate": 1.8960964255280573e-06, "loss": 0.46264785528182983, "step": 1070, "token_acc": 0.8581867267595578 }, { "epoch": 0.7429116448452268, "grad_norm": 0.01569105974189077, "learning_rate": 1.886610709192781e-06, "loss": 0.48013564944267273, "step": 1071, "token_acc": 0.8518138887872021 }, { "epoch": 0.7436053065117489, "grad_norm": 0.01487785357382387, "learning_rate": 1.8771432592593747e-06, "loss": 0.5001915693283081, "step": 1072, "token_acc": 0.8466850662263627 }, { "epoch": 0.744298968178271, "grad_norm": 0.01574642564975611, "learning_rate": 1.8676941312738156e-06, "loss": 0.5001866817474365, "step": 1073, "token_acc": 0.8462003506438547 }, { "epoch": 0.7449926298447932, "grad_norm": 0.014243877693283958, "learning_rate": 1.8582633806745798e-06, "loss": 0.47086209058761597, "step": 1074, "token_acc": 0.8541553439353482 }, { "epoch": 0.7456862915113154, "grad_norm": 0.020318101237777917, "learning_rate": 1.8488510627923296e-06, "loss": 0.48285406827926636, "step": 1075, "token_acc": 0.8511015319651631 }, { "epoch": 0.7463799531778376, "grad_norm": 0.015022342942374048, "learning_rate": 1.8394572328495753e-06, "loss": 0.4885493814945221, "step": 1076, "token_acc": 0.8492713912068751 }, { "epoch": 0.7470736148443596, "grad_norm": 0.014796801131364019, "learning_rate": 1.8300819459603608e-06, "loss": 0.4786321520805359, "step": 1077, "token_acc": 0.8518135107497696 }, { "epoch": 0.7477672765108818, "grad_norm": 0.01613619183647605, "learning_rate": 1.8207252571299393e-06, "loss": 0.491268128156662, "step": 1078, "token_acc": 0.8488146229704154 }, { "epoch": 0.748460938177404, "grad_norm": 0.01704396553246551, "learning_rate": 1.8113872212544426e-06, "loss": 0.49682724475860596, "step": 1079, "token_acc": 0.8471988309887062 }, { "epoch": 0.7491545998439261, "grad_norm": 0.015437292586545182, "learning_rate": 1.8020678931205688e-06, "loss": 0.4881242513656616, "step": 1080, "token_acc": 0.8510550609957138 }, { "epoch": 0.7498482615104483, "grad_norm": 0.01568776420169638, "learning_rate": 1.792767327405257e-06, "loss": 0.5043549537658691, "step": 1081, "token_acc": 0.8448815488340446 }, { "epoch": 0.7505419231769704, "grad_norm": 0.014849040477224842, "learning_rate": 1.783485578675363e-06, "loss": 0.49488365650177, "step": 1082, "token_acc": 0.8479552354306804 }, { "epoch": 0.7512355848434926, "grad_norm": 0.014524775768953257, "learning_rate": 1.7742227013873454e-06, "loss": 0.47741204500198364, "step": 1083, "token_acc": 0.8533694127577176 }, { "epoch": 0.7519292465100147, "grad_norm": 0.014648152227207358, "learning_rate": 1.7649787498869419e-06, "loss": 0.49362924695014954, "step": 1084, "token_acc": 0.8464782593329956 }, { "epoch": 0.7526229081765369, "grad_norm": 0.014464348598455135, "learning_rate": 1.7557537784088535e-06, "loss": 0.4848307967185974, "step": 1085, "token_acc": 0.8500048393341076 }, { "epoch": 0.7533165698430591, "grad_norm": 0.014857839920301309, "learning_rate": 1.7465478410764237e-06, "loss": 0.49537113308906555, "step": 1086, "token_acc": 0.8464165469969952 }, { "epoch": 0.7540102315095812, "grad_norm": 0.014963288224737338, "learning_rate": 1.737360991901318e-06, "loss": 0.5112149715423584, "step": 1087, "token_acc": 0.8423951950494449 }, { "epoch": 0.7547038931761033, "grad_norm": 0.01499546744726426, "learning_rate": 1.7281932847832177e-06, "loss": 0.5083386301994324, "step": 1088, "token_acc": 0.8421464116851367 }, { "epoch": 0.7553975548426255, "grad_norm": 0.015162082945457985, "learning_rate": 1.719044773509489e-06, "loss": 0.5002686381340027, "step": 1089, "token_acc": 0.846529605653174 }, { "epoch": 0.7560912165091477, "grad_norm": 0.014683139262880777, "learning_rate": 1.7099155117548817e-06, "loss": 0.47220921516418457, "step": 1090, "token_acc": 0.853482415992791 }, { "epoch": 0.7567848781756699, "grad_norm": 0.015398328507306946, "learning_rate": 1.7008055530812068e-06, "loss": 0.5035794973373413, "step": 1091, "token_acc": 0.8457592211196405 }, { "epoch": 0.7574785398421919, "grad_norm": 0.014774375505750129, "learning_rate": 1.6917149509370173e-06, "loss": 0.5106973052024841, "step": 1092, "token_acc": 0.8429336362161285 }, { "epoch": 0.7581722015087141, "grad_norm": 0.014150879877483891, "learning_rate": 1.682643758657308e-06, "loss": 0.4574889540672302, "step": 1093, "token_acc": 0.8578149378878289 }, { "epoch": 0.7588658631752363, "grad_norm": 0.015223655351545674, "learning_rate": 1.673592029463193e-06, "loss": 0.5024327635765076, "step": 1094, "token_acc": 0.8452364104213573 }, { "epoch": 0.7595595248417585, "grad_norm": 0.016474931299108063, "learning_rate": 1.664559816461591e-06, "loss": 0.5008118152618408, "step": 1095, "token_acc": 0.8470720351280078 }, { "epoch": 0.7602531865082806, "grad_norm": 0.019271134765099604, "learning_rate": 1.655547172644924e-06, "loss": 0.4903147220611572, "step": 1096, "token_acc": 0.8496907706945765 }, { "epoch": 0.7609468481748027, "grad_norm": 0.015754712874446127, "learning_rate": 1.6465541508907985e-06, "loss": 0.5032920837402344, "step": 1097, "token_acc": 0.8443793124503878 }, { "epoch": 0.7616405098413249, "grad_norm": 0.015976233668121634, "learning_rate": 1.637580803961697e-06, "loss": 0.4884096384048462, "step": 1098, "token_acc": 0.8491846681509116 }, { "epoch": 0.7623341715078471, "grad_norm": 0.014518678636972416, "learning_rate": 1.6286271845046714e-06, "loss": 0.4889319837093353, "step": 1099, "token_acc": 0.8489039229279721 }, { "epoch": 0.7630278331743692, "grad_norm": 0.016399594013816625, "learning_rate": 1.6196933450510261e-06, "loss": 0.46936893463134766, "step": 1100, "token_acc": 0.8541011568406762 }, { "epoch": 0.7637214948408914, "grad_norm": 0.014793058325434776, "learning_rate": 1.610779338016022e-06, "loss": 0.48812806606292725, "step": 1101, "token_acc": 0.8497429572735155 }, { "epoch": 0.7644151565074135, "grad_norm": 0.017791644120321304, "learning_rate": 1.601885215698556e-06, "loss": 0.494831919670105, "step": 1102, "token_acc": 0.8462255697367465 }, { "epoch": 0.7651088181739356, "grad_norm": 0.016009552295313256, "learning_rate": 1.5930110302808639e-06, "loss": 0.5026143789291382, "step": 1103, "token_acc": 0.846633475580844 }, { "epoch": 0.7658024798404578, "grad_norm": 0.014841244529652744, "learning_rate": 1.5841568338282132e-06, "loss": 0.4807819128036499, "step": 1104, "token_acc": 0.850722505005669 }, { "epoch": 0.76649614150698, "grad_norm": 0.015457030485057777, "learning_rate": 1.5753226782885882e-06, "loss": 0.47803711891174316, "step": 1105, "token_acc": 0.851727527753135 }, { "epoch": 0.7671898031735022, "grad_norm": 0.014725109856269207, "learning_rate": 1.5665086154923986e-06, "loss": 0.46997731924057007, "step": 1106, "token_acc": 0.8547809347434085 }, { "epoch": 0.7678834648400242, "grad_norm": 0.014997466630371197, "learning_rate": 1.5577146971521678e-06, "loss": 0.4692568778991699, "step": 1107, "token_acc": 0.8544776577084892 }, { "epoch": 0.7685771265065464, "grad_norm": 0.015342185368645867, "learning_rate": 1.5489409748622265e-06, "loss": 0.4760567247867584, "step": 1108, "token_acc": 0.8519501320806588 }, { "epoch": 0.7692707881730686, "grad_norm": 0.016241192319235636, "learning_rate": 1.540187500098419e-06, "loss": 0.5063484907150269, "step": 1109, "token_acc": 0.8455612059144774 }, { "epoch": 0.7699644498395908, "grad_norm": 0.01510663392576633, "learning_rate": 1.5314543242177948e-06, "loss": 0.4992586672306061, "step": 1110, "token_acc": 0.8458442778996073 }, { "epoch": 0.7706581115061129, "grad_norm": 0.014213261381244138, "learning_rate": 1.5227414984583084e-06, "loss": 0.4811577796936035, "step": 1111, "token_acc": 0.8511703518712672 }, { "epoch": 0.771351773172635, "grad_norm": 0.015340874792292916, "learning_rate": 1.514049073938522e-06, "loss": 0.47088056802749634, "step": 1112, "token_acc": 0.854507782494441 }, { "epoch": 0.7720454348391572, "grad_norm": 0.014292711647144511, "learning_rate": 1.5053771016572966e-06, "loss": 0.46076104044914246, "step": 1113, "token_acc": 0.8584035580524345 }, { "epoch": 0.7727390965056794, "grad_norm": 0.01510850358198487, "learning_rate": 1.4967256324935064e-06, "loss": 0.4902913272380829, "step": 1114, "token_acc": 0.84808451275203 }, { "epoch": 0.7734327581722015, "grad_norm": 0.01406616187742871, "learning_rate": 1.4880947172057259e-06, "loss": 0.4712037444114685, "step": 1115, "token_acc": 0.854129263913824 }, { "epoch": 0.7741264198387237, "grad_norm": 0.014459810216399666, "learning_rate": 1.4794844064319436e-06, "loss": 0.48807263374328613, "step": 1116, "token_acc": 0.8494061872626563 }, { "epoch": 0.7748200815052458, "grad_norm": 0.01539873373560859, "learning_rate": 1.4708947506892608e-06, "loss": 0.4796691834926605, "step": 1117, "token_acc": 0.851583532647001 }, { "epoch": 0.775513743171768, "grad_norm": 0.0143411953225552, "learning_rate": 1.4623258003735902e-06, "loss": 0.49138838052749634, "step": 1118, "token_acc": 0.8494747777906038 }, { "epoch": 0.7762074048382901, "grad_norm": 0.014988362878052813, "learning_rate": 1.453777605759369e-06, "loss": 0.4891347885131836, "step": 1119, "token_acc": 0.8491440623927012 }, { "epoch": 0.7769010665048123, "grad_norm": 0.014721478009786656, "learning_rate": 1.4452502169992594e-06, "loss": 0.46590349078178406, "step": 1120, "token_acc": 0.8558769708971499 }, { "epoch": 0.7775947281713345, "grad_norm": 0.014682912925220538, "learning_rate": 1.4367436841238491e-06, "loss": 0.4991234242916107, "step": 1121, "token_acc": 0.8469599628697755 }, { "epoch": 0.7782883898378565, "grad_norm": 0.014816631650523786, "learning_rate": 1.4282580570413752e-06, "loss": 0.4783792495727539, "step": 1122, "token_acc": 0.8517951231466732 }, { "epoch": 0.7789820515043787, "grad_norm": 0.01494424175377722, "learning_rate": 1.4197933855374058e-06, "loss": 0.48028987646102905, "step": 1123, "token_acc": 0.8519668246445498 }, { "epoch": 0.7796757131709009, "grad_norm": 0.015181019737389426, "learning_rate": 1.4113497192745706e-06, "loss": 0.4810061454772949, "step": 1124, "token_acc": 0.8503107302160077 }, { "epoch": 0.7803693748374231, "grad_norm": 0.01521911526936282, "learning_rate": 1.4029271077922597e-06, "loss": 0.4921690821647644, "step": 1125, "token_acc": 0.8487995953560044 }, { "epoch": 0.7810630365039452, "grad_norm": 0.016784287230982284, "learning_rate": 1.3945256005063284e-06, "loss": 0.49938973784446716, "step": 1126, "token_acc": 0.8465262588778846 }, { "epoch": 0.7817566981704673, "grad_norm": 0.023631365264367588, "learning_rate": 1.3861452467088198e-06, "loss": 0.4944407641887665, "step": 1127, "token_acc": 0.8477658236350092 }, { "epoch": 0.7824503598369895, "grad_norm": 0.015537279130983416, "learning_rate": 1.3777860955676614e-06, "loss": 0.4780152440071106, "step": 1128, "token_acc": 0.8536919650505397 }, { "epoch": 0.7831440215035117, "grad_norm": 0.03283702324814802, "learning_rate": 1.3694481961263901e-06, "loss": 0.4954927861690521, "step": 1129, "token_acc": 0.8476018054659987 }, { "epoch": 0.7838376831700338, "grad_norm": 0.014596448518650322, "learning_rate": 1.3611315973038552e-06, "loss": 0.4784379005432129, "step": 1130, "token_acc": 0.8515335658552371 }, { "epoch": 0.784531344836556, "grad_norm": 0.015068494292256731, "learning_rate": 1.3528363478939328e-06, "loss": 0.47476738691329956, "step": 1131, "token_acc": 0.8531402195666509 }, { "epoch": 0.7852250065030781, "grad_norm": 0.01459048430336981, "learning_rate": 1.3445624965652432e-06, "loss": 0.46084678173065186, "step": 1132, "token_acc": 0.8575732937969296 }, { "epoch": 0.7859186681696003, "grad_norm": 0.01827511891904174, "learning_rate": 1.336310091860864e-06, "loss": 0.47069159150123596, "step": 1133, "token_acc": 0.8556967503817069 }, { "epoch": 0.7866123298361224, "grad_norm": 0.014650956529059296, "learning_rate": 1.3280791821980394e-06, "loss": 0.48098528385162354, "step": 1134, "token_acc": 0.8505811408841122 }, { "epoch": 0.7873059915026446, "grad_norm": 0.015018409810654356, "learning_rate": 1.3198698158679101e-06, "loss": 0.4762737452983856, "step": 1135, "token_acc": 0.8523133005418092 }, { "epoch": 0.7879996531691668, "grad_norm": 0.015389516220403725, "learning_rate": 1.3116820410352116e-06, "loss": 0.4732992649078369, "step": 1136, "token_acc": 0.8532967237759197 }, { "epoch": 0.7886933148356889, "grad_norm": 0.015455452965593584, "learning_rate": 1.303515905738006e-06, "loss": 0.5015147924423218, "step": 1137, "token_acc": 0.8458254627880619 }, { "epoch": 0.789386976502211, "grad_norm": 0.013858981657726019, "learning_rate": 1.295371457887396e-06, "loss": 0.456808865070343, "step": 1138, "token_acc": 0.8591728525980912 }, { "epoch": 0.7900806381687332, "grad_norm": 0.015148535785668195, "learning_rate": 1.2872487452672394e-06, "loss": 0.4995688796043396, "step": 1139, "token_acc": 0.8454888516179953 }, { "epoch": 0.7907742998352554, "grad_norm": 0.014912837073042303, "learning_rate": 1.279147815533876e-06, "loss": 0.4939815402030945, "step": 1140, "token_acc": 0.8475750850018889 }, { "epoch": 0.7914679615017776, "grad_norm": 0.015685891351868937, "learning_rate": 1.2710687162158407e-06, "loss": 0.4841570258140564, "step": 1141, "token_acc": 0.8492170076021007 }, { "epoch": 0.7921616231682996, "grad_norm": 0.015150754464117774, "learning_rate": 1.2630114947135918e-06, "loss": 0.4800683856010437, "step": 1142, "token_acc": 0.8525944421669834 }, { "epoch": 0.7928552848348218, "grad_norm": 0.014493210389137251, "learning_rate": 1.2549761982992293e-06, "loss": 0.46789151430130005, "step": 1143, "token_acc": 0.8540130789389895 }, { "epoch": 0.793548946501344, "grad_norm": 0.01607973583830975, "learning_rate": 1.2469628741162136e-06, "loss": 0.49391451478004456, "step": 1144, "token_acc": 0.8471340750143109 }, { "epoch": 0.7942426081678661, "grad_norm": 0.014306781637042385, "learning_rate": 1.238971569179097e-06, "loss": 0.4993424117565155, "step": 1145, "token_acc": 0.8454431011139878 }, { "epoch": 0.7949362698343883, "grad_norm": 0.015615838220694957, "learning_rate": 1.2310023303732444e-06, "loss": 0.505712628364563, "step": 1146, "token_acc": 0.8443890928159413 }, { "epoch": 0.7956299315009104, "grad_norm": 0.015213463858169874, "learning_rate": 1.2230552044545512e-06, "loss": 0.5002025365829468, "step": 1147, "token_acc": 0.8463069669984284 }, { "epoch": 0.7963235931674326, "grad_norm": 0.014909808062567196, "learning_rate": 1.2151302380491864e-06, "loss": 0.506147027015686, "step": 1148, "token_acc": 0.8439727527064834 }, { "epoch": 0.7970172548339547, "grad_norm": 0.015913262983965205, "learning_rate": 1.2072274776532971e-06, "loss": 0.5109888911247253, "step": 1149, "token_acc": 0.8424944540300715 }, { "epoch": 0.7977109165004769, "grad_norm": 0.01679062637887947, "learning_rate": 1.199346969632753e-06, "loss": 0.48737025260925293, "step": 1150, "token_acc": 0.8490900909909009 }, { "epoch": 0.7984045781669991, "grad_norm": 0.016216214187482956, "learning_rate": 1.1914887602228682e-06, "loss": 0.5016616582870483, "step": 1151, "token_acc": 0.8456040028591851 }, { "epoch": 0.7990982398335212, "grad_norm": 0.01503296137154686, "learning_rate": 1.1836528955281245e-06, "loss": 0.49464359879493713, "step": 1152, "token_acc": 0.8478047944366904 }, { "epoch": 0.7997919015000433, "grad_norm": 0.018122698822594773, "learning_rate": 1.1758394215219133e-06, "loss": 0.5182386636734009, "step": 1153, "token_acc": 0.8421653433449177 }, { "epoch": 0.8004855631665655, "grad_norm": 0.015628309988055814, "learning_rate": 1.168048384046252e-06, "loss": 0.49610722064971924, "step": 1154, "token_acc": 0.847619164138201 }, { "epoch": 0.8011792248330877, "grad_norm": 0.0155082974366464, "learning_rate": 1.1602798288115264e-06, "loss": 0.49241721630096436, "step": 1155, "token_acc": 0.8474658279908068 }, { "epoch": 0.8018728864996099, "grad_norm": 0.014795295660218916, "learning_rate": 1.152533801396219e-06, "loss": 0.4911728501319885, "step": 1156, "token_acc": 0.8479717178980166 }, { "epoch": 0.8025665481661319, "grad_norm": 0.014729743427823152, "learning_rate": 1.1448103472466337e-06, "loss": 0.4774756133556366, "step": 1157, "token_acc": 0.8528099365517916 }, { "epoch": 0.8032602098326541, "grad_norm": 0.014980192653512032, "learning_rate": 1.1371095116766433e-06, "loss": 0.48005640506744385, "step": 1158, "token_acc": 0.8513314155049052 }, { "epoch": 0.8039538714991763, "grad_norm": 0.017558992311390877, "learning_rate": 1.1294313398674128e-06, "loss": 0.48691046237945557, "step": 1159, "token_acc": 0.8481006636378773 }, { "epoch": 0.8046475331656985, "grad_norm": 0.014682237086191173, "learning_rate": 1.121775876867139e-06, "loss": 0.47944995760917664, "step": 1160, "token_acc": 0.8522227679480575 }, { "epoch": 0.8053411948322206, "grad_norm": 0.0153717660901358, "learning_rate": 1.114143167590785e-06, "loss": 0.47564589977264404, "step": 1161, "token_acc": 0.8525025731862029 }, { "epoch": 0.8060348564987427, "grad_norm": 0.014901304751844589, "learning_rate": 1.106533256819814e-06, "loss": 0.4821814000606537, "step": 1162, "token_acc": 0.850415880845302 }, { "epoch": 0.8067285181652649, "grad_norm": 0.015263025970738996, "learning_rate": 1.0989461892019326e-06, "loss": 0.47467362880706787, "step": 1163, "token_acc": 0.8540419067706175 }, { "epoch": 0.8074221798317871, "grad_norm": 0.01534476403537387, "learning_rate": 1.0913820092508271e-06, "loss": 0.49298083782196045, "step": 1164, "token_acc": 0.8481125445509764 }, { "epoch": 0.8081158414983092, "grad_norm": 0.014532958702280037, "learning_rate": 1.083840761345894e-06, "loss": 0.4824800491333008, "step": 1165, "token_acc": 0.8497787864492357 }, { "epoch": 0.8088095031648314, "grad_norm": 0.01864744598547217, "learning_rate": 1.0763224897319935e-06, "loss": 0.48031681776046753, "step": 1166, "token_acc": 0.8499698280197123 }, { "epoch": 0.8095031648313535, "grad_norm": 0.014909697427245749, "learning_rate": 1.0688272385191778e-06, "loss": 0.4898441731929779, "step": 1167, "token_acc": 0.8479122522747102 }, { "epoch": 0.8101968264978756, "grad_norm": 0.014397455236124691, "learning_rate": 1.0613550516824405e-06, "loss": 0.4768611788749695, "step": 1168, "token_acc": 0.8534238356841202 }, { "epoch": 0.8108904881643978, "grad_norm": 0.015141087588342114, "learning_rate": 1.053905973061456e-06, "loss": 0.485501766204834, "step": 1169, "token_acc": 0.8501874494621549 }, { "epoch": 0.81158414983092, "grad_norm": 0.015703794734878186, "learning_rate": 1.046480046360317e-06, "loss": 0.48971396684646606, "step": 1170, "token_acc": 0.8493162157958338 }, { "epoch": 0.8122778114974422, "grad_norm": 0.01595851346487391, "learning_rate": 1.0390773151472867e-06, "loss": 0.49396079778671265, "step": 1171, "token_acc": 0.8486320696847013 }, { "epoch": 0.8129714731639642, "grad_norm": 0.015396316323977574, "learning_rate": 1.0316978228545393e-06, "loss": 0.5019892454147339, "step": 1172, "token_acc": 0.8455304488554859 }, { "epoch": 0.8136651348304864, "grad_norm": 0.01920997357138387, "learning_rate": 1.0243416127779039e-06, "loss": 0.5042543411254883, "step": 1173, "token_acc": 0.8446826323298784 }, { "epoch": 0.8143587964970086, "grad_norm": 0.015513943016403088, "learning_rate": 1.0170087280766134e-06, "loss": 0.498773455619812, "step": 1174, "token_acc": 0.8448185852257896 }, { "epoch": 0.8150524581635308, "grad_norm": 0.014822288333179381, "learning_rate": 1.0096992117730453e-06, "loss": 0.4866490960121155, "step": 1175, "token_acc": 0.849618033204999 }, { "epoch": 0.8157461198300529, "grad_norm": 0.015230762190034875, "learning_rate": 1.002413106752479e-06, "loss": 0.48216721415519714, "step": 1176, "token_acc": 0.8513523356317374 }, { "epoch": 0.816439781496575, "grad_norm": 0.015024267768031636, "learning_rate": 9.951504557628366e-07, "loss": 0.48413145542144775, "step": 1177, "token_acc": 0.8491725145498527 }, { "epoch": 0.8171334431630972, "grad_norm": 0.014540275063947823, "learning_rate": 9.879113014144326e-07, "loss": 0.4652285873889923, "step": 1178, "token_acc": 0.855804305843645 }, { "epoch": 0.8178271048296194, "grad_norm": 0.014494871688375256, "learning_rate": 9.806956861797283e-07, "loss": 0.48984047770500183, "step": 1179, "token_acc": 0.8472857550175412 }, { "epoch": 0.8185207664961415, "grad_norm": 0.015255883454622306, "learning_rate": 9.73503652393078e-07, "loss": 0.49657249450683594, "step": 1180, "token_acc": 0.8464348001920987 }, { "epoch": 0.8192144281626637, "grad_norm": 0.014647990704413876, "learning_rate": 9.663352422504834e-07, "loss": 0.4946049451828003, "step": 1181, "token_acc": 0.8476932033182942 }, { "epoch": 0.8199080898291858, "grad_norm": 0.014525996660795898, "learning_rate": 9.591904978093463e-07, "loss": 0.4867572486400604, "step": 1182, "token_acc": 0.8497771191896126 }, { "epoch": 0.820601751495708, "grad_norm": 0.014900292347780352, "learning_rate": 9.52069460988217e-07, "loss": 0.4857378602027893, "step": 1183, "token_acc": 0.8510748666972278 }, { "epoch": 0.8212954131622301, "grad_norm": 0.014793507637166517, "learning_rate": 9.449721735665562e-07, "loss": 0.4714547395706177, "step": 1184, "token_acc": 0.8540977016929916 }, { "epoch": 0.8219890748287523, "grad_norm": 0.014380593399707906, "learning_rate": 9.378986771844839e-07, "loss": 0.4791170656681061, "step": 1185, "token_acc": 0.8526920491352441 }, { "epoch": 0.8226827364952745, "grad_norm": 0.015486431061818252, "learning_rate": 9.308490133425368e-07, "loss": 0.49600014090538025, "step": 1186, "token_acc": 0.8468361617563561 }, { "epoch": 0.8233763981617965, "grad_norm": 0.017691126900802403, "learning_rate": 9.238232234014266e-07, "loss": 0.4796775281429291, "step": 1187, "token_acc": 0.8518590357502213 }, { "epoch": 0.8240700598283187, "grad_norm": 0.01441392905532477, "learning_rate": 9.168213485817923e-07, "loss": 0.4712483286857605, "step": 1188, "token_acc": 0.8537368396204145 }, { "epoch": 0.8247637214948409, "grad_norm": 0.015008403689929652, "learning_rate": 9.098434299639647e-07, "loss": 0.49830883741378784, "step": 1189, "token_acc": 0.8461854267348566 }, { "epoch": 0.8254573831613631, "grad_norm": 0.020320183119689694, "learning_rate": 9.028895084877232e-07, "loss": 0.4823826551437378, "step": 1190, "token_acc": 0.8506120815775324 }, { "epoch": 0.8261510448278852, "grad_norm": 0.01465439602213039, "learning_rate": 8.959596249520503e-07, "loss": 0.4661049246788025, "step": 1191, "token_acc": 0.8543276757535847 }, { "epoch": 0.8268447064944073, "grad_norm": 0.014933104959178626, "learning_rate": 8.890538200149024e-07, "loss": 0.48948362469673157, "step": 1192, "token_acc": 0.8500915389309689 }, { "epoch": 0.8275383681609295, "grad_norm": 0.018976513577772563, "learning_rate": 8.821721341929601e-07, "loss": 0.47134244441986084, "step": 1193, "token_acc": 0.8537956548547386 }, { "epoch": 0.8282320298274517, "grad_norm": 0.015833582590698184, "learning_rate": 8.753146078614006e-07, "loss": 0.49518120288848877, "step": 1194, "token_acc": 0.8457138479609816 }, { "epoch": 0.8289256914939738, "grad_norm": 0.015523745238077914, "learning_rate": 8.684812812536559e-07, "loss": 0.4917120337486267, "step": 1195, "token_acc": 0.8493331745653727 }, { "epoch": 0.829619353160496, "grad_norm": 0.014436966208534331, "learning_rate": 8.616721944611717e-07, "loss": 0.4802964925765991, "step": 1196, "token_acc": 0.852530352205794 }, { "epoch": 0.8303130148270181, "grad_norm": 0.015565829904664577, "learning_rate": 8.548873874331842e-07, "loss": 0.508421778678894, "step": 1197, "token_acc": 0.8439957333862526 }, { "epoch": 0.8310066764935403, "grad_norm": 0.016491346888324425, "learning_rate": 8.481268999764758e-07, "loss": 0.49222487211227417, "step": 1198, "token_acc": 0.8489801770799728 }, { "epoch": 0.8317003381600624, "grad_norm": 0.015763780459760294, "learning_rate": 8.413907717551456e-07, "loss": 0.509338915348053, "step": 1199, "token_acc": 0.8411023682103042 }, { "epoch": 0.8323939998265846, "grad_norm": 0.014124900856806647, "learning_rate": 8.346790422903767e-07, "loss": 0.48319417238235474, "step": 1200, "token_acc": 0.8516298013961144 }, { "epoch": 0.8330876614931068, "grad_norm": 0.01431052124216175, "learning_rate": 8.279917509602003e-07, "loss": 0.4816702604293823, "step": 1201, "token_acc": 0.8518500766871165 }, { "epoch": 0.8337813231596289, "grad_norm": 0.01477930917755017, "learning_rate": 8.21328936999271e-07, "loss": 0.48292064666748047, "step": 1202, "token_acc": 0.8512737689650701 }, { "epoch": 0.834474984826151, "grad_norm": 0.017784153781997426, "learning_rate": 8.146906394986343e-07, "loss": 0.48401451110839844, "step": 1203, "token_acc": 0.8517974541893971 }, { "epoch": 0.8351686464926732, "grad_norm": 0.014946812097329157, "learning_rate": 8.080768974054926e-07, "loss": 0.5003101229667664, "step": 1204, "token_acc": 0.8444172769462677 }, { "epoch": 0.8358623081591954, "grad_norm": 0.015315083543524797, "learning_rate": 8.014877495229845e-07, "loss": 0.490465372800827, "step": 1205, "token_acc": 0.8488576449912126 }, { "epoch": 0.8365559698257176, "grad_norm": 0.014779287786465261, "learning_rate": 7.949232345099494e-07, "loss": 0.5001910924911499, "step": 1206, "token_acc": 0.8444315720546013 }, { "epoch": 0.8372496314922396, "grad_norm": 0.014249777572259092, "learning_rate": 7.883833908807087e-07, "loss": 0.47642362117767334, "step": 1207, "token_acc": 0.8528539703499189 }, { "epoch": 0.8379432931587618, "grad_norm": 0.0143818004505075, "learning_rate": 7.818682570048336e-07, "loss": 0.48089301586151123, "step": 1208, "token_acc": 0.8514933034115597 }, { "epoch": 0.838636954825284, "grad_norm": 0.018538022400050994, "learning_rate": 7.753778711069199e-07, "loss": 0.49009865522384644, "step": 1209, "token_acc": 0.8480121530994109 }, { "epoch": 0.8393306164918061, "grad_norm": 0.01427801308464171, "learning_rate": 7.689122712663693e-07, "loss": 0.46379759907722473, "step": 1210, "token_acc": 0.856657031117157 }, { "epoch": 0.8400242781583283, "grad_norm": 0.01457306336567194, "learning_rate": 7.624714954171613e-07, "loss": 0.48636144399642944, "step": 1211, "token_acc": 0.8509876614725135 }, { "epoch": 0.8407179398248504, "grad_norm": 0.014340828274367421, "learning_rate": 7.560555813476312e-07, "loss": 0.4882701635360718, "step": 1212, "token_acc": 0.8511698632569703 }, { "epoch": 0.8414116014913726, "grad_norm": 0.014108839308463017, "learning_rate": 7.496645667002511e-07, "loss": 0.4870738089084625, "step": 1213, "token_acc": 0.8506556024206858 }, { "epoch": 0.8421052631578947, "grad_norm": 0.014103050631162585, "learning_rate": 7.432984889714029e-07, "loss": 0.4771355390548706, "step": 1214, "token_acc": 0.8538589942735261 }, { "epoch": 0.8427989248244169, "grad_norm": 0.014265390313865612, "learning_rate": 7.369573855111662e-07, "loss": 0.49214503169059753, "step": 1215, "token_acc": 0.8486220711424062 }, { "epoch": 0.8434925864909391, "grad_norm": 0.01605635631514752, "learning_rate": 7.306412935230956e-07, "loss": 0.5066254138946533, "step": 1216, "token_acc": 0.8457706653832271 }, { "epoch": 0.8441862481574612, "grad_norm": 0.014581091324433481, "learning_rate": 7.243502500639982e-07, "loss": 0.47143658995628357, "step": 1217, "token_acc": 0.8540629592755891 }, { "epoch": 0.8448799098239833, "grad_norm": 0.015604173099228949, "learning_rate": 7.180842920437254e-07, "loss": 0.4858618378639221, "step": 1218, "token_acc": 0.849934547185529 }, { "epoch": 0.8455735714905055, "grad_norm": 0.014428989976537304, "learning_rate": 7.11843456224946e-07, "loss": 0.47412240505218506, "step": 1219, "token_acc": 0.8514113623602675 }, { "epoch": 0.8462672331570277, "grad_norm": 0.014886650338331524, "learning_rate": 7.056277792229399e-07, "loss": 0.496471107006073, "step": 1220, "token_acc": 0.8467693660896441 }, { "epoch": 0.8469608948235499, "grad_norm": 0.015127874325550886, "learning_rate": 6.994372975053776e-07, "loss": 0.5043197274208069, "step": 1221, "token_acc": 0.8454506749480327 }, { "epoch": 0.8476545564900719, "grad_norm": 0.014460906126215962, "learning_rate": 6.932720473921045e-07, "loss": 0.471439927816391, "step": 1222, "token_acc": 0.8526715143850775 }, { "epoch": 0.8483482181565941, "grad_norm": 0.01526370047924029, "learning_rate": 6.87132065054939e-07, "loss": 0.4909140467643738, "step": 1223, "token_acc": 0.8485328012659534 }, { "epoch": 0.8490418798231163, "grad_norm": 0.01737513504736715, "learning_rate": 6.810173865174425e-07, "loss": 0.5112253427505493, "step": 1224, "token_acc": 0.8417460790152869 }, { "epoch": 0.8497355414896385, "grad_norm": 0.014275554014001653, "learning_rate": 6.749280476547249e-07, "loss": 0.47410711646080017, "step": 1225, "token_acc": 0.853473452386652 }, { "epoch": 0.8504292031561606, "grad_norm": 0.014228035409978076, "learning_rate": 6.688640841932247e-07, "loss": 0.4710024297237396, "step": 1226, "token_acc": 0.854679744460541 }, { "epoch": 0.8511228648226827, "grad_norm": 0.014851933636995702, "learning_rate": 6.628255317105003e-07, "loss": 0.48160475492477417, "step": 1227, "token_acc": 0.8500863214636146 }, { "epoch": 0.8518165264892049, "grad_norm": 0.015553267890868907, "learning_rate": 6.568124256350255e-07, "loss": 0.5038610696792603, "step": 1228, "token_acc": 0.8441106921454592 }, { "epoch": 0.852510188155727, "grad_norm": 0.01668400913794991, "learning_rate": 6.508248012459761e-07, "loss": 0.5057685971260071, "step": 1229, "token_acc": 0.8458969465648855 }, { "epoch": 0.8532038498222492, "grad_norm": 0.014256071688107297, "learning_rate": 6.448626936730268e-07, "loss": 0.4853042960166931, "step": 1230, "token_acc": 0.8503269212546195 }, { "epoch": 0.8538975114887714, "grad_norm": 0.015211786769963294, "learning_rate": 6.389261378961459e-07, "loss": 0.5060874223709106, "step": 1231, "token_acc": 0.8437729461985538 }, { "epoch": 0.8545911731552935, "grad_norm": 0.014695466022403377, "learning_rate": 6.330151687453845e-07, "loss": 0.47927096486091614, "step": 1232, "token_acc": 0.8526340133404742 }, { "epoch": 0.8552848348218156, "grad_norm": 0.015796372594204754, "learning_rate": 6.271298209006776e-07, "loss": 0.49978917837142944, "step": 1233, "token_acc": 0.8457178217821782 }, { "epoch": 0.8559784964883378, "grad_norm": 0.015058200451232072, "learning_rate": 6.212701288916401e-07, "loss": 0.4724929928779602, "step": 1234, "token_acc": 0.8536338000296839 }, { "epoch": 0.85667215815486, "grad_norm": 0.014863002422858701, "learning_rate": 6.154361270973585e-07, "loss": 0.4908880591392517, "step": 1235, "token_acc": 0.8478818067511364 }, { "epoch": 0.8573658198213822, "grad_norm": 0.015856202741638018, "learning_rate": 6.096278497462005e-07, "loss": 0.5080985426902771, "step": 1236, "token_acc": 0.8437236885455943 }, { "epoch": 0.8580594814879042, "grad_norm": 0.015347687083991337, "learning_rate": 6.038453309156006e-07, "loss": 0.4814985990524292, "step": 1237, "token_acc": 0.8509170946441673 }, { "epoch": 0.8587531431544264, "grad_norm": 0.014452973378931356, "learning_rate": 5.980886045318707e-07, "loss": 0.4812590479850769, "step": 1238, "token_acc": 0.8510823324793757 }, { "epoch": 0.8594468048209486, "grad_norm": 0.015676111189983338, "learning_rate": 5.923577043699968e-07, "loss": 0.4962432086467743, "step": 1239, "token_acc": 0.8452162938032073 }, { "epoch": 0.8601404664874708, "grad_norm": 0.014726951610677032, "learning_rate": 5.866526640534387e-07, "loss": 0.49580591917037964, "step": 1240, "token_acc": 0.8464266263237519 }, { "epoch": 0.8608341281539929, "grad_norm": 0.01486914114854171, "learning_rate": 5.809735170539394e-07, "loss": 0.453208327293396, "step": 1241, "token_acc": 0.8584187736706113 }, { "epoch": 0.861527789820515, "grad_norm": 0.01713180082715134, "learning_rate": 5.753202966913197e-07, "loss": 0.48552072048187256, "step": 1242, "token_acc": 0.8502116064000386 }, { "epoch": 0.8622214514870372, "grad_norm": 0.014708881228334672, "learning_rate": 5.696930361332914e-07, "loss": 0.5048816204071045, "step": 1243, "token_acc": 0.844431694722836 }, { "epoch": 0.8629151131535594, "grad_norm": 0.01489388446385596, "learning_rate": 5.640917683952591e-07, "loss": 0.5054924488067627, "step": 1244, "token_acc": 0.8453063462784784 }, { "epoch": 0.8636087748200815, "grad_norm": 0.014711077172958331, "learning_rate": 5.585165263401221e-07, "loss": 0.4894644320011139, "step": 1245, "token_acc": 0.8499077276560381 }, { "epoch": 0.8643024364866037, "grad_norm": 0.014952929713200884, "learning_rate": 5.529673426780907e-07, "loss": 0.4816892743110657, "step": 1246, "token_acc": 0.8504103819660277 }, { "epoch": 0.8649960981531258, "grad_norm": 0.014479600570672305, "learning_rate": 5.474442499664867e-07, "loss": 0.48173987865448, "step": 1247, "token_acc": 0.8516653898070118 }, { "epoch": 0.865689759819648, "grad_norm": 0.015059603646536898, "learning_rate": 5.419472806095554e-07, "loss": 0.46261098980903625, "step": 1248, "token_acc": 0.8562405943761775 }, { "epoch": 0.8663834214861701, "grad_norm": 0.014883058916993397, "learning_rate": 5.364764668582767e-07, "loss": 0.48334556818008423, "step": 1249, "token_acc": 0.8517173166132596 }, { "epoch": 0.8670770831526923, "grad_norm": 0.015170503821912797, "learning_rate": 5.310318408101706e-07, "loss": 0.4940692186355591, "step": 1250, "token_acc": 0.8469791289811499 }, { "epoch": 0.8677707448192145, "grad_norm": 0.015203082962900344, "learning_rate": 5.256134344091168e-07, "loss": 0.5064350962638855, "step": 1251, "token_acc": 0.8451549690557988 }, { "epoch": 0.8684644064857365, "grad_norm": 0.015128726122267322, "learning_rate": 5.202212794451622e-07, "loss": 0.510948657989502, "step": 1252, "token_acc": 0.8426370918052988 }, { "epoch": 0.8691580681522587, "grad_norm": 0.01478191158787928, "learning_rate": 5.148554075543316e-07, "loss": 0.4760981500148773, "step": 1253, "token_acc": 0.8532676005531173 }, { "epoch": 0.8698517298187809, "grad_norm": 0.015043459216382324, "learning_rate": 5.09515850218451e-07, "loss": 0.4991549253463745, "step": 1254, "token_acc": 0.8457918845716996 }, { "epoch": 0.8705453914853031, "grad_norm": 0.01674456383423164, "learning_rate": 5.042026387649535e-07, "loss": 0.4882330894470215, "step": 1255, "token_acc": 0.8489710586635097 }, { "epoch": 0.8712390531518251, "grad_norm": 0.015103009609797782, "learning_rate": 4.98915804366702e-07, "loss": 0.495825856924057, "step": 1256, "token_acc": 0.8461510661366101 }, { "epoch": 0.8719327148183473, "grad_norm": 0.015177830248476895, "learning_rate": 4.936553780418041e-07, "loss": 0.497866153717041, "step": 1257, "token_acc": 0.8457742210788012 }, { "epoch": 0.8726263764848695, "grad_norm": 0.01487457579616895, "learning_rate": 4.884213906534286e-07, "loss": 0.47706133127212524, "step": 1258, "token_acc": 0.8522521016431028 }, { "epoch": 0.8733200381513917, "grad_norm": 0.013917530701210093, "learning_rate": 4.832138729096258e-07, "loss": 0.47600480914115906, "step": 1259, "token_acc": 0.8542011461253533 }, { "epoch": 0.8740136998179138, "grad_norm": 0.01533028591700318, "learning_rate": 4.780328553631492e-07, "loss": 0.5045238733291626, "step": 1260, "token_acc": 0.8428912486633011 }, { "epoch": 0.874707361484436, "grad_norm": 0.01559143474714665, "learning_rate": 4.7287836841127255e-07, "loss": 0.49202632904052734, "step": 1261, "token_acc": 0.847623100608306 }, { "epoch": 0.8754010231509581, "grad_norm": 0.015847747010119914, "learning_rate": 4.677504422956153e-07, "loss": 0.4643852710723877, "step": 1262, "token_acc": 0.8559315794640228 }, { "epoch": 0.8760946848174803, "grad_norm": 0.014967159277258401, "learning_rate": 4.6264910710195955e-07, "loss": 0.47826164960861206, "step": 1263, "token_acc": 0.8513480745915363 }, { "epoch": 0.8767883464840024, "grad_norm": 0.01465810413550369, "learning_rate": 4.575743927600812e-07, "loss": 0.4848337769508362, "step": 1264, "token_acc": 0.8505064530926493 }, { "epoch": 0.8774820081505246, "grad_norm": 0.014936251125406362, "learning_rate": 4.5252632904356943e-07, "loss": 0.4958951473236084, "step": 1265, "token_acc": 0.8488746756773065 }, { "epoch": 0.8781756698170468, "grad_norm": 0.014237325098593096, "learning_rate": 4.475049455696501e-07, "loss": 0.4788554310798645, "step": 1266, "token_acc": 0.8522434392146531 }, { "epoch": 0.8788693314835689, "grad_norm": 0.015077470670510787, "learning_rate": 4.425102717990193e-07, "loss": 0.46219199895858765, "step": 1267, "token_acc": 0.8576673425558209 }, { "epoch": 0.879562993150091, "grad_norm": 0.015252990864331952, "learning_rate": 4.375423370356613e-07, "loss": 0.49129313230514526, "step": 1268, "token_acc": 0.8476937937696208 }, { "epoch": 0.8802566548166132, "grad_norm": 0.014410991405190574, "learning_rate": 4.326011704266847e-07, "loss": 0.4779866337776184, "step": 1269, "token_acc": 0.85320763956905 }, { "epoch": 0.8809503164831354, "grad_norm": 0.015044510130134591, "learning_rate": 4.276868009621488e-07, "loss": 0.49329057335853577, "step": 1270, "token_acc": 0.8466496291217354 }, { "epoch": 0.8816439781496576, "grad_norm": 0.015376697412873886, "learning_rate": 4.2279925747488903e-07, "loss": 0.4844633638858795, "step": 1271, "token_acc": 0.8500751970161824 }, { "epoch": 0.8823376398161796, "grad_norm": 0.01570771194014842, "learning_rate": 4.1793856864035367e-07, "loss": 0.49036291241645813, "step": 1272, "token_acc": 0.847940745974865 }, { "epoch": 0.8830313014827018, "grad_norm": 0.015077556844614955, "learning_rate": 4.131047629764345e-07, "loss": 0.48658254742622375, "step": 1273, "token_acc": 0.8501951695535497 }, { "epoch": 0.883724963149224, "grad_norm": 0.015661292232570276, "learning_rate": 4.08297868843297e-07, "loss": 0.4949260950088501, "step": 1274, "token_acc": 0.8476840122133438 }, { "epoch": 0.8844186248157461, "grad_norm": 0.015012837955363257, "learning_rate": 4.035179144432172e-07, "loss": 0.48250728845596313, "step": 1275, "token_acc": 0.8496135015118353 }, { "epoch": 0.8851122864822683, "grad_norm": 0.015558404357522922, "learning_rate": 3.987649278204103e-07, "loss": 0.4432808756828308, "step": 1276, "token_acc": 0.8640694425358435 }, { "epoch": 0.8858059481487904, "grad_norm": 0.015171069714704664, "learning_rate": 3.940389368608749e-07, "loss": 0.469568133354187, "step": 1277, "token_acc": 0.8553029386866611 }, { "epoch": 0.8864996098153126, "grad_norm": 0.01488045578321707, "learning_rate": 3.8933996929222394e-07, "loss": 0.4691130816936493, "step": 1278, "token_acc": 0.8542978316707485 }, { "epoch": 0.8871932714818347, "grad_norm": 0.01482097595792368, "learning_rate": 3.8466805268351894e-07, "loss": 0.48471006751060486, "step": 1279, "token_acc": 0.851341506305582 }, { "epoch": 0.8878869331483569, "grad_norm": 0.014826870715518012, "learning_rate": 3.8002321444511825e-07, "loss": 0.46322381496429443, "step": 1280, "token_acc": 0.8575789936553804 }, { "epoch": 0.888580594814879, "grad_norm": 0.014234400841443631, "learning_rate": 3.754054818285041e-07, "loss": 0.4925912916660309, "step": 1281, "token_acc": 0.8495663066342086 }, { "epoch": 0.8892742564814012, "grad_norm": 0.01563680502078851, "learning_rate": 3.708148819261331e-07, "loss": 0.518622636795044, "step": 1282, "token_acc": 0.8416090266239036 }, { "epoch": 0.8899679181479233, "grad_norm": 0.015063130089119567, "learning_rate": 3.6625144167127205e-07, "loss": 0.4791542887687683, "step": 1283, "token_acc": 0.8509066487575554 }, { "epoch": 0.8906615798144455, "grad_norm": 0.014426640470813285, "learning_rate": 3.617151878378389e-07, "loss": 0.4754985570907593, "step": 1284, "token_acc": 0.851685500633108 }, { "epoch": 0.8913552414809677, "grad_norm": 0.014913104596004262, "learning_rate": 3.5720614704024937e-07, "loss": 0.47861558198928833, "step": 1285, "token_acc": 0.8506273626143598 }, { "epoch": 0.8920489031474899, "grad_norm": 0.015453434956521841, "learning_rate": 3.527243457332591e-07, "loss": 0.4806508421897888, "step": 1286, "token_acc": 0.852040205815484 }, { "epoch": 0.8927425648140119, "grad_norm": 0.015637730041351938, "learning_rate": 3.482698102118071e-07, "loss": 0.48896628618240356, "step": 1287, "token_acc": 0.8474536977409134 }, { "epoch": 0.8934362264805341, "grad_norm": 0.0175159888091215, "learning_rate": 3.438425666108652e-07, "loss": 0.4794098734855652, "step": 1288, "token_acc": 0.8504329440193314 }, { "epoch": 0.8941298881470563, "grad_norm": 0.014840343838700202, "learning_rate": 3.3944264090527813e-07, "loss": 0.495941162109375, "step": 1289, "token_acc": 0.8465895454327269 }, { "epoch": 0.8948235498135785, "grad_norm": 0.015550352387680573, "learning_rate": 3.350700589096184e-07, "loss": 0.4850412607192993, "step": 1290, "token_acc": 0.8496087636932708 }, { "epoch": 0.8955172114801006, "grad_norm": 0.015624331451613432, "learning_rate": 3.3072484627803215e-07, "loss": 0.48219728469848633, "step": 1291, "token_acc": 0.8499042568861394 }, { "epoch": 0.8962108731466227, "grad_norm": 0.014413419208571279, "learning_rate": 3.264070285040849e-07, "loss": 0.4901720881462097, "step": 1292, "token_acc": 0.8485210208417886 }, { "epoch": 0.8969045348131449, "grad_norm": 0.016372895418834146, "learning_rate": 3.221166309206192e-07, "loss": 0.47804829478263855, "step": 1293, "token_acc": 0.8532075112000762 }, { "epoch": 0.897598196479667, "grad_norm": 0.015367505805447703, "learning_rate": 3.178536786995978e-07, "loss": 0.5003529787063599, "step": 1294, "token_acc": 0.847169879042389 }, { "epoch": 0.8982918581461892, "grad_norm": 0.013997340196095156, "learning_rate": 3.136181968519647e-07, "loss": 0.44829559326171875, "step": 1295, "token_acc": 0.8624173979930772 }, { "epoch": 0.8989855198127114, "grad_norm": 0.015056229853540565, "learning_rate": 3.0941021022749096e-07, "loss": 0.5001315474510193, "step": 1296, "token_acc": 0.8460694382798244 }, { "epoch": 0.8996791814792335, "grad_norm": 0.0153465221521075, "learning_rate": 3.052297435146323e-07, "loss": 0.48008930683135986, "step": 1297, "token_acc": 0.8513819337442219 }, { "epoch": 0.9003728431457556, "grad_norm": 0.020577385738220242, "learning_rate": 3.010768212403847e-07, "loss": 0.46722811460494995, "step": 1298, "token_acc": 0.8547109447861452 }, { "epoch": 0.9010665048122778, "grad_norm": 0.015291809077035649, "learning_rate": 2.969514677701385e-07, "loss": 0.48564088344573975, "step": 1299, "token_acc": 0.8492283683024102 }, { "epoch": 0.9017601664788, "grad_norm": 0.015497252073076185, "learning_rate": 2.928537073075377e-07, "loss": 0.49905911087989807, "step": 1300, "token_acc": 0.8441692338268906 }, { "epoch": 0.9024538281453222, "grad_norm": 0.014560529850063819, "learning_rate": 2.887835638943365e-07, "loss": 0.4709942936897278, "step": 1301, "token_acc": 0.85369873046875 }, { "epoch": 0.9031474898118442, "grad_norm": 0.015199560279924085, "learning_rate": 2.847410614102575e-07, "loss": 0.4938746988773346, "step": 1302, "token_acc": 0.8477448197011934 }, { "epoch": 0.9038411514783664, "grad_norm": 0.01579614217058597, "learning_rate": 2.8072622357285416e-07, "loss": 0.5009612441062927, "step": 1303, "token_acc": 0.8460111758411604 }, { "epoch": 0.9045348131448886, "grad_norm": 0.015086112080284153, "learning_rate": 2.7673907393736986e-07, "loss": 0.4836624562740326, "step": 1304, "token_acc": 0.8504585618160485 }, { "epoch": 0.9052284748114108, "grad_norm": 0.01532945794856661, "learning_rate": 2.727796358965984e-07, "loss": 0.4977220296859741, "step": 1305, "token_acc": 0.8464377733836659 }, { "epoch": 0.9059221364779328, "grad_norm": 0.014369509529550054, "learning_rate": 2.6884793268075084e-07, "loss": 0.4605836868286133, "step": 1306, "token_acc": 0.8566338341427988 }, { "epoch": 0.906615798144455, "grad_norm": 0.014545698885116032, "learning_rate": 2.6494398735731455e-07, "loss": 0.477038711309433, "step": 1307, "token_acc": 0.8525177700264902 }, { "epoch": 0.9073094598109772, "grad_norm": 0.014639811860293873, "learning_rate": 2.6106782283092104e-07, "loss": 0.47814515233039856, "step": 1308, "token_acc": 0.852556647989569 }, { "epoch": 0.9080031214774994, "grad_norm": 0.014460787571007367, "learning_rate": 2.572194618432117e-07, "loss": 0.4681873321533203, "step": 1309, "token_acc": 0.8546651010323402 }, { "epoch": 0.9086967831440215, "grad_norm": 0.01441730957640273, "learning_rate": 2.533989269726994e-07, "loss": 0.47597038745880127, "step": 1310, "token_acc": 0.8524427661080953 }, { "epoch": 0.9093904448105437, "grad_norm": 0.01511387265632127, "learning_rate": 2.4960624063464557e-07, "loss": 0.48148781061172485, "step": 1311, "token_acc": 0.8512144139231591 }, { "epoch": 0.9100841064770658, "grad_norm": 0.015721720728772853, "learning_rate": 2.4584142508091824e-07, "loss": 0.4872797131538391, "step": 1312, "token_acc": 0.849751116783663 }, { "epoch": 0.910777768143588, "grad_norm": 0.015317497391154938, "learning_rate": 2.421045023998675e-07, "loss": 0.5071355104446411, "step": 1313, "token_acc": 0.8453397580634989 }, { "epoch": 0.9114714298101101, "grad_norm": 0.014554465795721047, "learning_rate": 2.3839549451619702e-07, "loss": 0.4928591549396515, "step": 1314, "token_acc": 0.8485395928561902 }, { "epoch": 0.9121650914766323, "grad_norm": 0.014506088487726472, "learning_rate": 2.3471442319082826e-07, "loss": 0.4884806275367737, "step": 1315, "token_acc": 0.8499857281497973 }, { "epoch": 0.9128587531431545, "grad_norm": 0.014534496131311182, "learning_rate": 2.3106131002078258e-07, "loss": 0.4698955714702606, "step": 1316, "token_acc": 0.8542419154153731 }, { "epoch": 0.9135524148096765, "grad_norm": 0.015283008160014313, "learning_rate": 2.2743617643904704e-07, "loss": 0.4908464848995209, "step": 1317, "token_acc": 0.8480065133121893 }, { "epoch": 0.9142460764761987, "grad_norm": 0.014122567549765387, "learning_rate": 2.2383904371444997e-07, "loss": 0.4640657305717468, "step": 1318, "token_acc": 0.8567585431185426 }, { "epoch": 0.9149397381427209, "grad_norm": 0.014814553728184887, "learning_rate": 2.202699329515412e-07, "loss": 0.481908917427063, "step": 1319, "token_acc": 0.8501543747522196 }, { "epoch": 0.9156333998092431, "grad_norm": 0.014978212112858463, "learning_rate": 2.167288650904592e-07, "loss": 0.4914807677268982, "step": 1320, "token_acc": 0.8483299803821115 }, { "epoch": 0.9163270614757651, "grad_norm": 0.015109716969415203, "learning_rate": 2.13215860906818e-07, "loss": 0.4732681214809418, "step": 1321, "token_acc": 0.8541486782187001 }, { "epoch": 0.9170207231422873, "grad_norm": 0.015037799045012975, "learning_rate": 2.0973094101157887e-07, "loss": 0.48215681314468384, "step": 1322, "token_acc": 0.8513672809607731 }, { "epoch": 0.9177143848088095, "grad_norm": 0.015343938765583326, "learning_rate": 2.0627412585092931e-07, "loss": 0.4992937445640564, "step": 1323, "token_acc": 0.8460502021058935 }, { "epoch": 0.9184080464753317, "grad_norm": 0.014847180515524382, "learning_rate": 2.028454357061699e-07, "loss": 0.47934192419052124, "step": 1324, "token_acc": 0.8535156012037477 }, { "epoch": 0.9191017081418538, "grad_norm": 0.014687352743812434, "learning_rate": 1.9944489069358586e-07, "loss": 0.4909333288669586, "step": 1325, "token_acc": 0.8490175434445719 }, { "epoch": 0.919795369808376, "grad_norm": 0.015229153317636286, "learning_rate": 1.960725107643352e-07, "loss": 0.4906335473060608, "step": 1326, "token_acc": 0.8498776716632928 }, { "epoch": 0.9204890314748981, "grad_norm": 0.014986021551377345, "learning_rate": 1.9272831570433083e-07, "loss": 0.471751868724823, "step": 1327, "token_acc": 0.8539812942366026 }, { "epoch": 0.9211826931414203, "grad_norm": 0.01443728525837145, "learning_rate": 1.8941232513412234e-07, "loss": 0.4699688255786896, "step": 1328, "token_acc": 0.8533712525180709 }, { "epoch": 0.9218763548079424, "grad_norm": 0.014410772913505841, "learning_rate": 1.8612455850878285e-07, "loss": 0.4841550588607788, "step": 1329, "token_acc": 0.8506899563318777 }, { "epoch": 0.9225700164744646, "grad_norm": 0.014531737862082427, "learning_rate": 1.8286503511779463e-07, "loss": 0.47982338070869446, "step": 1330, "token_acc": 0.8527225309544701 }, { "epoch": 0.9232636781409868, "grad_norm": 0.014731007438108274, "learning_rate": 1.7963377408493465e-07, "loss": 0.4786407947540283, "step": 1331, "token_acc": 0.8518728102387517 }, { "epoch": 0.9239573398075089, "grad_norm": 0.01725912557527735, "learning_rate": 1.764307943681648e-07, "loss": 0.4747292995452881, "step": 1332, "token_acc": 0.8546226723293041 }, { "epoch": 0.924651001474031, "grad_norm": 0.014633200197014187, "learning_rate": 1.7325611475951741e-07, "loss": 0.48061102628707886, "step": 1333, "token_acc": 0.8514510927982802 }, { "epoch": 0.9253446631405532, "grad_norm": 0.015301101376536884, "learning_rate": 1.7010975388498718e-07, "loss": 0.5079939365386963, "step": 1334, "token_acc": 0.8433119065571048 }, { "epoch": 0.9260383248070754, "grad_norm": 0.014828773489127018, "learning_rate": 1.669917302044227e-07, "loss": 0.49215176701545715, "step": 1335, "token_acc": 0.8491163700849997 }, { "epoch": 0.9267319864735976, "grad_norm": 0.01606642416843906, "learning_rate": 1.6390206201141567e-07, "loss": 0.5003042221069336, "step": 1336, "token_acc": 0.8442950356017566 }, { "epoch": 0.9274256481401196, "grad_norm": 0.014478785432296564, "learning_rate": 1.6084076743319632e-07, "loss": 0.4747961163520813, "step": 1337, "token_acc": 0.8548352588789045 }, { "epoch": 0.9281193098066418, "grad_norm": 0.015974325829363277, "learning_rate": 1.5780786443052198e-07, "loss": 0.4960228502750397, "step": 1338, "token_acc": 0.8473106776793711 }, { "epoch": 0.928812971473164, "grad_norm": 0.01561546292341737, "learning_rate": 1.5480337079757936e-07, "loss": 0.4891878366470337, "step": 1339, "token_acc": 0.8489391030374637 }, { "epoch": 0.9295066331396861, "grad_norm": 0.015577976579026676, "learning_rate": 1.5182730416187397e-07, "loss": 0.495572566986084, "step": 1340, "token_acc": 0.8477673961456635 }, { "epoch": 0.9302002948062082, "grad_norm": 0.014460611873972696, "learning_rate": 1.4887968198412928e-07, "loss": 0.4690867066383362, "step": 1341, "token_acc": 0.8538900608015882 }, { "epoch": 0.9308939564727304, "grad_norm": 0.016470487096254392, "learning_rate": 1.459605215581833e-07, "loss": 0.48173683881759644, "step": 1342, "token_acc": 0.8520554580129563 }, { "epoch": 0.9315876181392526, "grad_norm": 0.017286681581844265, "learning_rate": 1.4306984001088874e-07, "loss": 0.4810333549976349, "step": 1343, "token_acc": 0.8512939221272554 }, { "epoch": 0.9322812798057747, "grad_norm": 0.014331374236315232, "learning_rate": 1.4020765430200978e-07, "loss": 0.47349411249160767, "step": 1344, "token_acc": 0.8528525407876961 }, { "epoch": 0.9329749414722969, "grad_norm": 0.01666788853682606, "learning_rate": 1.3737398122412593e-07, "loss": 0.5009215474128723, "step": 1345, "token_acc": 0.8448050381543697 }, { "epoch": 0.933668603138819, "grad_norm": 0.014610472571289595, "learning_rate": 1.3456883740253002e-07, "loss": 0.4920837879180908, "step": 1346, "token_acc": 0.8481767009300049 }, { "epoch": 0.9343622648053412, "grad_norm": 0.01471635899571153, "learning_rate": 1.3179223929513319e-07, "loss": 0.47758376598358154, "step": 1347, "token_acc": 0.8528451273376563 }, { "epoch": 0.9350559264718633, "grad_norm": 0.019827727289348823, "learning_rate": 1.2904420319236777e-07, "loss": 0.49908164143562317, "step": 1348, "token_acc": 0.846481797741765 }, { "epoch": 0.9357495881383855, "grad_norm": 0.016939610479986173, "learning_rate": 1.263247452170918e-07, "loss": 0.46709534525871277, "step": 1349, "token_acc": 0.8551113882161099 }, { "epoch": 0.9364432498049077, "grad_norm": 0.04642755474588351, "learning_rate": 1.2363388132449305e-07, "loss": 0.4587211608886719, "step": 1350, "token_acc": 0.8575965926376635 }, { "epoch": 0.9371369114714299, "grad_norm": 0.015377172520885182, "learning_rate": 1.2097162730199674e-07, "loss": 0.47107812762260437, "step": 1351, "token_acc": 0.852191177877715 }, { "epoch": 0.9378305731379519, "grad_norm": 0.015475350633723538, "learning_rate": 1.1833799876917295e-07, "loss": 0.4978245496749878, "step": 1352, "token_acc": 0.8457602159774206 }, { "epoch": 0.9385242348044741, "grad_norm": 0.016089792632347784, "learning_rate": 1.1573301117764501e-07, "loss": 0.5139383673667908, "step": 1353, "token_acc": 0.8410465371175098 }, { "epoch": 0.9392178964709963, "grad_norm": 0.015307529185995443, "learning_rate": 1.1315667981099732e-07, "loss": 0.5043618679046631, "step": 1354, "token_acc": 0.8447739897059573 }, { "epoch": 0.9399115581375185, "grad_norm": 0.015135432841054949, "learning_rate": 1.1060901978468818e-07, "loss": 0.49245384335517883, "step": 1355, "token_acc": 0.8483071502690702 }, { "epoch": 0.9406052198040405, "grad_norm": 0.016292413979470528, "learning_rate": 1.0809004604595885e-07, "loss": 0.4921651780605316, "step": 1356, "token_acc": 0.8485243667465576 }, { "epoch": 0.9412988814705627, "grad_norm": 0.015483976763928103, "learning_rate": 1.0559977337374739e-07, "loss": 0.5070492625236511, "step": 1357, "token_acc": 0.8439488892682094 }, { "epoch": 0.9419925431370849, "grad_norm": 0.04794598312464519, "learning_rate": 1.0313821637860044e-07, "loss": 0.4818376302719116, "step": 1358, "token_acc": 0.8502132861027266 }, { "epoch": 0.942686204803607, "grad_norm": 0.015182877615214488, "learning_rate": 1.0070538950258946e-07, "loss": 0.4973796606063843, "step": 1359, "token_acc": 0.8451398034646945 }, { "epoch": 0.9433798664701292, "grad_norm": 0.01490757535040365, "learning_rate": 9.830130701922458e-08, "loss": 0.49590805172920227, "step": 1360, "token_acc": 0.8470919097146459 }, { "epoch": 0.9440735281366514, "grad_norm": 0.014791479627432347, "learning_rate": 9.592598303337142e-08, "loss": 0.4972732663154602, "step": 1361, "token_acc": 0.8470632388815913 }, { "epoch": 0.9447671898031735, "grad_norm": 0.01489648302302562, "learning_rate": 9.357943148116721e-08, "loss": 0.486162006855011, "step": 1362, "token_acc": 0.8501204937785866 }, { "epoch": 0.9454608514696956, "grad_norm": 0.014737108620549045, "learning_rate": 9.126166612994092e-08, "loss": 0.4937983453273773, "step": 1363, "token_acc": 0.8492099056603774 }, { "epoch": 0.9461545131362178, "grad_norm": 0.014536012324557738, "learning_rate": 8.897270057813156e-08, "loss": 0.47822749614715576, "step": 1364, "token_acc": 0.8513465168206659 }, { "epoch": 0.94684817480274, "grad_norm": 0.014591251160030851, "learning_rate": 8.671254825520725e-08, "loss": 0.4670189619064331, "step": 1365, "token_acc": 0.8549076277204041 }, { "epoch": 0.9475418364692622, "grad_norm": 0.015467609533731646, "learning_rate": 8.448122242158963e-08, "loss": 0.48121193051338196, "step": 1366, "token_acc": 0.850860152135752 }, { "epoch": 0.9482354981357842, "grad_norm": 0.014427477321746785, "learning_rate": 8.227873616857118e-08, "loss": 0.48997747898101807, "step": 1367, "token_acc": 0.8485800984078242 }, { "epoch": 0.9489291598023064, "grad_norm": 0.014840088734240866, "learning_rate": 8.01051024182431e-08, "loss": 0.4922926127910614, "step": 1368, "token_acc": 0.8480007733484781 }, { "epoch": 0.9496228214688286, "grad_norm": 0.01584726074868091, "learning_rate": 7.796033392341807e-08, "loss": 0.4989640712738037, "step": 1369, "token_acc": 0.8450143237674022 }, { "epoch": 0.9503164831353508, "grad_norm": 0.015332896201615175, "learning_rate": 7.584444326755258e-08, "loss": 0.4667845666408539, "step": 1370, "token_acc": 0.855942046338981 }, { "epoch": 0.9510101448018728, "grad_norm": 0.015698498133796868, "learning_rate": 7.375744286467867e-08, "loss": 0.4873367249965668, "step": 1371, "token_acc": 0.8491714402618658 }, { "epoch": 0.951703806468395, "grad_norm": 0.014753190622026534, "learning_rate": 7.169934495932452e-08, "loss": 0.50493323802948, "step": 1372, "token_acc": 0.8455954312897191 }, { "epoch": 0.9523974681349172, "grad_norm": 0.014665804608250876, "learning_rate": 6.96701616264478e-08, "loss": 0.4893949031829834, "step": 1373, "token_acc": 0.8492953113534577 }, { "epoch": 0.9530911298014394, "grad_norm": 0.01478398084442406, "learning_rate": 6.766990477136415e-08, "loss": 0.48541027307510376, "step": 1374, "token_acc": 0.8509180013494797 }, { "epoch": 0.9537847914679615, "grad_norm": 0.015032665275717554, "learning_rate": 6.569858612967384e-08, "loss": 0.505965530872345, "step": 1375, "token_acc": 0.8448908514211083 }, { "epoch": 0.9544784531344837, "grad_norm": 0.015298977289435191, "learning_rate": 6.375621726719739e-08, "loss": 0.4691782593727112, "step": 1376, "token_acc": 0.853504726475909 }, { "epoch": 0.9551721148010058, "grad_norm": 0.01494458734553453, "learning_rate": 6.18428095799034e-08, "loss": 0.48082780838012695, "step": 1377, "token_acc": 0.8503453475363588 }, { "epoch": 0.955865776467528, "grad_norm": 0.01555165847642453, "learning_rate": 5.995837429384532e-08, "loss": 0.5019830465316772, "step": 1378, "token_acc": 0.8450040017238195 }, { "epoch": 0.9565594381340501, "grad_norm": 0.015729481630315584, "learning_rate": 5.8102922465094193e-08, "loss": 0.5026932954788208, "step": 1379, "token_acc": 0.846239995973222 }, { "epoch": 0.9572530998005723, "grad_norm": 0.015003483714720217, "learning_rate": 5.627646497967321e-08, "loss": 0.5210980176925659, "step": 1380, "token_acc": 0.8412733914858719 }, { "epoch": 0.9579467614670945, "grad_norm": 0.016698039748163137, "learning_rate": 5.447901255349497e-08, "loss": 0.480342298746109, "step": 1381, "token_acc": 0.8516656954156954 }, { "epoch": 0.9586404231336165, "grad_norm": 0.01440247758987613, "learning_rate": 5.271057573229765e-08, "loss": 0.47742992639541626, "step": 1382, "token_acc": 0.8521942554043235 }, { "epoch": 0.9593340848001387, "grad_norm": 0.015240924768466443, "learning_rate": 5.0971164891582823e-08, "loss": 0.49536165595054626, "step": 1383, "token_acc": 0.8461728425549779 }, { "epoch": 0.9600277464666609, "grad_norm": 0.015153211710252447, "learning_rate": 4.9260790236557164e-08, "loss": 0.48115891218185425, "step": 1384, "token_acc": 0.8502901353965183 }, { "epoch": 0.9607214081331831, "grad_norm": 0.014146687258688234, "learning_rate": 4.7579461802068074e-08, "loss": 0.4921720623970032, "step": 1385, "token_acc": 0.8492523397575857 }, { "epoch": 0.9614150697997051, "grad_norm": 0.016164446831598466, "learning_rate": 4.5927189452549814e-08, "loss": 0.47069627046585083, "step": 1386, "token_acc": 0.8529672950549311 }, { "epoch": 0.9621087314662273, "grad_norm": 0.014935401291788549, "learning_rate": 4.430398288196192e-08, "loss": 0.47943252325057983, "step": 1387, "token_acc": 0.8512735935495693 }, { "epoch": 0.9628023931327495, "grad_norm": 0.014224013576555223, "learning_rate": 4.270985161373364e-08, "loss": 0.45480039715766907, "step": 1388, "token_acc": 0.860259801328396 }, { "epoch": 0.9634960547992717, "grad_norm": 0.015072835795173245, "learning_rate": 4.1144805000709055e-08, "loss": 0.493346631526947, "step": 1389, "token_acc": 0.8475467289719626 }, { "epoch": 0.9641897164657938, "grad_norm": 0.015101623119300363, "learning_rate": 3.960885222508926e-08, "loss": 0.4650023579597473, "step": 1390, "token_acc": 0.8557908968003605 }, { "epoch": 0.964883378132316, "grad_norm": 0.016946945981899417, "learning_rate": 3.810200229838301e-08, "loss": 0.497998982667923, "step": 1391, "token_acc": 0.8451325783474013 }, { "epoch": 0.9655770397988381, "grad_norm": 0.01511236526857143, "learning_rate": 3.662426406134956e-08, "loss": 0.49606966972351074, "step": 1392, "token_acc": 0.8464102752653382 }, { "epoch": 0.9662707014653603, "grad_norm": 0.015066739156175818, "learning_rate": 3.5175646183948084e-08, "loss": 0.48968619108200073, "step": 1393, "token_acc": 0.848127011473625 }, { "epoch": 0.9669643631318824, "grad_norm": 0.01458227378987733, "learning_rate": 3.3756157165288906e-08, "loss": 0.48212528228759766, "step": 1394, "token_acc": 0.8527548581991974 }, { "epoch": 0.9676580247984046, "grad_norm": 0.014999040609097538, "learning_rate": 3.236580533358125e-08, "loss": 0.5115934610366821, "step": 1395, "token_acc": 0.8418283459388359 }, { "epoch": 0.9683516864649268, "grad_norm": 0.015270341022523943, "learning_rate": 3.100459884608387e-08, "loss": 0.4967975318431854, "step": 1396, "token_acc": 0.8470583889477183 }, { "epoch": 0.9690453481314489, "grad_norm": 0.014729942187699376, "learning_rate": 2.967254568906064e-08, "loss": 0.46931299567222595, "step": 1397, "token_acc": 0.8541807343935548 }, { "epoch": 0.969739009797971, "grad_norm": 0.014853550988070162, "learning_rate": 2.8369653677728925e-08, "loss": 0.46090883016586304, "step": 1398, "token_acc": 0.8573388368091072 }, { "epoch": 0.9704326714644932, "grad_norm": 0.014849960146649691, "learning_rate": 2.70959304562185e-08, "loss": 0.4885014295578003, "step": 1399, "token_acc": 0.8489035195065838 }, { "epoch": 0.9711263331310154, "grad_norm": 0.015497636780232632, "learning_rate": 2.5851383497523808e-08, "loss": 0.47734275460243225, "step": 1400, "token_acc": 0.850331409022878 }, { "epoch": 0.9718199947975376, "grad_norm": 0.015049983112338619, "learning_rate": 2.463602010346011e-08, "loss": 0.49931228160858154, "step": 1401, "token_acc": 0.8449159951794702 }, { "epoch": 0.9725136564640596, "grad_norm": 0.015000927836919788, "learning_rate": 2.344984740462186e-08, "loss": 0.47793424129486084, "step": 1402, "token_acc": 0.8521621753088822 }, { "epoch": 0.9732073181305818, "grad_norm": 0.014591976460812731, "learning_rate": 2.22928723603405e-08, "loss": 0.47189855575561523, "step": 1403, "token_acc": 0.8538755218306593 }, { "epoch": 0.973900979797104, "grad_norm": 0.015435261426315839, "learning_rate": 2.1165101758642836e-08, "loss": 0.4815305471420288, "step": 1404, "token_acc": 0.8509544157800008 }, { "epoch": 0.9745946414636261, "grad_norm": 0.015871419014219075, "learning_rate": 2.006654221621218e-08, "loss": 0.4949691891670227, "step": 1405, "token_acc": 0.8480433280694828 }, { "epoch": 0.9752883031301482, "grad_norm": 0.019748363500073866, "learning_rate": 1.8997200178350052e-08, "loss": 0.5090245008468628, "step": 1406, "token_acc": 0.8424693860713991 }, { "epoch": 0.9759819647966704, "grad_norm": 0.01482242562673695, "learning_rate": 1.7957081918935082e-08, "loss": 0.49824821949005127, "step": 1407, "token_acc": 0.8473565576214386 }, { "epoch": 0.9766756264631926, "grad_norm": 0.01500634511325981, "learning_rate": 1.6946193540391953e-08, "loss": 0.48481571674346924, "step": 1408, "token_acc": 0.8501672901465487 }, { "epoch": 0.9773692881297147, "grad_norm": 0.014444218235446933, "learning_rate": 1.596454097364919e-08, "loss": 0.45973730087280273, "step": 1409, "token_acc": 0.8574705911065553 }, { "epoch": 0.9780629497962369, "grad_norm": 0.014432420990005277, "learning_rate": 1.5012129978109192e-08, "loss": 0.4578933119773865, "step": 1410, "token_acc": 0.8583829421218225 }, { "epoch": 0.978756611462759, "grad_norm": 0.014633565977334286, "learning_rate": 1.4088966141612148e-08, "loss": 0.4911653399467468, "step": 1411, "token_acc": 0.8493817401614816 }, { "epoch": 0.9794502731292812, "grad_norm": 0.014338984853352528, "learning_rate": 1.3195054880404402e-08, "loss": 0.46972334384918213, "step": 1412, "token_acc": 0.8556775030100954 }, { "epoch": 0.9801439347958033, "grad_norm": 0.01561436053252338, "learning_rate": 1.2330401439104023e-08, "loss": 0.4709281027317047, "step": 1413, "token_acc": 0.8546195974300007 }, { "epoch": 0.9808375964623255, "grad_norm": 0.01530006222044117, "learning_rate": 1.1495010890675284e-08, "loss": 0.491136372089386, "step": 1414, "token_acc": 0.8488250719213829 }, { "epoch": 0.9815312581288477, "grad_norm": 0.016423442197015893, "learning_rate": 1.0688888136392572e-08, "loss": 0.4955834746360779, "step": 1415, "token_acc": 0.847953499636716 }, { "epoch": 0.9822249197953699, "grad_norm": 0.015420915373507317, "learning_rate": 9.912037905817073e-09, "loss": 0.5029826164245605, "step": 1416, "token_acc": 0.84404547160446 }, { "epoch": 0.9829185814618919, "grad_norm": 0.01549442339343571, "learning_rate": 9.164464756765135e-09, "loss": 0.499424546957016, "step": 1417, "token_acc": 0.8468828182254682 }, { "epoch": 0.9836122431284141, "grad_norm": 0.01863109433363208, "learning_rate": 8.446173075284391e-09, "loss": 0.47542858123779297, "step": 1418, "token_acc": 0.8511581883859277 }, { "epoch": 0.9843059047949363, "grad_norm": 0.014545337829642457, "learning_rate": 7.757167075624905e-09, "loss": 0.4957142770290375, "step": 1419, "token_acc": 0.8482006883071721 }, { "epoch": 0.9849995664614585, "grad_norm": 0.014833392819802536, "learning_rate": 7.097450800218619e-09, "loss": 0.4770868718624115, "step": 1420, "token_acc": 0.8514475616738443 }, { "epoch": 0.9856932281279805, "grad_norm": 0.01517353528820084, "learning_rate": 6.4670281196510535e-09, "loss": 0.4952002465724945, "step": 1421, "token_acc": 0.8469137028120424 }, { "epoch": 0.9863868897945027, "grad_norm": 0.015129728425794685, "learning_rate": 5.865902732642425e-09, "loss": 0.5073471069335938, "step": 1422, "token_acc": 0.8430885714988078 }, { "epoch": 0.9870805514610249, "grad_norm": 0.014883512322361193, "learning_rate": 5.294078166023786e-09, "loss": 0.4889640212059021, "step": 1423, "token_acc": 0.8518632220280546 }, { "epoch": 0.987774213127547, "grad_norm": 0.015640125330722485, "learning_rate": 4.75155777471592e-09, "loss": 0.47188061475753784, "step": 1424, "token_acc": 0.8528079088287793 }, { "epoch": 0.9884678747940692, "grad_norm": 0.01560323636400069, "learning_rate": 4.238344741712697e-09, "loss": 0.49999168515205383, "step": 1425, "token_acc": 0.8461373498839438 }, { "epoch": 0.9891615364605914, "grad_norm": 0.01753780606615252, "learning_rate": 3.754442078058862e-09, "loss": 0.4745776057243347, "step": 1426, "token_acc": 0.8529645719880071 }, { "epoch": 0.9898551981271135, "grad_norm": 0.015302888749905161, "learning_rate": 3.2998526228344986e-09, "loss": 0.5048530101776123, "step": 1427, "token_acc": 0.8445273339138211 }, { "epoch": 0.9905488597936356, "grad_norm": 0.015381032530294039, "learning_rate": 2.87457904313615e-09, "loss": 0.5048484206199646, "step": 1428, "token_acc": 0.8432450339251316 }, { "epoch": 0.9912425214601578, "grad_norm": 0.015113063905850908, "learning_rate": 2.4786238340651636e-09, "loss": 0.4660869240760803, "step": 1429, "token_acc": 0.8541533488523562 }, { "epoch": 0.99193618312668, "grad_norm": 0.01576395395115819, "learning_rate": 2.111989318709373e-09, "loss": 0.5095952749252319, "step": 1430, "token_acc": 0.8415655690352397 }, { "epoch": 0.9926298447932022, "grad_norm": 0.01562659336561702, "learning_rate": 1.7746776481303297e-09, "loss": 0.47950640320777893, "step": 1431, "token_acc": 0.8508502555014748 }, { "epoch": 0.9933235064597242, "grad_norm": 0.01733827024618465, "learning_rate": 1.4666908013510894e-09, "loss": 0.46759599447250366, "step": 1432, "token_acc": 0.8552837188219437 }, { "epoch": 0.9940171681262464, "grad_norm": 0.014912143987498664, "learning_rate": 1.1880305853462225e-09, "loss": 0.4982258081436157, "step": 1433, "token_acc": 0.84621320413404 }, { "epoch": 0.9947108297927686, "grad_norm": 0.014569605978683578, "learning_rate": 9.386986350273798e-10, "loss": 0.4764164686203003, "step": 1434, "token_acc": 0.8531788294128543 }, { "epoch": 0.9954044914592908, "grad_norm": 0.017434474187605378, "learning_rate": 7.186964132377406e-10, "loss": 0.5005350708961487, "step": 1435, "token_acc": 0.8454956327442616 }, { "epoch": 0.9960981531258128, "grad_norm": 0.014968413792381629, "learning_rate": 5.280252107398021e-10, "loss": 0.4855000376701355, "step": 1436, "token_acc": 0.8507664055765843 }, { "epoch": 0.996791814792335, "grad_norm": 0.015103253526298469, "learning_rate": 3.6668614621093725e-10, "loss": 0.49337685108184814, "step": 1437, "token_acc": 0.8479208816617867 }, { "epoch": 0.9974854764588572, "grad_norm": 0.015227388165579142, "learning_rate": 2.346801662350684e-10, "loss": 0.4743519425392151, "step": 1438, "token_acc": 0.8526354319180088 }, { "epoch": 0.9981791381253794, "grad_norm": 0.014221216784092876, "learning_rate": 1.320080452971162e-10, "loss": 0.477560818195343, "step": 1439, "token_acc": 0.8526144221665608 }, { "epoch": 0.9988727997919015, "grad_norm": 0.01434403605741942, "learning_rate": 5.8670385780224e-11, "loss": 0.4448873996734619, "step": 1440, "token_acc": 0.8607230115994972 }, { "epoch": 0.9995664614584236, "grad_norm": 0.014114901090207257, "learning_rate": 1.4667617959096546e-11, "loss": 0.49373555183410645, "step": 1441, "token_acc": 0.846836191602602 }, { "epoch": 1.0, "grad_norm": 0.020185433880738755, "learning_rate": 0.0, "loss": 0.500306248664856, "step": 1442, "token_acc": 0.8468703054581873 } ], "logging_steps": 1, "max_steps": 1442, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1442753329364992.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }