{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.20799833601331189, "eval_steps": 500, "global_step": 26000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.999936000511996e-05, "grad_norm": 4.414881453815827, "learning_rate": 7.199424046076314e-09, "loss": 0.5441, "step": 10 }, { "epoch": 0.00015999872001023991, "grad_norm": 4.004957247533114, "learning_rate": 1.519878409727222e-08, "loss": 0.5353, "step": 20 }, { "epoch": 0.00023999808001535987, "grad_norm": 4.889790729834478, "learning_rate": 2.3198144148468124e-08, "loss": 0.537, "step": 30 }, { "epoch": 0.00031999744002047983, "grad_norm": 3.7138301639437508, "learning_rate": 3.119750419966403e-08, "loss": 0.5488, "step": 40 }, { "epoch": 0.0003999968000255998, "grad_norm": 4.208028943214464, "learning_rate": 3.919686425085993e-08, "loss": 0.5557, "step": 50 }, { "epoch": 0.00047999616003071974, "grad_norm": 4.326896533998126, "learning_rate": 4.719622430205584e-08, "loss": 0.5061, "step": 60 }, { "epoch": 0.0005599955200358397, "grad_norm": 7.1723877700496415, "learning_rate": 5.519558435325175e-08, "loss": 0.5193, "step": 70 }, { "epoch": 0.0006399948800409597, "grad_norm": 3.892238785520058, "learning_rate": 6.319494440444764e-08, "loss": 0.5311, "step": 80 }, { "epoch": 0.0007199942400460796, "grad_norm": 3.4517472540795513, "learning_rate": 7.119430445564356e-08, "loss": 0.5232, "step": 90 }, { "epoch": 0.0007999936000511996, "grad_norm": 4.094057558667446, "learning_rate": 7.919366450683946e-08, "loss": 0.5225, "step": 100 }, { "epoch": 0.0008799929600563195, "grad_norm": 3.4395682865399144, "learning_rate": 8.719302455803536e-08, "loss": 0.5371, "step": 110 }, { "epoch": 0.0009599923200614395, "grad_norm": 3.7029712275218682, "learning_rate": 9.519238460923127e-08, "loss": 0.5305, "step": 120 }, { "epoch": 0.0010399916800665594, "grad_norm": 3.493027269180126, "learning_rate": 1.0319174466042718e-07, "loss": 0.5167, "step": 130 }, { "epoch": 0.0011199910400716794, "grad_norm": 3.480790725115363, "learning_rate": 1.1119110471162308e-07, "loss": 0.5543, "step": 140 }, { "epoch": 0.0011999904000767993, "grad_norm": 3.0350778792458564, "learning_rate": 1.1919046476281897e-07, "loss": 0.5188, "step": 150 }, { "epoch": 0.0012799897600819193, "grad_norm": 2.7703962024693216, "learning_rate": 1.271898248140149e-07, "loss": 0.5176, "step": 160 }, { "epoch": 0.0013599891200870393, "grad_norm": 2.7312812150288277, "learning_rate": 1.351891848652108e-07, "loss": 0.5206, "step": 170 }, { "epoch": 0.0014399884800921593, "grad_norm": 2.6772442085163837, "learning_rate": 1.431885449164067e-07, "loss": 0.5187, "step": 180 }, { "epoch": 0.0015199878400972793, "grad_norm": 2.6017371604025503, "learning_rate": 1.5118790496760262e-07, "loss": 0.4858, "step": 190 }, { "epoch": 0.0015999872001023993, "grad_norm": 2.361921882047865, "learning_rate": 1.5918726501879854e-07, "loss": 0.4965, "step": 200 }, { "epoch": 0.0016799865601075192, "grad_norm": 1.9949603945902301, "learning_rate": 1.6718662506999443e-07, "loss": 0.49, "step": 210 }, { "epoch": 0.001759985920112639, "grad_norm": 1.689017850332906, "learning_rate": 1.7518598512119031e-07, "loss": 0.4474, "step": 220 }, { "epoch": 0.001839985280117759, "grad_norm": 1.7526108484730667, "learning_rate": 1.8318534517238623e-07, "loss": 0.4758, "step": 230 }, { "epoch": 0.001919984640122879, "grad_norm": 1.6619250196852287, "learning_rate": 1.9118470522358212e-07, "loss": 0.4551, "step": 240 }, { "epoch": 0.001999984000127999, "grad_norm": 1.4244411983222107, "learning_rate": 1.9918406527477803e-07, "loss": 0.4666, "step": 250 }, { "epoch": 0.0020799833601331187, "grad_norm": 1.4091371713945773, "learning_rate": 2.0718342532597392e-07, "loss": 0.4744, "step": 260 }, { "epoch": 0.002159982720138239, "grad_norm": 1.4037698231377602, "learning_rate": 2.1518278537716986e-07, "loss": 0.4577, "step": 270 }, { "epoch": 0.0022399820801433587, "grad_norm": 1.1233973407058262, "learning_rate": 2.2318214542836575e-07, "loss": 0.4442, "step": 280 }, { "epoch": 0.002319981440148479, "grad_norm": 1.0934034827825632, "learning_rate": 2.3118150547956164e-07, "loss": 0.4746, "step": 290 }, { "epoch": 0.0023999808001535987, "grad_norm": 0.9547032856231036, "learning_rate": 2.3918086553075753e-07, "loss": 0.4631, "step": 300 }, { "epoch": 0.002479980160158719, "grad_norm": 1.011447249716743, "learning_rate": 2.4718022558195345e-07, "loss": 0.4359, "step": 310 }, { "epoch": 0.0025599795201638386, "grad_norm": 0.7506687829382205, "learning_rate": 2.5517958563314936e-07, "loss": 0.4554, "step": 320 }, { "epoch": 0.002639978880168959, "grad_norm": 0.8268141903301145, "learning_rate": 2.631789456843453e-07, "loss": 0.4444, "step": 330 }, { "epoch": 0.0027199782401740786, "grad_norm": 0.9120933866995852, "learning_rate": 2.711783057355412e-07, "loss": 0.4662, "step": 340 }, { "epoch": 0.0027999776001791984, "grad_norm": 0.9481570868182263, "learning_rate": 2.791776657867371e-07, "loss": 0.4677, "step": 350 }, { "epoch": 0.0028799769601843186, "grad_norm": 0.8010445040150771, "learning_rate": 2.8717702583793297e-07, "loss": 0.4196, "step": 360 }, { "epoch": 0.0029599763201894383, "grad_norm": 0.8670839473253033, "learning_rate": 2.951763858891289e-07, "loss": 0.4492, "step": 370 }, { "epoch": 0.0030399756801945585, "grad_norm": 0.8536232676044045, "learning_rate": 3.031757459403248e-07, "loss": 0.4375, "step": 380 }, { "epoch": 0.0031199750401996783, "grad_norm": 0.8135677817452803, "learning_rate": 3.111751059915207e-07, "loss": 0.438, "step": 390 }, { "epoch": 0.0031999744002047985, "grad_norm": 0.9667962048008838, "learning_rate": 3.191744660427166e-07, "loss": 0.4348, "step": 400 }, { "epoch": 0.0032799737602099183, "grad_norm": 0.885059264680228, "learning_rate": 3.271738260939125e-07, "loss": 0.4295, "step": 410 }, { "epoch": 0.0033599731202150385, "grad_norm": 0.7112007856245484, "learning_rate": 3.3517318614510846e-07, "loss": 0.4412, "step": 420 }, { "epoch": 0.0034399724802201583, "grad_norm": 0.9486242885147141, "learning_rate": 3.431725461963043e-07, "loss": 0.4357, "step": 430 }, { "epoch": 0.003519971840225278, "grad_norm": 0.7792187180449996, "learning_rate": 3.5117190624750024e-07, "loss": 0.4053, "step": 440 }, { "epoch": 0.0035999712002303982, "grad_norm": 0.8013118911013118, "learning_rate": 3.591712662986961e-07, "loss": 0.4526, "step": 450 }, { "epoch": 0.003679970560235518, "grad_norm": 0.9290353407648725, "learning_rate": 3.6717062634989207e-07, "loss": 0.4394, "step": 460 }, { "epoch": 0.003759969920240638, "grad_norm": 0.7440275824040646, "learning_rate": 3.7516998640108793e-07, "loss": 0.4354, "step": 470 }, { "epoch": 0.003839969280245758, "grad_norm": 0.6661735557442742, "learning_rate": 3.8316934645228385e-07, "loss": 0.4132, "step": 480 }, { "epoch": 0.003919968640250878, "grad_norm": 0.80714967397643, "learning_rate": 3.9116870650347976e-07, "loss": 0.4247, "step": 490 }, { "epoch": 0.003999968000255998, "grad_norm": 0.7269691108846945, "learning_rate": 3.9916806655467563e-07, "loss": 0.4308, "step": 500 }, { "epoch": 0.004079967360261118, "grad_norm": 0.8112092339496537, "learning_rate": 4.071674266058716e-07, "loss": 0.4215, "step": 510 }, { "epoch": 0.0041599667202662375, "grad_norm": 0.8465738727092779, "learning_rate": 4.1516678665706746e-07, "loss": 0.4438, "step": 520 }, { "epoch": 0.004239966080271358, "grad_norm": 0.7947114246478345, "learning_rate": 4.2316614670826337e-07, "loss": 0.402, "step": 530 }, { "epoch": 0.004319965440276478, "grad_norm": 0.771445835897062, "learning_rate": 4.311655067594593e-07, "loss": 0.4434, "step": 540 }, { "epoch": 0.004399964800281598, "grad_norm": 0.7143580000434352, "learning_rate": 4.3916486681065515e-07, "loss": 0.4227, "step": 550 }, { "epoch": 0.004479964160286717, "grad_norm": 0.7487773075552359, "learning_rate": 4.471642268618511e-07, "loss": 0.438, "step": 560 }, { "epoch": 0.004559963520291838, "grad_norm": 0.7373810653849879, "learning_rate": 4.55163586913047e-07, "loss": 0.4288, "step": 570 }, { "epoch": 0.004639962880296958, "grad_norm": 0.7104505393064982, "learning_rate": 4.631629469642429e-07, "loss": 0.4462, "step": 580 }, { "epoch": 0.004719962240302078, "grad_norm": 0.6996674322090365, "learning_rate": 4.7116230701543876e-07, "loss": 0.4263, "step": 590 }, { "epoch": 0.004799961600307197, "grad_norm": 0.779468888606517, "learning_rate": 4.791616670666347e-07, "loss": 0.4378, "step": 600 }, { "epoch": 0.0048799609603123175, "grad_norm": 0.6348456379271163, "learning_rate": 4.871610271178306e-07, "loss": 0.4098, "step": 610 }, { "epoch": 0.004959960320317438, "grad_norm": 0.6384341588537313, "learning_rate": 4.951603871690265e-07, "loss": 0.4489, "step": 620 }, { "epoch": 0.005039959680322557, "grad_norm": 0.6779051066808497, "learning_rate": 5.031597472202224e-07, "loss": 0.4522, "step": 630 }, { "epoch": 0.005119959040327677, "grad_norm": 0.7405337420145796, "learning_rate": 5.111591072714183e-07, "loss": 0.4248, "step": 640 }, { "epoch": 0.0051999584003327975, "grad_norm": 0.6915688533513078, "learning_rate": 5.191584673226143e-07, "loss": 0.4129, "step": 650 }, { "epoch": 0.005279957760337918, "grad_norm": 0.6044444399034763, "learning_rate": 5.271578273738101e-07, "loss": 0.4237, "step": 660 }, { "epoch": 0.005359957120343037, "grad_norm": 0.7401624173362278, "learning_rate": 5.351571874250061e-07, "loss": 0.4216, "step": 670 }, { "epoch": 0.005439956480348157, "grad_norm": 0.7496141168534413, "learning_rate": 5.43156547476202e-07, "loss": 0.4268, "step": 680 }, { "epoch": 0.005519955840353277, "grad_norm": 0.730614366570662, "learning_rate": 5.511559075273978e-07, "loss": 0.4589, "step": 690 }, { "epoch": 0.005599955200358397, "grad_norm": 0.7836211351094068, "learning_rate": 5.591552675785937e-07, "loss": 0.4176, "step": 700 }, { "epoch": 0.005679954560363517, "grad_norm": 0.6648189022606215, "learning_rate": 5.671546276297896e-07, "loss": 0.4274, "step": 710 }, { "epoch": 0.005759953920368637, "grad_norm": 0.6607901995290275, "learning_rate": 5.751539876809856e-07, "loss": 0.428, "step": 720 }, { "epoch": 0.005839953280373757, "grad_norm": 0.8422142881250906, "learning_rate": 5.831533477321815e-07, "loss": 0.4206, "step": 730 }, { "epoch": 0.005919952640378877, "grad_norm": 0.8636103630308042, "learning_rate": 5.911527077833774e-07, "loss": 0.4384, "step": 740 }, { "epoch": 0.005999952000383997, "grad_norm": 0.7020283376881055, "learning_rate": 5.991520678345733e-07, "loss": 0.4336, "step": 750 }, { "epoch": 0.006079951360389117, "grad_norm": 0.6637768636661737, "learning_rate": 6.071514278857692e-07, "loss": 0.421, "step": 760 }, { "epoch": 0.006159950720394236, "grad_norm": 0.6720228370970702, "learning_rate": 6.151507879369651e-07, "loss": 0.4425, "step": 770 }, { "epoch": 0.006239950080399357, "grad_norm": 0.7915308024009126, "learning_rate": 6.231501479881609e-07, "loss": 0.4665, "step": 780 }, { "epoch": 0.006319949440404477, "grad_norm": 0.7005483141808596, "learning_rate": 6.311495080393569e-07, "loss": 0.4371, "step": 790 }, { "epoch": 0.006399948800409597, "grad_norm": 0.7085842808188052, "learning_rate": 6.391488680905528e-07, "loss": 0.4481, "step": 800 }, { "epoch": 0.006479948160414716, "grad_norm": 0.797364113205394, "learning_rate": 6.471482281417488e-07, "loss": 0.43, "step": 810 }, { "epoch": 0.0065599475204198366, "grad_norm": 0.7769920944189029, "learning_rate": 6.551475881929446e-07, "loss": 0.4421, "step": 820 }, { "epoch": 0.006639946880424957, "grad_norm": 0.7190098449382926, "learning_rate": 6.631469482441405e-07, "loss": 0.4524, "step": 830 }, { "epoch": 0.006719946240430077, "grad_norm": 0.8999089940382301, "learning_rate": 6.711463082953363e-07, "loss": 0.4327, "step": 840 }, { "epoch": 0.006799945600435196, "grad_norm": 0.8071258672281636, "learning_rate": 6.791456683465323e-07, "loss": 0.3999, "step": 850 }, { "epoch": 0.0068799449604403165, "grad_norm": 0.9718244104757909, "learning_rate": 6.871450283977283e-07, "loss": 0.4109, "step": 860 }, { "epoch": 0.006959944320445437, "grad_norm": 0.7260805899057289, "learning_rate": 6.951443884489241e-07, "loss": 0.4254, "step": 870 }, { "epoch": 0.007039943680450556, "grad_norm": 0.6793848246836304, "learning_rate": 7.031437485001201e-07, "loss": 0.4168, "step": 880 }, { "epoch": 0.007119943040455676, "grad_norm": 0.8220405330990818, "learning_rate": 7.11143108551316e-07, "loss": 0.4368, "step": 890 }, { "epoch": 0.0071999424004607964, "grad_norm": 0.8650021092848164, "learning_rate": 7.191424686025118e-07, "loss": 0.4336, "step": 900 }, { "epoch": 0.007279941760465917, "grad_norm": 0.6667689577948444, "learning_rate": 7.271418286537078e-07, "loss": 0.4325, "step": 910 }, { "epoch": 0.007359941120471036, "grad_norm": 0.6443211014509481, "learning_rate": 7.351411887049036e-07, "loss": 0.443, "step": 920 }, { "epoch": 0.007439940480476156, "grad_norm": 0.6067913066251965, "learning_rate": 7.431405487560996e-07, "loss": 0.4095, "step": 930 }, { "epoch": 0.007519939840481276, "grad_norm": 0.6935512372810877, "learning_rate": 7.511399088072954e-07, "loss": 0.4272, "step": 940 }, { "epoch": 0.007599939200486396, "grad_norm": 0.6599113657719949, "learning_rate": 7.591392688584914e-07, "loss": 0.3942, "step": 950 }, { "epoch": 0.007679938560491516, "grad_norm": 0.611225655896161, "learning_rate": 7.671386289096873e-07, "loss": 0.4451, "step": 960 }, { "epoch": 0.007759937920496636, "grad_norm": 0.757015652343119, "learning_rate": 7.751379889608831e-07, "loss": 0.4059, "step": 970 }, { "epoch": 0.007839937280501755, "grad_norm": 0.8456384517410107, "learning_rate": 7.831373490120791e-07, "loss": 0.4278, "step": 980 }, { "epoch": 0.007919936640506876, "grad_norm": 0.7996016208371923, "learning_rate": 7.911367090632751e-07, "loss": 0.4536, "step": 990 }, { "epoch": 0.007999936000511996, "grad_norm": 0.6228736248515305, "learning_rate": 7.991360691144709e-07, "loss": 0.3969, "step": 1000 }, { "epoch": 0.008079935360517116, "grad_norm": 0.6440105424946402, "learning_rate": 8.071354291656668e-07, "loss": 0.4307, "step": 1010 }, { "epoch": 0.008159934720522236, "grad_norm": 0.6636370756757088, "learning_rate": 8.151347892168628e-07, "loss": 0.4019, "step": 1020 }, { "epoch": 0.008239934080527356, "grad_norm": 0.6534728964626215, "learning_rate": 8.231341492680586e-07, "loss": 0.4357, "step": 1030 }, { "epoch": 0.008319933440532475, "grad_norm": 0.6854238349847609, "learning_rate": 8.311335093192545e-07, "loss": 0.4156, "step": 1040 }, { "epoch": 0.008399932800537595, "grad_norm": 0.7692797686595098, "learning_rate": 8.391328693704504e-07, "loss": 0.4364, "step": 1050 }, { "epoch": 0.008479932160542715, "grad_norm": 0.6892139807862941, "learning_rate": 8.471322294216464e-07, "loss": 0.4595, "step": 1060 }, { "epoch": 0.008559931520547836, "grad_norm": 0.6786670330601587, "learning_rate": 8.551315894728423e-07, "loss": 0.4207, "step": 1070 }, { "epoch": 0.008639930880552956, "grad_norm": 0.6118366771253826, "learning_rate": 8.631309495240381e-07, "loss": 0.4186, "step": 1080 }, { "epoch": 0.008719930240558076, "grad_norm": 0.7157718863255984, "learning_rate": 8.711303095752341e-07, "loss": 0.4412, "step": 1090 }, { "epoch": 0.008799929600563196, "grad_norm": 0.7102096103999658, "learning_rate": 8.791296696264299e-07, "loss": 0.4205, "step": 1100 }, { "epoch": 0.008879928960568315, "grad_norm": 0.6326552697725677, "learning_rate": 8.871290296776258e-07, "loss": 0.4161, "step": 1110 }, { "epoch": 0.008959928320573435, "grad_norm": 0.7455469536196851, "learning_rate": 8.951283897288219e-07, "loss": 0.4259, "step": 1120 }, { "epoch": 0.009039927680578555, "grad_norm": 0.8219540514047248, "learning_rate": 9.031277497800177e-07, "loss": 0.4423, "step": 1130 }, { "epoch": 0.009119927040583675, "grad_norm": 0.6428846698552027, "learning_rate": 9.111271098312136e-07, "loss": 0.432, "step": 1140 }, { "epoch": 0.009199926400588795, "grad_norm": 0.7959015617833209, "learning_rate": 9.191264698824094e-07, "loss": 0.4374, "step": 1150 }, { "epoch": 0.009279925760593916, "grad_norm": 0.722690696817927, "learning_rate": 9.271258299336054e-07, "loss": 0.4177, "step": 1160 }, { "epoch": 0.009359925120599036, "grad_norm": 0.7583146312532495, "learning_rate": 9.351251899848013e-07, "loss": 0.4349, "step": 1170 }, { "epoch": 0.009439924480604156, "grad_norm": 0.8841341873240007, "learning_rate": 9.431245500359971e-07, "loss": 0.4323, "step": 1180 }, { "epoch": 0.009519923840609274, "grad_norm": 0.6844990736142492, "learning_rate": 9.511239100871932e-07, "loss": 0.4195, "step": 1190 }, { "epoch": 0.009599923200614395, "grad_norm": 0.7129122863481299, "learning_rate": 9.59123270138389e-07, "loss": 0.4088, "step": 1200 }, { "epoch": 0.009679922560619515, "grad_norm": 0.6532760834208395, "learning_rate": 9.67122630189585e-07, "loss": 0.3969, "step": 1210 }, { "epoch": 0.009759921920624635, "grad_norm": 0.7563260927255472, "learning_rate": 9.75121990240781e-07, "loss": 0.4075, "step": 1220 }, { "epoch": 0.009839921280629755, "grad_norm": 0.7704505024635662, "learning_rate": 9.831213502919768e-07, "loss": 0.3981, "step": 1230 }, { "epoch": 0.009919920640634875, "grad_norm": 0.7056792723024738, "learning_rate": 9.911207103431725e-07, "loss": 0.4223, "step": 1240 }, { "epoch": 0.009999920000639996, "grad_norm": 0.750390399783068, "learning_rate": 9.991200703943684e-07, "loss": 0.4102, "step": 1250 }, { "epoch": 0.010079919360645114, "grad_norm": 0.7529460057363149, "learning_rate": 1.0071194304455646e-06, "loss": 0.4278, "step": 1260 }, { "epoch": 0.010159918720650234, "grad_norm": 1.041695148553965, "learning_rate": 1.0151187904967603e-06, "loss": 0.4261, "step": 1270 }, { "epoch": 0.010239918080655355, "grad_norm": 0.8098838670880232, "learning_rate": 1.0231181505479562e-06, "loss": 0.4163, "step": 1280 }, { "epoch": 0.010319917440660475, "grad_norm": 0.7139180273227849, "learning_rate": 1.0311175105991523e-06, "loss": 0.4139, "step": 1290 }, { "epoch": 0.010399916800665595, "grad_norm": 0.7293628900497775, "learning_rate": 1.039116870650348e-06, "loss": 0.4139, "step": 1300 }, { "epoch": 0.010479916160670715, "grad_norm": 3.9899125178456276, "learning_rate": 1.047116230701544e-06, "loss": 0.4352, "step": 1310 }, { "epoch": 0.010559915520675835, "grad_norm": 0.741428608083807, "learning_rate": 1.0551155907527398e-06, "loss": 0.4487, "step": 1320 }, { "epoch": 0.010639914880680954, "grad_norm": 0.7645959836257802, "learning_rate": 1.0631149508039358e-06, "loss": 0.4509, "step": 1330 }, { "epoch": 0.010719914240686074, "grad_norm": 0.6630163901596575, "learning_rate": 1.0711143108551317e-06, "loss": 0.422, "step": 1340 }, { "epoch": 0.010799913600691194, "grad_norm": 0.7653242364061862, "learning_rate": 1.0791136709063276e-06, "loss": 0.4047, "step": 1350 }, { "epoch": 0.010879912960696314, "grad_norm": 0.6952138151449674, "learning_rate": 1.0871130309575235e-06, "loss": 0.424, "step": 1360 }, { "epoch": 0.010959912320701435, "grad_norm": 0.8400358998787617, "learning_rate": 1.0951123910087194e-06, "loss": 0.4329, "step": 1370 }, { "epoch": 0.011039911680706555, "grad_norm": 0.7959368745719518, "learning_rate": 1.1031117510599153e-06, "loss": 0.4296, "step": 1380 }, { "epoch": 0.011119911040711675, "grad_norm": 0.7182831330712102, "learning_rate": 1.111111111111111e-06, "loss": 0.4053, "step": 1390 }, { "epoch": 0.011199910400716794, "grad_norm": 0.7631459457678664, "learning_rate": 1.1191104711623072e-06, "loss": 0.4174, "step": 1400 }, { "epoch": 0.011279909760721914, "grad_norm": 0.6904060292491346, "learning_rate": 1.127109831213503e-06, "loss": 0.4262, "step": 1410 }, { "epoch": 0.011359909120727034, "grad_norm": 0.7154968430224656, "learning_rate": 1.1351091912646988e-06, "loss": 0.4116, "step": 1420 }, { "epoch": 0.011439908480732154, "grad_norm": 0.6430506977204669, "learning_rate": 1.143108551315895e-06, "loss": 0.3985, "step": 1430 }, { "epoch": 0.011519907840737274, "grad_norm": 0.6060536937789834, "learning_rate": 1.1511079113670908e-06, "loss": 0.4366, "step": 1440 }, { "epoch": 0.011599907200742395, "grad_norm": 0.7070045694797042, "learning_rate": 1.1591072714182865e-06, "loss": 0.3951, "step": 1450 }, { "epoch": 0.011679906560747515, "grad_norm": 0.6799481270205386, "learning_rate": 1.1671066314694824e-06, "loss": 0.4277, "step": 1460 }, { "epoch": 0.011759905920752633, "grad_norm": 0.7706688131579046, "learning_rate": 1.1751059915206786e-06, "loss": 0.4106, "step": 1470 }, { "epoch": 0.011839905280757753, "grad_norm": 0.7659401655636618, "learning_rate": 1.1831053515718743e-06, "loss": 0.4147, "step": 1480 }, { "epoch": 0.011919904640762874, "grad_norm": 0.7880351649260696, "learning_rate": 1.1911047116230702e-06, "loss": 0.4018, "step": 1490 }, { "epoch": 0.011999904000767994, "grad_norm": 0.6643952514129879, "learning_rate": 1.1991040716742661e-06, "loss": 0.4225, "step": 1500 }, { "epoch": 0.012079903360773114, "grad_norm": 0.7410891062680932, "learning_rate": 1.207103431725462e-06, "loss": 0.4235, "step": 1510 }, { "epoch": 0.012159902720778234, "grad_norm": 0.6702465025094532, "learning_rate": 1.215102791776658e-06, "loss": 0.4485, "step": 1520 }, { "epoch": 0.012239902080783354, "grad_norm": 0.72511805103151, "learning_rate": 1.2231021518278539e-06, "loss": 0.4075, "step": 1530 }, { "epoch": 0.012319901440788473, "grad_norm": 0.7364457263640842, "learning_rate": 1.2311015118790498e-06, "loss": 0.4209, "step": 1540 }, { "epoch": 0.012399900800793593, "grad_norm": 0.6760801533876919, "learning_rate": 1.2391008719302457e-06, "loss": 0.4153, "step": 1550 }, { "epoch": 0.012479900160798713, "grad_norm": 1.329240774622819, "learning_rate": 1.2471002319814416e-06, "loss": 0.4569, "step": 1560 }, { "epoch": 0.012559899520803833, "grad_norm": 0.7197695590369367, "learning_rate": 1.2550995920326375e-06, "loss": 0.4148, "step": 1570 }, { "epoch": 0.012639898880808954, "grad_norm": 0.7197146321929337, "learning_rate": 1.2630989520838332e-06, "loss": 0.4309, "step": 1580 }, { "epoch": 0.012719898240814074, "grad_norm": 0.6941046794760515, "learning_rate": 1.2710983121350293e-06, "loss": 0.4266, "step": 1590 }, { "epoch": 0.012799897600819194, "grad_norm": 0.6685733935987979, "learning_rate": 1.2790976721862253e-06, "loss": 0.4148, "step": 1600 }, { "epoch": 0.012879896960824314, "grad_norm": 0.829963069106683, "learning_rate": 1.287097032237421e-06, "loss": 0.438, "step": 1610 }, { "epoch": 0.012959896320829433, "grad_norm": 0.8772001383565942, "learning_rate": 1.295096392288617e-06, "loss": 0.4167, "step": 1620 }, { "epoch": 0.013039895680834553, "grad_norm": 0.7693529714105686, "learning_rate": 1.303095752339813e-06, "loss": 0.4465, "step": 1630 }, { "epoch": 0.013119895040839673, "grad_norm": 0.6969819161826472, "learning_rate": 1.3110951123910087e-06, "loss": 0.4374, "step": 1640 }, { "epoch": 0.013199894400844793, "grad_norm": 0.7245418409893126, "learning_rate": 1.3190944724422048e-06, "loss": 0.4375, "step": 1650 }, { "epoch": 0.013279893760849914, "grad_norm": 0.6704658854506884, "learning_rate": 1.3270938324934008e-06, "loss": 0.4144, "step": 1660 }, { "epoch": 0.013359893120855034, "grad_norm": 0.7596780468310279, "learning_rate": 1.3350931925445965e-06, "loss": 0.4142, "step": 1670 }, { "epoch": 0.013439892480860154, "grad_norm": 0.799033496950235, "learning_rate": 1.3430925525957924e-06, "loss": 0.424, "step": 1680 }, { "epoch": 0.013519891840865272, "grad_norm": 0.8360146268303155, "learning_rate": 1.3510919126469885e-06, "loss": 0.4104, "step": 1690 }, { "epoch": 0.013599891200870393, "grad_norm": 0.7830381641790578, "learning_rate": 1.3590912726981842e-06, "loss": 0.4067, "step": 1700 }, { "epoch": 0.013679890560875513, "grad_norm": 0.7376045815830132, "learning_rate": 1.3670906327493801e-06, "loss": 0.4232, "step": 1710 }, { "epoch": 0.013759889920880633, "grad_norm": 0.7780257393234633, "learning_rate": 1.3750899928005762e-06, "loss": 0.4001, "step": 1720 }, { "epoch": 0.013839889280885753, "grad_norm": 0.7400853428241427, "learning_rate": 1.383089352851772e-06, "loss": 0.4283, "step": 1730 }, { "epoch": 0.013919888640890873, "grad_norm": 0.6301335389858812, "learning_rate": 1.3910887129029679e-06, "loss": 0.4315, "step": 1740 }, { "epoch": 0.013999888000895994, "grad_norm": 0.6873058205506691, "learning_rate": 1.3990880729541636e-06, "loss": 0.4294, "step": 1750 }, { "epoch": 0.014079887360901112, "grad_norm": 0.6118849450571252, "learning_rate": 1.4070874330053597e-06, "loss": 0.4116, "step": 1760 }, { "epoch": 0.014159886720906232, "grad_norm": 0.8643963519063279, "learning_rate": 1.4150867930565556e-06, "loss": 0.4189, "step": 1770 }, { "epoch": 0.014239886080911352, "grad_norm": 0.6986286668684888, "learning_rate": 1.4230861531077513e-06, "loss": 0.4393, "step": 1780 }, { "epoch": 0.014319885440916473, "grad_norm": 0.7361554237470329, "learning_rate": 1.4310855131589474e-06, "loss": 0.4048, "step": 1790 }, { "epoch": 0.014399884800921593, "grad_norm": 0.7526595512221772, "learning_rate": 1.4390848732101434e-06, "loss": 0.4434, "step": 1800 }, { "epoch": 0.014479884160926713, "grad_norm": 0.6793121490590498, "learning_rate": 1.447084233261339e-06, "loss": 0.4313, "step": 1810 }, { "epoch": 0.014559883520931833, "grad_norm": 0.7503593630371851, "learning_rate": 1.4550835933125352e-06, "loss": 0.4363, "step": 1820 }, { "epoch": 0.014639882880936952, "grad_norm": 0.6631126747097913, "learning_rate": 1.463082953363731e-06, "loss": 0.4172, "step": 1830 }, { "epoch": 0.014719882240942072, "grad_norm": 0.6201041671912539, "learning_rate": 1.4710823134149268e-06, "loss": 0.426, "step": 1840 }, { "epoch": 0.014799881600947192, "grad_norm": 0.7986575840999489, "learning_rate": 1.479081673466123e-06, "loss": 0.4418, "step": 1850 }, { "epoch": 0.014879880960952312, "grad_norm": 0.7485644191714067, "learning_rate": 1.4870810335173188e-06, "loss": 0.4315, "step": 1860 }, { "epoch": 0.014959880320957433, "grad_norm": 0.767180980787683, "learning_rate": 1.4950803935685146e-06, "loss": 0.44, "step": 1870 }, { "epoch": 0.015039879680962553, "grad_norm": 0.7085244935047633, "learning_rate": 1.5030797536197107e-06, "loss": 0.4197, "step": 1880 }, { "epoch": 0.015119879040967673, "grad_norm": 0.9096558305501717, "learning_rate": 1.5110791136709064e-06, "loss": 0.4276, "step": 1890 }, { "epoch": 0.015199878400972791, "grad_norm": 0.7782449144127891, "learning_rate": 1.5190784737221023e-06, "loss": 0.4433, "step": 1900 }, { "epoch": 0.015279877760977912, "grad_norm": 0.6784288997728832, "learning_rate": 1.5270778337732984e-06, "loss": 0.408, "step": 1910 }, { "epoch": 0.015359877120983032, "grad_norm": 0.7394160138728095, "learning_rate": 1.5350771938244941e-06, "loss": 0.427, "step": 1920 }, { "epoch": 0.015439876480988152, "grad_norm": 0.7020184796096351, "learning_rate": 1.54307655387569e-06, "loss": 0.4585, "step": 1930 }, { "epoch": 0.015519875840993272, "grad_norm": 0.6570216742745165, "learning_rate": 1.551075913926886e-06, "loss": 0.4203, "step": 1940 }, { "epoch": 0.015599875200998392, "grad_norm": 0.6492915506893296, "learning_rate": 1.5590752739780819e-06, "loss": 0.4225, "step": 1950 }, { "epoch": 0.01567987456100351, "grad_norm": 0.6404207960330748, "learning_rate": 1.5670746340292778e-06, "loss": 0.4155, "step": 1960 }, { "epoch": 0.01575987392100863, "grad_norm": 0.7069937527425317, "learning_rate": 1.5750739940804737e-06, "loss": 0.4017, "step": 1970 }, { "epoch": 0.01583987328101375, "grad_norm": 0.7497814910327999, "learning_rate": 1.5830733541316694e-06, "loss": 0.4336, "step": 1980 }, { "epoch": 0.01591987264101887, "grad_norm": 0.997502334828043, "learning_rate": 1.5910727141828655e-06, "loss": 0.4252, "step": 1990 }, { "epoch": 0.01599987200102399, "grad_norm": 0.7051737520195863, "learning_rate": 1.5990720742340615e-06, "loss": 0.4136, "step": 2000 }, { "epoch": 0.016079871361029112, "grad_norm": 0.7358677233078202, "learning_rate": 1.6070714342852572e-06, "loss": 0.4041, "step": 2010 }, { "epoch": 0.016159870721034232, "grad_norm": 0.7232160368639224, "learning_rate": 1.6150707943364533e-06, "loss": 0.3925, "step": 2020 }, { "epoch": 0.016239870081039352, "grad_norm": 0.7492396218793221, "learning_rate": 1.6230701543876492e-06, "loss": 0.4453, "step": 2030 }, { "epoch": 0.016319869441044473, "grad_norm": 0.7879468186487532, "learning_rate": 1.631069514438845e-06, "loss": 0.4143, "step": 2040 }, { "epoch": 0.016399868801049593, "grad_norm": 0.7027247029038095, "learning_rate": 1.639068874490041e-06, "loss": 0.4487, "step": 2050 }, { "epoch": 0.016479868161054713, "grad_norm": 0.6646702988688921, "learning_rate": 1.6470682345412367e-06, "loss": 0.4189, "step": 2060 }, { "epoch": 0.016559867521059833, "grad_norm": 0.7214592394412016, "learning_rate": 1.6550675945924326e-06, "loss": 0.4298, "step": 2070 }, { "epoch": 0.01663986688106495, "grad_norm": 0.7009780752105863, "learning_rate": 1.6630669546436288e-06, "loss": 0.43, "step": 2080 }, { "epoch": 0.01671986624107007, "grad_norm": 0.6802031501540443, "learning_rate": 1.6710663146948245e-06, "loss": 0.4064, "step": 2090 }, { "epoch": 0.01679986560107519, "grad_norm": 0.7764568933093239, "learning_rate": 1.6790656747460204e-06, "loss": 0.4192, "step": 2100 }, { "epoch": 0.01687986496108031, "grad_norm": 0.7257831241745193, "learning_rate": 1.6870650347972165e-06, "loss": 0.4137, "step": 2110 }, { "epoch": 0.01695986432108543, "grad_norm": 0.7231093177815116, "learning_rate": 1.6950643948484122e-06, "loss": 0.4321, "step": 2120 }, { "epoch": 0.01703986368109055, "grad_norm": 0.7604848904942334, "learning_rate": 1.7030637548996081e-06, "loss": 0.4173, "step": 2130 }, { "epoch": 0.01711986304109567, "grad_norm": 0.8671202797505491, "learning_rate": 1.7110631149508043e-06, "loss": 0.4172, "step": 2140 }, { "epoch": 0.01719986240110079, "grad_norm": 0.8818905733108134, "learning_rate": 1.719062475002e-06, "loss": 0.4096, "step": 2150 }, { "epoch": 0.01727986176110591, "grad_norm": 0.7073377083366502, "learning_rate": 1.7270618350531959e-06, "loss": 0.4327, "step": 2160 }, { "epoch": 0.01735986112111103, "grad_norm": 0.9637856611728648, "learning_rate": 1.735061195104392e-06, "loss": 0.457, "step": 2170 }, { "epoch": 0.017439860481116152, "grad_norm": 0.7763253238443666, "learning_rate": 1.7430605551555877e-06, "loss": 0.4444, "step": 2180 }, { "epoch": 0.017519859841121272, "grad_norm": 0.7532387544984117, "learning_rate": 1.7510599152067836e-06, "loss": 0.4193, "step": 2190 }, { "epoch": 0.017599859201126392, "grad_norm": 0.7001703245870058, "learning_rate": 1.7590592752579793e-06, "loss": 0.4334, "step": 2200 }, { "epoch": 0.017679858561131512, "grad_norm": 0.653320909525973, "learning_rate": 1.7670586353091755e-06, "loss": 0.4137, "step": 2210 }, { "epoch": 0.01775985792113663, "grad_norm": 0.6431905950303065, "learning_rate": 1.7750579953603714e-06, "loss": 0.4056, "step": 2220 }, { "epoch": 0.01783985728114175, "grad_norm": 0.6355510699319944, "learning_rate": 1.783057355411567e-06, "loss": 0.3967, "step": 2230 }, { "epoch": 0.01791985664114687, "grad_norm": 1.2693108108319435, "learning_rate": 1.7910567154627632e-06, "loss": 0.3958, "step": 2240 }, { "epoch": 0.01799985600115199, "grad_norm": 0.7084484301635297, "learning_rate": 1.7990560755139591e-06, "loss": 0.4341, "step": 2250 }, { "epoch": 0.01807985536115711, "grad_norm": 0.7305495029250423, "learning_rate": 1.8070554355651548e-06, "loss": 0.4343, "step": 2260 }, { "epoch": 0.01815985472116223, "grad_norm": 0.7855576477357182, "learning_rate": 1.8150547956163507e-06, "loss": 0.4158, "step": 2270 }, { "epoch": 0.01823985408116735, "grad_norm": 0.8272843321029247, "learning_rate": 1.8230541556675469e-06, "loss": 0.4355, "step": 2280 }, { "epoch": 0.01831985344117247, "grad_norm": 0.753702291149743, "learning_rate": 1.8310535157187426e-06, "loss": 0.4025, "step": 2290 }, { "epoch": 0.01839985280117759, "grad_norm": 0.7598104262546688, "learning_rate": 1.8390528757699385e-06, "loss": 0.413, "step": 2300 }, { "epoch": 0.01847985216118271, "grad_norm": 0.7611395927729616, "learning_rate": 1.8470522358211346e-06, "loss": 0.4373, "step": 2310 }, { "epoch": 0.01855985152118783, "grad_norm": 0.760722694363519, "learning_rate": 1.8550515958723303e-06, "loss": 0.4179, "step": 2320 }, { "epoch": 0.01863985088119295, "grad_norm": 0.5957047498546761, "learning_rate": 1.8630509559235262e-06, "loss": 0.4231, "step": 2330 }, { "epoch": 0.01871985024119807, "grad_norm": 0.7186940138009597, "learning_rate": 1.8710503159747224e-06, "loss": 0.4171, "step": 2340 }, { "epoch": 0.018799849601203192, "grad_norm": 0.7760364408450707, "learning_rate": 1.879049676025918e-06, "loss": 0.4003, "step": 2350 }, { "epoch": 0.018879848961208312, "grad_norm": 0.7481302705420546, "learning_rate": 1.887049036077114e-06, "loss": 0.4182, "step": 2360 }, { "epoch": 0.01895984832121343, "grad_norm": 0.7358536669321117, "learning_rate": 1.8950483961283097e-06, "loss": 0.4272, "step": 2370 }, { "epoch": 0.01903984768121855, "grad_norm": 0.7602582219791679, "learning_rate": 1.9030477561795058e-06, "loss": 0.4157, "step": 2380 }, { "epoch": 0.01911984704122367, "grad_norm": 0.7426387012574098, "learning_rate": 1.911047116230702e-06, "loss": 0.4136, "step": 2390 }, { "epoch": 0.01919984640122879, "grad_norm": 0.6954613448087369, "learning_rate": 1.9190464762818974e-06, "loss": 0.4081, "step": 2400 }, { "epoch": 0.01927984576123391, "grad_norm": 0.7604294663875293, "learning_rate": 1.9270458363330933e-06, "loss": 0.4116, "step": 2410 }, { "epoch": 0.01935984512123903, "grad_norm": 0.7196806209080392, "learning_rate": 1.9350451963842897e-06, "loss": 0.4253, "step": 2420 }, { "epoch": 0.01943984448124415, "grad_norm": 0.7619473447002568, "learning_rate": 1.943044556435485e-06, "loss": 0.4048, "step": 2430 }, { "epoch": 0.01951984384124927, "grad_norm": 0.7052222380767645, "learning_rate": 1.951043916486681e-06, "loss": 0.4092, "step": 2440 }, { "epoch": 0.01959984320125439, "grad_norm": 0.6311702076235657, "learning_rate": 1.9590432765378774e-06, "loss": 0.409, "step": 2450 }, { "epoch": 0.01967984256125951, "grad_norm": 0.7989650814927569, "learning_rate": 1.967042636589073e-06, "loss": 0.4031, "step": 2460 }, { "epoch": 0.01975984192126463, "grad_norm": 0.7243725001067509, "learning_rate": 1.975041996640269e-06, "loss": 0.4139, "step": 2470 }, { "epoch": 0.01983984128126975, "grad_norm": 0.7689695050248523, "learning_rate": 1.983041356691465e-06, "loss": 0.419, "step": 2480 }, { "epoch": 0.01991984064127487, "grad_norm": 0.6784111247678405, "learning_rate": 1.9910407167426607e-06, "loss": 0.4255, "step": 2490 }, { "epoch": 0.01999984000127999, "grad_norm": 0.696241896489532, "learning_rate": 1.9990400767938566e-06, "loss": 0.4387, "step": 2500 }, { "epoch": 0.020079839361285108, "grad_norm": 0.7337662500397681, "learning_rate": 2.0070394368450525e-06, "loss": 0.4107, "step": 2510 }, { "epoch": 0.02015983872129023, "grad_norm": 0.7522121421950386, "learning_rate": 2.0150387968962484e-06, "loss": 0.4341, "step": 2520 }, { "epoch": 0.02023983808129535, "grad_norm": 0.7635117961228429, "learning_rate": 2.0230381569474443e-06, "loss": 0.444, "step": 2530 }, { "epoch": 0.02031983744130047, "grad_norm": 0.7258340461733414, "learning_rate": 2.0310375169986402e-06, "loss": 0.4532, "step": 2540 }, { "epoch": 0.02039983680130559, "grad_norm": 0.8227531741667117, "learning_rate": 2.039036877049836e-06, "loss": 0.4211, "step": 2550 }, { "epoch": 0.02047983616131071, "grad_norm": 0.8046278992558573, "learning_rate": 2.047036237101032e-06, "loss": 0.4304, "step": 2560 }, { "epoch": 0.02055983552131583, "grad_norm": 0.8583586766230429, "learning_rate": 2.055035597152228e-06, "loss": 0.389, "step": 2570 }, { "epoch": 0.02063983488132095, "grad_norm": 0.7059091393994599, "learning_rate": 2.063034957203424e-06, "loss": 0.4143, "step": 2580 }, { "epoch": 0.02071983424132607, "grad_norm": 0.7169812723780176, "learning_rate": 2.07103431725462e-06, "loss": 0.4231, "step": 2590 }, { "epoch": 0.02079983360133119, "grad_norm": 0.744498117665369, "learning_rate": 2.0790336773058157e-06, "loss": 0.3931, "step": 2600 }, { "epoch": 0.02087983296133631, "grad_norm": 0.8714679551154206, "learning_rate": 2.0870330373570117e-06, "loss": 0.4227, "step": 2610 }, { "epoch": 0.02095983232134143, "grad_norm": 0.6310793333444085, "learning_rate": 2.0950323974082076e-06, "loss": 0.4092, "step": 2620 }, { "epoch": 0.02103983168134655, "grad_norm": 0.7793866057944923, "learning_rate": 2.1030317574594035e-06, "loss": 0.4139, "step": 2630 }, { "epoch": 0.02111983104135167, "grad_norm": 0.8117526334063988, "learning_rate": 2.1110311175105994e-06, "loss": 0.4154, "step": 2640 }, { "epoch": 0.021199830401356787, "grad_norm": 0.7466901509429923, "learning_rate": 2.1190304775617953e-06, "loss": 0.4139, "step": 2650 }, { "epoch": 0.021279829761361908, "grad_norm": 0.9396797684739518, "learning_rate": 2.1270298376129912e-06, "loss": 0.4166, "step": 2660 }, { "epoch": 0.021359829121367028, "grad_norm": 0.7212101339600039, "learning_rate": 2.135029197664187e-06, "loss": 0.4299, "step": 2670 }, { "epoch": 0.021439828481372148, "grad_norm": 0.9051397916977733, "learning_rate": 2.1430285577153826e-06, "loss": 0.4203, "step": 2680 }, { "epoch": 0.021519827841377268, "grad_norm": 0.6759148258397492, "learning_rate": 2.151027917766579e-06, "loss": 0.4065, "step": 2690 }, { "epoch": 0.02159982720138239, "grad_norm": 0.7191776566727898, "learning_rate": 2.159027277817775e-06, "loss": 0.3957, "step": 2700 }, { "epoch": 0.02167982656138751, "grad_norm": 0.7752464373073066, "learning_rate": 2.1670266378689704e-06, "loss": 0.4198, "step": 2710 }, { "epoch": 0.02175982592139263, "grad_norm": 0.7096879372714776, "learning_rate": 2.1750259979201667e-06, "loss": 0.4337, "step": 2720 }, { "epoch": 0.02183982528139775, "grad_norm": 0.7577341998111574, "learning_rate": 2.1830253579713626e-06, "loss": 0.4484, "step": 2730 }, { "epoch": 0.02191982464140287, "grad_norm": 0.8303467101337862, "learning_rate": 2.191024718022558e-06, "loss": 0.436, "step": 2740 }, { "epoch": 0.02199982400140799, "grad_norm": 0.740434972574641, "learning_rate": 2.1990240780737545e-06, "loss": 0.4026, "step": 2750 }, { "epoch": 0.02207982336141311, "grad_norm": 0.7385703007931491, "learning_rate": 2.2070234381249504e-06, "loss": 0.3994, "step": 2760 }, { "epoch": 0.02215982272141823, "grad_norm": 0.804160370262748, "learning_rate": 2.215022798176146e-06, "loss": 0.4247, "step": 2770 }, { "epoch": 0.02223982208142335, "grad_norm": 0.9699580286471233, "learning_rate": 2.2230221582273422e-06, "loss": 0.4158, "step": 2780 }, { "epoch": 0.02231982144142847, "grad_norm": 0.854317456477185, "learning_rate": 2.231021518278538e-06, "loss": 0.4224, "step": 2790 }, { "epoch": 0.022399820801433587, "grad_norm": 0.7261443031804087, "learning_rate": 2.2390208783297336e-06, "loss": 0.4153, "step": 2800 }, { "epoch": 0.022479820161438707, "grad_norm": 0.7221332661288903, "learning_rate": 2.24702023838093e-06, "loss": 0.4287, "step": 2810 }, { "epoch": 0.022559819521443827, "grad_norm": 0.8879662037891538, "learning_rate": 2.2550195984321255e-06, "loss": 0.4221, "step": 2820 }, { "epoch": 0.022639818881448948, "grad_norm": 0.7885980884870208, "learning_rate": 2.2630189584833214e-06, "loss": 0.4394, "step": 2830 }, { "epoch": 0.022719818241454068, "grad_norm": 4.841458093614334, "learning_rate": 2.2710183185345173e-06, "loss": 0.4306, "step": 2840 }, { "epoch": 0.022799817601459188, "grad_norm": 0.762762416297712, "learning_rate": 2.279017678585713e-06, "loss": 0.4248, "step": 2850 }, { "epoch": 0.022879816961464308, "grad_norm": 0.8448845187481899, "learning_rate": 2.287017038636909e-06, "loss": 0.4016, "step": 2860 }, { "epoch": 0.02295981632146943, "grad_norm": 0.7119692504285716, "learning_rate": 2.295016398688105e-06, "loss": 0.4187, "step": 2870 }, { "epoch": 0.02303981568147455, "grad_norm": 0.7322860524669678, "learning_rate": 2.303015758739301e-06, "loss": 0.3992, "step": 2880 }, { "epoch": 0.02311981504147967, "grad_norm": 0.7809553100217358, "learning_rate": 2.311015118790497e-06, "loss": 0.4323, "step": 2890 }, { "epoch": 0.02319981440148479, "grad_norm": 0.7497042609672665, "learning_rate": 2.3190144788416928e-06, "loss": 0.4216, "step": 2900 }, { "epoch": 0.02327981376148991, "grad_norm": 0.77908486703842, "learning_rate": 2.3270138388928887e-06, "loss": 0.4139, "step": 2910 }, { "epoch": 0.02335981312149503, "grad_norm": 0.8259941131428074, "learning_rate": 2.3350131989440846e-06, "loss": 0.4298, "step": 2920 }, { "epoch": 0.02343981248150015, "grad_norm": 0.7315958297934407, "learning_rate": 2.3430125589952805e-06, "loss": 0.4148, "step": 2930 }, { "epoch": 0.023519811841505266, "grad_norm": 0.7127501348213877, "learning_rate": 2.3510119190464764e-06, "loss": 0.4226, "step": 2940 }, { "epoch": 0.023599811201510387, "grad_norm": 0.7004822200975431, "learning_rate": 2.3590112790976724e-06, "loss": 0.4037, "step": 2950 }, { "epoch": 0.023679810561515507, "grad_norm": 0.7640873416448367, "learning_rate": 2.3670106391488683e-06, "loss": 0.4196, "step": 2960 }, { "epoch": 0.023759809921520627, "grad_norm": 0.7562584220469137, "learning_rate": 2.375009999200064e-06, "loss": 0.4226, "step": 2970 }, { "epoch": 0.023839809281525747, "grad_norm": 0.700408619087647, "learning_rate": 2.38300935925126e-06, "loss": 0.4194, "step": 2980 }, { "epoch": 0.023919808641530867, "grad_norm": 0.6926553567290514, "learning_rate": 2.391008719302456e-06, "loss": 0.4377, "step": 2990 }, { "epoch": 0.023999808001535988, "grad_norm": 0.8080569171783707, "learning_rate": 2.399008079353652e-06, "loss": 0.425, "step": 3000 }, { "epoch": 0.024079807361541108, "grad_norm": 0.7127303977340983, "learning_rate": 2.407007439404848e-06, "loss": 0.4132, "step": 3010 }, { "epoch": 0.024159806721546228, "grad_norm": 0.8197933173831502, "learning_rate": 2.4150067994560438e-06, "loss": 0.455, "step": 3020 }, { "epoch": 0.024239806081551348, "grad_norm": 0.7710243766351836, "learning_rate": 2.4230061595072397e-06, "loss": 0.4181, "step": 3030 }, { "epoch": 0.02431980544155647, "grad_norm": 0.7237467513596426, "learning_rate": 2.4310055195584356e-06, "loss": 0.4019, "step": 3040 }, { "epoch": 0.02439980480156159, "grad_norm": 0.6859301673181247, "learning_rate": 2.4390048796096315e-06, "loss": 0.417, "step": 3050 }, { "epoch": 0.02447980416156671, "grad_norm": 0.7468441788531288, "learning_rate": 2.4470042396608274e-06, "loss": 0.4332, "step": 3060 }, { "epoch": 0.02455980352157183, "grad_norm": 0.6969645352923937, "learning_rate": 2.4550035997120233e-06, "loss": 0.4253, "step": 3070 }, { "epoch": 0.024639802881576946, "grad_norm": 0.732458270879925, "learning_rate": 2.4630029597632193e-06, "loss": 0.4071, "step": 3080 }, { "epoch": 0.024719802241582066, "grad_norm": 1.0213686642017104, "learning_rate": 2.471002319814415e-06, "loss": 0.4284, "step": 3090 }, { "epoch": 0.024799801601587186, "grad_norm": 0.7109617020682577, "learning_rate": 2.479001679865611e-06, "loss": 0.4338, "step": 3100 }, { "epoch": 0.024879800961592306, "grad_norm": 0.7970001881138196, "learning_rate": 2.487001039916807e-06, "loss": 0.4071, "step": 3110 }, { "epoch": 0.024959800321597427, "grad_norm": 0.7531634719565369, "learning_rate": 2.495000399968003e-06, "loss": 0.4639, "step": 3120 }, { "epoch": 0.025039799681602547, "grad_norm": 0.6656181891345934, "learning_rate": 2.5029997600191984e-06, "loss": 0.4099, "step": 3130 }, { "epoch": 0.025119799041607667, "grad_norm": 0.9053194365491155, "learning_rate": 2.5109991200703947e-06, "loss": 0.4117, "step": 3140 }, { "epoch": 0.025199798401612787, "grad_norm": 0.7986025391471563, "learning_rate": 2.5189984801215907e-06, "loss": 0.4293, "step": 3150 }, { "epoch": 0.025279797761617907, "grad_norm": 0.7216163241881903, "learning_rate": 2.526997840172786e-06, "loss": 0.4014, "step": 3160 }, { "epoch": 0.025359797121623028, "grad_norm": 0.8773953768019375, "learning_rate": 2.5349972002239825e-06, "loss": 0.4272, "step": 3170 }, { "epoch": 0.025439796481628148, "grad_norm": 0.69671419994169, "learning_rate": 2.5429965602751784e-06, "loss": 0.4056, "step": 3180 }, { "epoch": 0.025519795841633268, "grad_norm": 0.8372255728456501, "learning_rate": 2.550995920326374e-06, "loss": 0.421, "step": 3190 }, { "epoch": 0.025599795201638388, "grad_norm": 0.7425111234564166, "learning_rate": 2.5589952803775702e-06, "loss": 0.42, "step": 3200 }, { "epoch": 0.02567979456164351, "grad_norm": 0.7792291619496273, "learning_rate": 2.566994640428766e-06, "loss": 0.3956, "step": 3210 }, { "epoch": 0.02575979392164863, "grad_norm": 0.791207552625433, "learning_rate": 2.5749940004799616e-06, "loss": 0.4284, "step": 3220 }, { "epoch": 0.025839793281653745, "grad_norm": 0.7836824912313216, "learning_rate": 2.5829933605311576e-06, "loss": 0.4091, "step": 3230 }, { "epoch": 0.025919792641658865, "grad_norm": 0.7110649040200595, "learning_rate": 2.590992720582354e-06, "loss": 0.4135, "step": 3240 }, { "epoch": 0.025999792001663986, "grad_norm": 0.7404153916766049, "learning_rate": 2.5989920806335494e-06, "loss": 0.429, "step": 3250 }, { "epoch": 0.026079791361669106, "grad_norm": 0.6986558695921249, "learning_rate": 2.6069914406847453e-06, "loss": 0.4334, "step": 3260 }, { "epoch": 0.026159790721674226, "grad_norm": 0.7632258973881477, "learning_rate": 2.6149908007359416e-06, "loss": 0.3944, "step": 3270 }, { "epoch": 0.026239790081679346, "grad_norm": 0.785186697637848, "learning_rate": 2.622990160787137e-06, "loss": 0.4178, "step": 3280 }, { "epoch": 0.026319789441684466, "grad_norm": 0.8661439723668041, "learning_rate": 2.630989520838333e-06, "loss": 0.4113, "step": 3290 }, { "epoch": 0.026399788801689587, "grad_norm": 0.7237402851018382, "learning_rate": 2.6389888808895294e-06, "loss": 0.4078, "step": 3300 }, { "epoch": 0.026479788161694707, "grad_norm": 0.8392950022908678, "learning_rate": 2.646988240940725e-06, "loss": 0.4223, "step": 3310 }, { "epoch": 0.026559787521699827, "grad_norm": 0.92567701672041, "learning_rate": 2.654987600991921e-06, "loss": 0.3845, "step": 3320 }, { "epoch": 0.026639786881704947, "grad_norm": 0.6933742034635484, "learning_rate": 2.662986961043117e-06, "loss": 0.427, "step": 3330 }, { "epoch": 0.026719786241710067, "grad_norm": 0.7620381540647038, "learning_rate": 2.6709863210943126e-06, "loss": 0.4209, "step": 3340 }, { "epoch": 0.026799785601715188, "grad_norm": 0.721902719522637, "learning_rate": 2.6789856811455085e-06, "loss": 0.41, "step": 3350 }, { "epoch": 0.026879784961720308, "grad_norm": 0.7299501620253848, "learning_rate": 2.6869850411967045e-06, "loss": 0.4166, "step": 3360 }, { "epoch": 0.026959784321725425, "grad_norm": 0.7715065662581788, "learning_rate": 2.6949844012479004e-06, "loss": 0.4024, "step": 3370 }, { "epoch": 0.027039783681730545, "grad_norm": 0.8682048882317643, "learning_rate": 2.7029837612990963e-06, "loss": 0.4131, "step": 3380 }, { "epoch": 0.027119783041735665, "grad_norm": 0.869764700266281, "learning_rate": 2.710983121350292e-06, "loss": 0.4261, "step": 3390 }, { "epoch": 0.027199782401740785, "grad_norm": 0.7340237050441278, "learning_rate": 2.718982481401488e-06, "loss": 0.4025, "step": 3400 }, { "epoch": 0.027279781761745905, "grad_norm": 0.8579554974753395, "learning_rate": 2.726981841452684e-06, "loss": 0.407, "step": 3410 }, { "epoch": 0.027359781121751026, "grad_norm": 0.7274223844530919, "learning_rate": 2.73498120150388e-06, "loss": 0.4133, "step": 3420 }, { "epoch": 0.027439780481756146, "grad_norm": 0.7441058560749837, "learning_rate": 2.742980561555076e-06, "loss": 0.4243, "step": 3430 }, { "epoch": 0.027519779841761266, "grad_norm": 0.8051355651618886, "learning_rate": 2.7509799216062718e-06, "loss": 0.4251, "step": 3440 }, { "epoch": 0.027599779201766386, "grad_norm": 0.7432285405318118, "learning_rate": 2.7589792816574677e-06, "loss": 0.4412, "step": 3450 }, { "epoch": 0.027679778561771506, "grad_norm": 0.6517122251801726, "learning_rate": 2.7669786417086636e-06, "loss": 0.4252, "step": 3460 }, { "epoch": 0.027759777921776627, "grad_norm": 0.6875721581221387, "learning_rate": 2.774978001759859e-06, "loss": 0.4177, "step": 3470 }, { "epoch": 0.027839777281781747, "grad_norm": 0.702253196059382, "learning_rate": 2.7829773618110554e-06, "loss": 0.4205, "step": 3480 }, { "epoch": 0.027919776641786867, "grad_norm": 0.8482036395209255, "learning_rate": 2.7909767218622514e-06, "loss": 0.4017, "step": 3490 }, { "epoch": 0.027999776001791987, "grad_norm": 0.6925655746894002, "learning_rate": 2.798976081913447e-06, "loss": 0.4121, "step": 3500 }, { "epoch": 0.028079775361797104, "grad_norm": 0.7220552449627191, "learning_rate": 2.806975441964643e-06, "loss": 0.4102, "step": 3510 }, { "epoch": 0.028159774721802224, "grad_norm": 0.8831384842937139, "learning_rate": 2.814974802015839e-06, "loss": 0.4228, "step": 3520 }, { "epoch": 0.028239774081807344, "grad_norm": 0.7579479724344111, "learning_rate": 2.8229741620670346e-06, "loss": 0.413, "step": 3530 }, { "epoch": 0.028319773441812465, "grad_norm": 0.7501882165699492, "learning_rate": 2.8309735221182305e-06, "loss": 0.4282, "step": 3540 }, { "epoch": 0.028399772801817585, "grad_norm": 0.8755928070904447, "learning_rate": 2.838972882169427e-06, "loss": 0.4007, "step": 3550 }, { "epoch": 0.028479772161822705, "grad_norm": 0.7554891461392241, "learning_rate": 2.8469722422206223e-06, "loss": 0.4168, "step": 3560 }, { "epoch": 0.028559771521827825, "grad_norm": 0.6985247419196446, "learning_rate": 2.8549716022718183e-06, "loss": 0.4169, "step": 3570 }, { "epoch": 0.028639770881832945, "grad_norm": 0.7592211529186911, "learning_rate": 2.8629709623230146e-06, "loss": 0.4065, "step": 3580 }, { "epoch": 0.028719770241838066, "grad_norm": 0.7833965541238131, "learning_rate": 2.87097032237421e-06, "loss": 0.4446, "step": 3590 }, { "epoch": 0.028799769601843186, "grad_norm": 0.7738713593579586, "learning_rate": 2.878969682425406e-06, "loss": 0.419, "step": 3600 }, { "epoch": 0.028879768961848306, "grad_norm": 0.8315629244025291, "learning_rate": 2.8869690424766023e-06, "loss": 0.4219, "step": 3610 }, { "epoch": 0.028959768321853426, "grad_norm": 0.781696668298691, "learning_rate": 2.894968402527798e-06, "loss": 0.4151, "step": 3620 }, { "epoch": 0.029039767681858546, "grad_norm": 0.7720539381394058, "learning_rate": 2.9029677625789938e-06, "loss": 0.4202, "step": 3630 }, { "epoch": 0.029119767041863667, "grad_norm": 0.7526278482938622, "learning_rate": 2.91096712263019e-06, "loss": 0.4436, "step": 3640 }, { "epoch": 0.029199766401868787, "grad_norm": 0.7337976104344518, "learning_rate": 2.9189664826813856e-06, "loss": 0.4236, "step": 3650 }, { "epoch": 0.029279765761873904, "grad_norm": 0.83533803326155, "learning_rate": 2.9269658427325815e-06, "loss": 0.4176, "step": 3660 }, { "epoch": 0.029359765121879024, "grad_norm": 0.787318610210915, "learning_rate": 2.934965202783778e-06, "loss": 0.4165, "step": 3670 }, { "epoch": 0.029439764481884144, "grad_norm": 1.4090517215188612, "learning_rate": 2.9429645628349733e-06, "loss": 0.4119, "step": 3680 }, { "epoch": 0.029519763841889264, "grad_norm": 0.9266366914858271, "learning_rate": 2.9509639228861692e-06, "loss": 0.4179, "step": 3690 }, { "epoch": 0.029599763201894384, "grad_norm": 0.934424674393174, "learning_rate": 2.9589632829373656e-06, "loss": 0.4, "step": 3700 }, { "epoch": 0.029679762561899505, "grad_norm": 0.8568902144006225, "learning_rate": 2.966962642988561e-06, "loss": 0.4096, "step": 3710 }, { "epoch": 0.029759761921904625, "grad_norm": 0.8114058465400533, "learning_rate": 2.974962003039757e-06, "loss": 0.4447, "step": 3720 }, { "epoch": 0.029839761281909745, "grad_norm": 1.6052989805278164, "learning_rate": 2.9829613630909533e-06, "loss": 0.4291, "step": 3730 }, { "epoch": 0.029919760641914865, "grad_norm": 0.73538618969903, "learning_rate": 2.990960723142149e-06, "loss": 0.4248, "step": 3740 }, { "epoch": 0.029999760001919985, "grad_norm": 0.7966165280123241, "learning_rate": 2.9989600831933447e-06, "loss": 0.4153, "step": 3750 }, { "epoch": 0.030079759361925106, "grad_norm": 0.8380095957922997, "learning_rate": 3.006959443244541e-06, "loss": 0.378, "step": 3760 }, { "epoch": 0.030159758721930226, "grad_norm": 0.9346417813995904, "learning_rate": 3.0149588032957366e-06, "loss": 0.4242, "step": 3770 }, { "epoch": 0.030239758081935346, "grad_norm": 0.7661127998339704, "learning_rate": 3.0229581633469325e-06, "loss": 0.4218, "step": 3780 }, { "epoch": 0.030319757441940466, "grad_norm": 0.7820076796497493, "learning_rate": 3.030957523398129e-06, "loss": 0.4057, "step": 3790 }, { "epoch": 0.030399756801945583, "grad_norm": 0.7683193002692328, "learning_rate": 3.0389568834493243e-06, "loss": 0.4282, "step": 3800 }, { "epoch": 0.030479756161950703, "grad_norm": 0.7330718238871217, "learning_rate": 3.0469562435005202e-06, "loss": 0.4155, "step": 3810 }, { "epoch": 0.030559755521955823, "grad_norm": 0.7070524082806218, "learning_rate": 3.0549556035517166e-06, "loss": 0.4087, "step": 3820 }, { "epoch": 0.030639754881960943, "grad_norm": 0.7825560701966258, "learning_rate": 3.062954963602912e-06, "loss": 0.4149, "step": 3830 }, { "epoch": 0.030719754241966064, "grad_norm": 0.674902754654649, "learning_rate": 3.070954323654108e-06, "loss": 0.4222, "step": 3840 }, { "epoch": 0.030799753601971184, "grad_norm": 0.6899663241225518, "learning_rate": 3.0789536837053035e-06, "loss": 0.4072, "step": 3850 }, { "epoch": 0.030879752961976304, "grad_norm": 0.747751536532187, "learning_rate": 3.0869530437565e-06, "loss": 0.4265, "step": 3860 }, { "epoch": 0.030959752321981424, "grad_norm": 0.7319670446839773, "learning_rate": 3.0949524038076957e-06, "loss": 0.4365, "step": 3870 }, { "epoch": 0.031039751681986544, "grad_norm": 0.8044391132448001, "learning_rate": 3.1029517638588912e-06, "loss": 0.4345, "step": 3880 }, { "epoch": 0.031119751041991665, "grad_norm": 0.6852016073842015, "learning_rate": 3.1109511239100876e-06, "loss": 0.3909, "step": 3890 }, { "epoch": 0.031199750401996785, "grad_norm": 0.8109666979474524, "learning_rate": 3.1189504839612835e-06, "loss": 0.4159, "step": 3900 }, { "epoch": 0.0312797497620019, "grad_norm": 0.7690716259319208, "learning_rate": 3.126949844012479e-06, "loss": 0.4261, "step": 3910 }, { "epoch": 0.03135974912200702, "grad_norm": 0.770382066563297, "learning_rate": 3.1349492040636753e-06, "loss": 0.4197, "step": 3920 }, { "epoch": 0.03143974848201214, "grad_norm": 0.7899366284185928, "learning_rate": 3.1429485641148712e-06, "loss": 0.4356, "step": 3930 }, { "epoch": 0.03151974784201726, "grad_norm": 0.7287030959080197, "learning_rate": 3.1509479241660667e-06, "loss": 0.4271, "step": 3940 }, { "epoch": 0.03159974720202238, "grad_norm": 0.6751549357858156, "learning_rate": 3.158947284217263e-06, "loss": 0.432, "step": 3950 }, { "epoch": 0.0316797465620275, "grad_norm": 0.7162873288902682, "learning_rate": 3.166946644268459e-06, "loss": 0.4168, "step": 3960 }, { "epoch": 0.03175974592203262, "grad_norm": 0.7624399562051731, "learning_rate": 3.1749460043196545e-06, "loss": 0.4077, "step": 3970 }, { "epoch": 0.03183974528203774, "grad_norm": 0.720382439048551, "learning_rate": 3.182945364370851e-06, "loss": 0.4186, "step": 3980 }, { "epoch": 0.03191974464204286, "grad_norm": 0.8771157238336784, "learning_rate": 3.1909447244220467e-06, "loss": 0.411, "step": 3990 }, { "epoch": 0.03199974400204798, "grad_norm": 0.8439843188095638, "learning_rate": 3.198944084473242e-06, "loss": 0.4254, "step": 4000 }, { "epoch": 0.032079743362053104, "grad_norm": 0.7522465189570665, "learning_rate": 3.2069434445244385e-06, "loss": 0.4182, "step": 4010 }, { "epoch": 0.032159742722058224, "grad_norm": 0.7990809626930068, "learning_rate": 3.214942804575634e-06, "loss": 0.4283, "step": 4020 }, { "epoch": 0.032239742082063344, "grad_norm": 0.7484878445246484, "learning_rate": 3.22294216462683e-06, "loss": 0.3957, "step": 4030 }, { "epoch": 0.032319741442068464, "grad_norm": 0.653062676131544, "learning_rate": 3.2309415246780263e-06, "loss": 0.4339, "step": 4040 }, { "epoch": 0.032399740802073584, "grad_norm": 0.7654847826834672, "learning_rate": 3.2389408847292218e-06, "loss": 0.4277, "step": 4050 }, { "epoch": 0.032479740162078705, "grad_norm": 0.7545901814731136, "learning_rate": 3.2469402447804177e-06, "loss": 0.4181, "step": 4060 }, { "epoch": 0.032559739522083825, "grad_norm": 0.6639250152597482, "learning_rate": 3.254939604831614e-06, "loss": 0.4259, "step": 4070 }, { "epoch": 0.032639738882088945, "grad_norm": 0.8320704412806774, "learning_rate": 3.2629389648828095e-06, "loss": 0.4258, "step": 4080 }, { "epoch": 0.032719738242094065, "grad_norm": 0.8748220246905324, "learning_rate": 3.2709383249340054e-06, "loss": 0.4348, "step": 4090 }, { "epoch": 0.032799737602099185, "grad_norm": 1.0006726922931282, "learning_rate": 3.2789376849852018e-06, "loss": 0.4111, "step": 4100 }, { "epoch": 0.032879736962104306, "grad_norm": 0.703908274983939, "learning_rate": 3.2869370450363973e-06, "loss": 0.4508, "step": 4110 }, { "epoch": 0.032959736322109426, "grad_norm": 0.7114379493698554, "learning_rate": 3.294936405087593e-06, "loss": 0.4011, "step": 4120 }, { "epoch": 0.033039735682114546, "grad_norm": 0.7836668838529308, "learning_rate": 3.3029357651387895e-06, "loss": 0.4059, "step": 4130 }, { "epoch": 0.033119735042119666, "grad_norm": 0.7037822230916331, "learning_rate": 3.310935125189985e-06, "loss": 0.4171, "step": 4140 }, { "epoch": 0.033199734402124786, "grad_norm": 0.8429288510951353, "learning_rate": 3.318934485241181e-06, "loss": 0.4336, "step": 4150 }, { "epoch": 0.0332797337621299, "grad_norm": 0.8160186312241915, "learning_rate": 3.3269338452923764e-06, "loss": 0.4313, "step": 4160 }, { "epoch": 0.03335973312213502, "grad_norm": 0.8158956823402862, "learning_rate": 3.3349332053435728e-06, "loss": 0.4117, "step": 4170 }, { "epoch": 0.03343973248214014, "grad_norm": 0.7830740452600919, "learning_rate": 3.3429325653947687e-06, "loss": 0.394, "step": 4180 }, { "epoch": 0.03351973184214526, "grad_norm": 0.8744626314539858, "learning_rate": 3.350931925445964e-06, "loss": 0.4229, "step": 4190 }, { "epoch": 0.03359973120215038, "grad_norm": 0.8927610578944701, "learning_rate": 3.3589312854971605e-06, "loss": 0.4582, "step": 4200 }, { "epoch": 0.0336797305621555, "grad_norm": 0.7195985669161646, "learning_rate": 3.3669306455483564e-06, "loss": 0.4335, "step": 4210 }, { "epoch": 0.03375972992216062, "grad_norm": 0.8906544223501964, "learning_rate": 3.374930005599552e-06, "loss": 0.4131, "step": 4220 }, { "epoch": 0.03383972928216574, "grad_norm": 0.7389006577317037, "learning_rate": 3.3829293656507483e-06, "loss": 0.4272, "step": 4230 }, { "epoch": 0.03391972864217086, "grad_norm": 0.742682266765258, "learning_rate": 3.390928725701944e-06, "loss": 0.4418, "step": 4240 }, { "epoch": 0.03399972800217598, "grad_norm": 0.6774990098774326, "learning_rate": 3.3989280857531397e-06, "loss": 0.4194, "step": 4250 }, { "epoch": 0.0340797273621811, "grad_norm": 0.9017975115557092, "learning_rate": 3.406927445804336e-06, "loss": 0.4148, "step": 4260 }, { "epoch": 0.03415972672218622, "grad_norm": 0.7641682976170854, "learning_rate": 3.414926805855532e-06, "loss": 0.4015, "step": 4270 }, { "epoch": 0.03423972608219134, "grad_norm": 1.7976847080628966, "learning_rate": 3.4229261659067274e-06, "loss": 0.4218, "step": 4280 }, { "epoch": 0.03431972544219646, "grad_norm": 0.7912987344925629, "learning_rate": 3.4309255259579237e-06, "loss": 0.4273, "step": 4290 }, { "epoch": 0.03439972480220158, "grad_norm": 0.865219190435078, "learning_rate": 3.4389248860091197e-06, "loss": 0.4155, "step": 4300 }, { "epoch": 0.0344797241622067, "grad_norm": 0.7247465267197502, "learning_rate": 3.446924246060315e-06, "loss": 0.4354, "step": 4310 }, { "epoch": 0.03455972352221182, "grad_norm": 0.7302003644690978, "learning_rate": 3.4549236061115115e-06, "loss": 0.4202, "step": 4320 }, { "epoch": 0.03463972288221694, "grad_norm": 0.7687470691197218, "learning_rate": 3.4629229661627074e-06, "loss": 0.4162, "step": 4330 }, { "epoch": 0.03471972224222206, "grad_norm": 0.8026747308203839, "learning_rate": 3.470922326213903e-06, "loss": 0.4168, "step": 4340 }, { "epoch": 0.034799721602227184, "grad_norm": 0.7280393352379876, "learning_rate": 3.4789216862650992e-06, "loss": 0.4196, "step": 4350 }, { "epoch": 0.034879720962232304, "grad_norm": 0.7564739490939029, "learning_rate": 3.486921046316295e-06, "loss": 0.4385, "step": 4360 }, { "epoch": 0.034959720322237424, "grad_norm": 0.8650076738362572, "learning_rate": 3.4949204063674906e-06, "loss": 0.4092, "step": 4370 }, { "epoch": 0.035039719682242544, "grad_norm": 0.7393962081468373, "learning_rate": 3.502919766418687e-06, "loss": 0.424, "step": 4380 }, { "epoch": 0.035119719042247664, "grad_norm": 0.9059758990047778, "learning_rate": 3.510919126469883e-06, "loss": 0.4222, "step": 4390 }, { "epoch": 0.035199718402252785, "grad_norm": 0.7241080903311046, "learning_rate": 3.5189184865210784e-06, "loss": 0.4176, "step": 4400 }, { "epoch": 0.035279717762257905, "grad_norm": 0.7393352165228971, "learning_rate": 3.5269178465722747e-06, "loss": 0.4193, "step": 4410 }, { "epoch": 0.035359717122263025, "grad_norm": 0.7178879442718987, "learning_rate": 3.5349172066234706e-06, "loss": 0.4219, "step": 4420 }, { "epoch": 0.035439716482268145, "grad_norm": 0.8075219805081396, "learning_rate": 3.542916566674666e-06, "loss": 0.4147, "step": 4430 }, { "epoch": 0.03551971584227326, "grad_norm": 1.2127017079493791, "learning_rate": 3.5509159267258625e-06, "loss": 0.4132, "step": 4440 }, { "epoch": 0.03559971520227838, "grad_norm": 0.7192010045742855, "learning_rate": 3.5589152867770584e-06, "loss": 0.411, "step": 4450 }, { "epoch": 0.0356797145622835, "grad_norm": 0.9428723692964156, "learning_rate": 3.566914646828254e-06, "loss": 0.4236, "step": 4460 }, { "epoch": 0.03575971392228862, "grad_norm": 0.8614292082386968, "learning_rate": 3.57491400687945e-06, "loss": 0.4213, "step": 4470 }, { "epoch": 0.03583971328229374, "grad_norm": 0.6673883560889204, "learning_rate": 3.582913366930646e-06, "loss": 0.4075, "step": 4480 }, { "epoch": 0.03591971264229886, "grad_norm": 0.9643089999372365, "learning_rate": 3.5909127269818416e-06, "loss": 0.4546, "step": 4490 }, { "epoch": 0.03599971200230398, "grad_norm": 0.9297826294459585, "learning_rate": 3.5989120870330375e-06, "loss": 0.4172, "step": 4500 }, { "epoch": 0.0360797113623091, "grad_norm": 0.7557684985300384, "learning_rate": 3.606911447084234e-06, "loss": 0.4105, "step": 4510 }, { "epoch": 0.03615971072231422, "grad_norm": 0.7931119899775526, "learning_rate": 3.6149108071354294e-06, "loss": 0.4123, "step": 4520 }, { "epoch": 0.03623971008231934, "grad_norm": 0.797325353191407, "learning_rate": 3.6229101671866253e-06, "loss": 0.4347, "step": 4530 }, { "epoch": 0.03631970944232446, "grad_norm": 0.74214327307857, "learning_rate": 3.6309095272378216e-06, "loss": 0.4212, "step": 4540 }, { "epoch": 0.03639970880232958, "grad_norm": 0.7681829935013766, "learning_rate": 3.638908887289017e-06, "loss": 0.4049, "step": 4550 }, { "epoch": 0.0364797081623347, "grad_norm": 0.8110044800452652, "learning_rate": 3.646908247340213e-06, "loss": 0.4152, "step": 4560 }, { "epoch": 0.03655970752233982, "grad_norm": 0.7505043518350412, "learning_rate": 3.654907607391409e-06, "loss": 0.4284, "step": 4570 }, { "epoch": 0.03663970688234494, "grad_norm": 0.7877830350035486, "learning_rate": 3.662906967442605e-06, "loss": 0.4329, "step": 4580 }, { "epoch": 0.03671970624235006, "grad_norm": 0.6812701461168307, "learning_rate": 3.6709063274938008e-06, "loss": 0.4173, "step": 4590 }, { "epoch": 0.03679970560235518, "grad_norm": 0.8001631177482795, "learning_rate": 3.6789056875449967e-06, "loss": 0.4331, "step": 4600 }, { "epoch": 0.0368797049623603, "grad_norm": 0.7895666869661798, "learning_rate": 3.6869050475961926e-06, "loss": 0.4033, "step": 4610 }, { "epoch": 0.03695970432236542, "grad_norm": 0.8173983494560131, "learning_rate": 3.6949044076473885e-06, "loss": 0.4164, "step": 4620 }, { "epoch": 0.03703970368237054, "grad_norm": 0.8378313154048689, "learning_rate": 3.7029037676985844e-06, "loss": 0.3991, "step": 4630 }, { "epoch": 0.03711970304237566, "grad_norm": 0.9935623637553458, "learning_rate": 3.7109031277497804e-06, "loss": 0.4162, "step": 4640 }, { "epoch": 0.03719970240238078, "grad_norm": 0.8245648280013409, "learning_rate": 3.7189024878009763e-06, "loss": 0.4269, "step": 4650 }, { "epoch": 0.0372797017623859, "grad_norm": 0.8268507152029221, "learning_rate": 3.726901847852172e-06, "loss": 0.4169, "step": 4660 }, { "epoch": 0.03735970112239102, "grad_norm": 0.7781820643589694, "learning_rate": 3.734901207903368e-06, "loss": 0.4277, "step": 4670 }, { "epoch": 0.03743970048239614, "grad_norm": 0.9906594923724112, "learning_rate": 3.7429005679545636e-06, "loss": 0.4402, "step": 4680 }, { "epoch": 0.03751969984240126, "grad_norm": 0.7088718853595577, "learning_rate": 3.75089992800576e-06, "loss": 0.4065, "step": 4690 }, { "epoch": 0.037599699202406384, "grad_norm": 0.7737684662407376, "learning_rate": 3.758899288056956e-06, "loss": 0.43, "step": 4700 }, { "epoch": 0.037679698562411504, "grad_norm": 0.6799556259390402, "learning_rate": 3.7668986481081513e-06, "loss": 0.4269, "step": 4710 }, { "epoch": 0.037759697922416624, "grad_norm": 0.8353980941780943, "learning_rate": 3.7748980081593477e-06, "loss": 0.4292, "step": 4720 }, { "epoch": 0.03783969728242174, "grad_norm": 0.9240909978256971, "learning_rate": 3.7828973682105436e-06, "loss": 0.4218, "step": 4730 }, { "epoch": 0.03791969664242686, "grad_norm": 0.793600464236656, "learning_rate": 3.790896728261739e-06, "loss": 0.4066, "step": 4740 }, { "epoch": 0.03799969600243198, "grad_norm": 0.853623661723338, "learning_rate": 3.7988960883129354e-06, "loss": 0.3968, "step": 4750 }, { "epoch": 0.0380796953624371, "grad_norm": 0.9308117082987841, "learning_rate": 3.8068954483641313e-06, "loss": 0.4127, "step": 4760 }, { "epoch": 0.03815969472244222, "grad_norm": 0.767583639173416, "learning_rate": 3.814894808415327e-06, "loss": 0.405, "step": 4770 }, { "epoch": 0.03823969408244734, "grad_norm": 0.7248396423731486, "learning_rate": 3.822894168466522e-06, "loss": 0.4049, "step": 4780 }, { "epoch": 0.03831969344245246, "grad_norm": 0.7835674270723467, "learning_rate": 3.830893528517719e-06, "loss": 0.4447, "step": 4790 }, { "epoch": 0.03839969280245758, "grad_norm": 0.7336637502161489, "learning_rate": 3.838892888568915e-06, "loss": 0.415, "step": 4800 }, { "epoch": 0.0384796921624627, "grad_norm": 0.8211434186162638, "learning_rate": 3.8468922486201105e-06, "loss": 0.4073, "step": 4810 }, { "epoch": 0.03855969152246782, "grad_norm": 0.6895255239007041, "learning_rate": 3.854891608671307e-06, "loss": 0.4315, "step": 4820 }, { "epoch": 0.03863969088247294, "grad_norm": 0.8404363403282458, "learning_rate": 3.862890968722502e-06, "loss": 0.4041, "step": 4830 }, { "epoch": 0.03871969024247806, "grad_norm": 0.8233254492362007, "learning_rate": 3.870890328773698e-06, "loss": 0.4197, "step": 4840 }, { "epoch": 0.03879968960248318, "grad_norm": 0.7092426325090834, "learning_rate": 3.878889688824894e-06, "loss": 0.415, "step": 4850 }, { "epoch": 0.0388796889624883, "grad_norm": 0.8335378303937002, "learning_rate": 3.8868890488760905e-06, "loss": 0.4213, "step": 4860 }, { "epoch": 0.03895968832249342, "grad_norm": 1.0339208338921473, "learning_rate": 3.894888408927286e-06, "loss": 0.4241, "step": 4870 }, { "epoch": 0.03903968768249854, "grad_norm": 0.7027107330027197, "learning_rate": 3.902887768978482e-06, "loss": 0.4335, "step": 4880 }, { "epoch": 0.03911968704250366, "grad_norm": 0.6792413425728526, "learning_rate": 3.910887129029678e-06, "loss": 0.4, "step": 4890 }, { "epoch": 0.03919968640250878, "grad_norm": 0.8489275951527085, "learning_rate": 3.918886489080873e-06, "loss": 0.4207, "step": 4900 }, { "epoch": 0.0392796857625139, "grad_norm": 0.7357801876777263, "learning_rate": 3.92688584913207e-06, "loss": 0.3994, "step": 4910 }, { "epoch": 0.03935968512251902, "grad_norm": 0.7141792707926279, "learning_rate": 3.934885209183266e-06, "loss": 0.4154, "step": 4920 }, { "epoch": 0.03943968448252414, "grad_norm": 0.6915855315198979, "learning_rate": 3.9428845692344615e-06, "loss": 0.433, "step": 4930 }, { "epoch": 0.03951968384252926, "grad_norm": 0.746029310002541, "learning_rate": 3.950883929285658e-06, "loss": 0.4239, "step": 4940 }, { "epoch": 0.03959968320253438, "grad_norm": 0.7613311051113969, "learning_rate": 3.958883289336853e-06, "loss": 0.4128, "step": 4950 }, { "epoch": 0.0396796825625395, "grad_norm": 0.7738245574431311, "learning_rate": 3.966882649388049e-06, "loss": 0.3933, "step": 4960 }, { "epoch": 0.03975968192254462, "grad_norm": 0.775290020753402, "learning_rate": 3.974882009439245e-06, "loss": 0.4193, "step": 4970 }, { "epoch": 0.03983968128254974, "grad_norm": 0.6385875554512914, "learning_rate": 3.9828813694904415e-06, "loss": 0.4327, "step": 4980 }, { "epoch": 0.03991968064255486, "grad_norm": 0.8273897543410583, "learning_rate": 3.990880729541637e-06, "loss": 0.3948, "step": 4990 }, { "epoch": 0.03999968000255998, "grad_norm": 0.8086324970076214, "learning_rate": 3.998880089592833e-06, "loss": 0.431, "step": 5000 }, { "epoch": 0.0400796793625651, "grad_norm": 0.8410462600158055, "learning_rate": 4.006879449644029e-06, "loss": 0.4375, "step": 5010 }, { "epoch": 0.040159678722570216, "grad_norm": 0.7970093334590466, "learning_rate": 4.014878809695224e-06, "loss": 0.3964, "step": 5020 }, { "epoch": 0.040239678082575336, "grad_norm": 0.8451855140965538, "learning_rate": 4.022878169746421e-06, "loss": 0.4223, "step": 5030 }, { "epoch": 0.04031967744258046, "grad_norm": 0.8037696326031617, "learning_rate": 4.030877529797617e-06, "loss": 0.4013, "step": 5040 }, { "epoch": 0.04039967680258558, "grad_norm": 0.8184764620028723, "learning_rate": 4.0388768898488125e-06, "loss": 0.4108, "step": 5050 }, { "epoch": 0.0404796761625907, "grad_norm": 0.9491635462604268, "learning_rate": 4.046876249900009e-06, "loss": 0.4205, "step": 5060 }, { "epoch": 0.04055967552259582, "grad_norm": 0.7846683380202701, "learning_rate": 4.054875609951204e-06, "loss": 0.4135, "step": 5070 }, { "epoch": 0.04063967488260094, "grad_norm": 0.7742096863296556, "learning_rate": 4.0628749700024e-06, "loss": 0.4201, "step": 5080 }, { "epoch": 0.04071967424260606, "grad_norm": 0.8603867407537333, "learning_rate": 4.070874330053596e-06, "loss": 0.4146, "step": 5090 }, { "epoch": 0.04079967360261118, "grad_norm": 0.8816260253828302, "learning_rate": 4.0788736901047925e-06, "loss": 0.4263, "step": 5100 }, { "epoch": 0.0408796729626163, "grad_norm": 0.7726729093372912, "learning_rate": 4.086873050155988e-06, "loss": 0.4023, "step": 5110 }, { "epoch": 0.04095967232262142, "grad_norm": 0.7029498112628307, "learning_rate": 4.0948724102071835e-06, "loss": 0.4, "step": 5120 }, { "epoch": 0.04103967168262654, "grad_norm": 0.8417772501154345, "learning_rate": 4.10287177025838e-06, "loss": 0.4232, "step": 5130 }, { "epoch": 0.04111967104263166, "grad_norm": 0.7774979644578314, "learning_rate": 4.110871130309575e-06, "loss": 0.4441, "step": 5140 }, { "epoch": 0.04119967040263678, "grad_norm": 0.7424923411566658, "learning_rate": 4.118870490360772e-06, "loss": 0.449, "step": 5150 }, { "epoch": 0.0412796697626419, "grad_norm": 0.8638955839230246, "learning_rate": 4.126869850411967e-06, "loss": 0.4304, "step": 5160 }, { "epoch": 0.04135966912264702, "grad_norm": 0.6725881737180752, "learning_rate": 4.1348692104631635e-06, "loss": 0.3961, "step": 5170 }, { "epoch": 0.04143966848265214, "grad_norm": 0.874148877394667, "learning_rate": 4.142868570514359e-06, "loss": 0.4163, "step": 5180 }, { "epoch": 0.04151966784265726, "grad_norm": 0.7786117262424453, "learning_rate": 4.150867930565555e-06, "loss": 0.3964, "step": 5190 }, { "epoch": 0.04159966720266238, "grad_norm": 0.8826497392107578, "learning_rate": 4.158867290616751e-06, "loss": 0.4242, "step": 5200 }, { "epoch": 0.0416796665626675, "grad_norm": 0.7161328119385545, "learning_rate": 4.166866650667947e-06, "loss": 0.4241, "step": 5210 }, { "epoch": 0.04175966592267262, "grad_norm": 0.7009477661171519, "learning_rate": 4.174866010719143e-06, "loss": 0.4167, "step": 5220 }, { "epoch": 0.04183966528267774, "grad_norm": 0.7634385876905924, "learning_rate": 4.182865370770339e-06, "loss": 0.4115, "step": 5230 }, { "epoch": 0.04191966464268286, "grad_norm": 0.8092533212226951, "learning_rate": 4.1908647308215344e-06, "loss": 0.4458, "step": 5240 }, { "epoch": 0.04199966400268798, "grad_norm": 0.7307358220907433, "learning_rate": 4.198864090872731e-06, "loss": 0.4111, "step": 5250 }, { "epoch": 0.0420796633626931, "grad_norm": 0.8240713560424313, "learning_rate": 4.206863450923926e-06, "loss": 0.4282, "step": 5260 }, { "epoch": 0.04215966272269822, "grad_norm": 0.7285558418634195, "learning_rate": 4.214862810975122e-06, "loss": 0.4169, "step": 5270 }, { "epoch": 0.04223966208270334, "grad_norm": 0.7400665551990516, "learning_rate": 4.222862171026318e-06, "loss": 0.415, "step": 5280 }, { "epoch": 0.04231966144270846, "grad_norm": 0.6992384256270314, "learning_rate": 4.2308615310775144e-06, "loss": 0.3923, "step": 5290 }, { "epoch": 0.042399660802713575, "grad_norm": 0.7596287545876239, "learning_rate": 4.23886089112871e-06, "loss": 0.4332, "step": 5300 }, { "epoch": 0.042479660162718695, "grad_norm": 0.6993588489525644, "learning_rate": 4.246860251179906e-06, "loss": 0.411, "step": 5310 }, { "epoch": 0.042559659522723815, "grad_norm": 0.7163819865765352, "learning_rate": 4.254859611231102e-06, "loss": 0.404, "step": 5320 }, { "epoch": 0.042639658882728936, "grad_norm": 0.7137747559848004, "learning_rate": 4.262858971282297e-06, "loss": 0.4163, "step": 5330 }, { "epoch": 0.042719658242734056, "grad_norm": 0.7411317996876314, "learning_rate": 4.270858331333494e-06, "loss": 0.4449, "step": 5340 }, { "epoch": 0.042799657602739176, "grad_norm": 0.8612548649441135, "learning_rate": 4.27885769138469e-06, "loss": 0.4151, "step": 5350 }, { "epoch": 0.042879656962744296, "grad_norm": 0.8160447659427233, "learning_rate": 4.286857051435885e-06, "loss": 0.4069, "step": 5360 }, { "epoch": 0.042959656322749416, "grad_norm": 0.7116797296203955, "learning_rate": 4.294856411487082e-06, "loss": 0.4203, "step": 5370 }, { "epoch": 0.043039655682754537, "grad_norm": 0.7727639601627679, "learning_rate": 4.302855771538277e-06, "loss": 0.4046, "step": 5380 }, { "epoch": 0.04311965504275966, "grad_norm": 0.8791872746640758, "learning_rate": 4.310855131589473e-06, "loss": 0.4379, "step": 5390 }, { "epoch": 0.04319965440276478, "grad_norm": 0.7659239499980519, "learning_rate": 4.318854491640669e-06, "loss": 0.3981, "step": 5400 }, { "epoch": 0.0432796537627699, "grad_norm": 0.7057200335964391, "learning_rate": 4.326853851691865e-06, "loss": 0.3914, "step": 5410 }, { "epoch": 0.04335965312277502, "grad_norm": 0.8364791780833386, "learning_rate": 4.334853211743061e-06, "loss": 0.4323, "step": 5420 }, { "epoch": 0.04343965248278014, "grad_norm": 0.7230418038628961, "learning_rate": 4.342852571794256e-06, "loss": 0.428, "step": 5430 }, { "epoch": 0.04351965184278526, "grad_norm": 0.8017011119605163, "learning_rate": 4.350851931845453e-06, "loss": 0.4329, "step": 5440 }, { "epoch": 0.04359965120279038, "grad_norm": 0.8224464314945057, "learning_rate": 4.358851291896648e-06, "loss": 0.4188, "step": 5450 }, { "epoch": 0.0436796505627955, "grad_norm": 0.8487141311391908, "learning_rate": 4.3668506519478446e-06, "loss": 0.4109, "step": 5460 }, { "epoch": 0.04375964992280062, "grad_norm": 0.8362819787833997, "learning_rate": 4.374850011999041e-06, "loss": 0.4041, "step": 5470 }, { "epoch": 0.04383964928280574, "grad_norm": 0.8413708798880616, "learning_rate": 4.382849372050236e-06, "loss": 0.4143, "step": 5480 }, { "epoch": 0.04391964864281086, "grad_norm": 0.7778695561186965, "learning_rate": 4.390848732101432e-06, "loss": 0.437, "step": 5490 }, { "epoch": 0.04399964800281598, "grad_norm": 0.7070477839585702, "learning_rate": 4.398848092152628e-06, "loss": 0.4415, "step": 5500 }, { "epoch": 0.0440796473628211, "grad_norm": 0.7643170244395198, "learning_rate": 4.406847452203824e-06, "loss": 0.4151, "step": 5510 }, { "epoch": 0.04415964672282622, "grad_norm": 0.8180361972664614, "learning_rate": 4.41484681225502e-06, "loss": 0.402, "step": 5520 }, { "epoch": 0.04423964608283134, "grad_norm": 0.8400739915393374, "learning_rate": 4.422846172306216e-06, "loss": 0.4415, "step": 5530 }, { "epoch": 0.04431964544283646, "grad_norm": 0.887791122205854, "learning_rate": 4.430845532357412e-06, "loss": 0.446, "step": 5540 }, { "epoch": 0.04439964480284158, "grad_norm": 0.7922315907335036, "learning_rate": 4.438844892408607e-06, "loss": 0.4345, "step": 5550 }, { "epoch": 0.0444796441628467, "grad_norm": 0.8398474963475308, "learning_rate": 4.446844252459804e-06, "loss": 0.4154, "step": 5560 }, { "epoch": 0.04455964352285182, "grad_norm": 0.718096003504427, "learning_rate": 4.454843612510999e-06, "loss": 0.4025, "step": 5570 }, { "epoch": 0.04463964288285694, "grad_norm": 0.7465946590158571, "learning_rate": 4.4628429725621956e-06, "loss": 0.4077, "step": 5580 }, { "epoch": 0.044719642242862054, "grad_norm": 0.7496879152039617, "learning_rate": 4.470842332613392e-06, "loss": 0.4012, "step": 5590 }, { "epoch": 0.044799641602867174, "grad_norm": 0.7764065193469302, "learning_rate": 4.478841692664587e-06, "loss": 0.4188, "step": 5600 }, { "epoch": 0.044879640962872294, "grad_norm": 0.7909490842171853, "learning_rate": 4.486841052715783e-06, "loss": 0.4313, "step": 5610 }, { "epoch": 0.044959640322877414, "grad_norm": 0.6033961397381622, "learning_rate": 4.494840412766979e-06, "loss": 0.4099, "step": 5620 }, { "epoch": 0.045039639682882535, "grad_norm": 0.8328308195864031, "learning_rate": 4.502839772818175e-06, "loss": 0.4568, "step": 5630 }, { "epoch": 0.045119639042887655, "grad_norm": 0.8906590062569159, "learning_rate": 4.510839132869371e-06, "loss": 0.4105, "step": 5640 }, { "epoch": 0.045199638402892775, "grad_norm": 0.7810846642007622, "learning_rate": 4.518838492920567e-06, "loss": 0.418, "step": 5650 }, { "epoch": 0.045279637762897895, "grad_norm": 0.7691276895060195, "learning_rate": 4.526837852971763e-06, "loss": 0.4189, "step": 5660 }, { "epoch": 0.045359637122903015, "grad_norm": 0.8163696329870775, "learning_rate": 4.534837213022958e-06, "loss": 0.4173, "step": 5670 }, { "epoch": 0.045439636482908136, "grad_norm": 0.7062465072016096, "learning_rate": 4.542836573074155e-06, "loss": 0.4207, "step": 5680 }, { "epoch": 0.045519635842913256, "grad_norm": 0.805646012024512, "learning_rate": 4.55083593312535e-06, "loss": 0.4162, "step": 5690 }, { "epoch": 0.045599635202918376, "grad_norm": 0.7447422817137118, "learning_rate": 4.5588352931765465e-06, "loss": 0.3986, "step": 5700 }, { "epoch": 0.045679634562923496, "grad_norm": 0.6741792033954532, "learning_rate": 4.566834653227742e-06, "loss": 0.4118, "step": 5710 }, { "epoch": 0.045759633922928616, "grad_norm": 0.7997986814284688, "learning_rate": 4.574834013278938e-06, "loss": 0.401, "step": 5720 }, { "epoch": 0.04583963328293374, "grad_norm": 0.7879026350578152, "learning_rate": 4.582833373330134e-06, "loss": 0.4373, "step": 5730 }, { "epoch": 0.04591963264293886, "grad_norm": 0.7607099727514617, "learning_rate": 4.590832733381329e-06, "loss": 0.4073, "step": 5740 }, { "epoch": 0.04599963200294398, "grad_norm": 0.8672209118162942, "learning_rate": 4.598832093432526e-06, "loss": 0.4185, "step": 5750 }, { "epoch": 0.0460796313629491, "grad_norm": 0.7405726490446888, "learning_rate": 4.606831453483722e-06, "loss": 0.423, "step": 5760 }, { "epoch": 0.04615963072295422, "grad_norm": 0.9234040419576202, "learning_rate": 4.6148308135349175e-06, "loss": 0.4209, "step": 5770 }, { "epoch": 0.04623963008295934, "grad_norm": 0.769509110439197, "learning_rate": 4.622830173586114e-06, "loss": 0.43, "step": 5780 }, { "epoch": 0.04631962944296446, "grad_norm": 0.8047670046262847, "learning_rate": 4.630829533637309e-06, "loss": 0.4345, "step": 5790 }, { "epoch": 0.04639962880296958, "grad_norm": 0.8114822324502435, "learning_rate": 4.638828893688505e-06, "loss": 0.4302, "step": 5800 }, { "epoch": 0.0464796281629747, "grad_norm": 0.7322480343462014, "learning_rate": 4.646828253739701e-06, "loss": 0.4469, "step": 5810 }, { "epoch": 0.04655962752297982, "grad_norm": 0.7981121876596042, "learning_rate": 4.654827613790897e-06, "loss": 0.4394, "step": 5820 }, { "epoch": 0.04663962688298494, "grad_norm": 0.7784696958518791, "learning_rate": 4.662826973842093e-06, "loss": 0.4443, "step": 5830 }, { "epoch": 0.04671962624299006, "grad_norm": 0.750230434133907, "learning_rate": 4.670826333893289e-06, "loss": 0.4549, "step": 5840 }, { "epoch": 0.04679962560299518, "grad_norm": 0.8676530099323116, "learning_rate": 4.678825693944485e-06, "loss": 0.4113, "step": 5850 }, { "epoch": 0.0468796249630003, "grad_norm": 0.8813119689815591, "learning_rate": 4.68682505399568e-06, "loss": 0.4328, "step": 5860 }, { "epoch": 0.04695962432300541, "grad_norm": 0.8130208223775043, "learning_rate": 4.694824414046877e-06, "loss": 0.4027, "step": 5870 }, { "epoch": 0.04703962368301053, "grad_norm": 0.7991446551205977, "learning_rate": 4.702823774098072e-06, "loss": 0.426, "step": 5880 }, { "epoch": 0.04711962304301565, "grad_norm": 0.8447827866752773, "learning_rate": 4.7108231341492685e-06, "loss": 0.4198, "step": 5890 }, { "epoch": 0.04719962240302077, "grad_norm": 0.7913094599735477, "learning_rate": 4.718822494200465e-06, "loss": 0.3972, "step": 5900 }, { "epoch": 0.04727962176302589, "grad_norm": 0.8134074721024743, "learning_rate": 4.72682185425166e-06, "loss": 0.4232, "step": 5910 }, { "epoch": 0.047359621123031014, "grad_norm": 0.7983831019472948, "learning_rate": 4.734821214302856e-06, "loss": 0.4367, "step": 5920 }, { "epoch": 0.047439620483036134, "grad_norm": 0.7549222598476109, "learning_rate": 4.742820574354052e-06, "loss": 0.4437, "step": 5930 }, { "epoch": 0.047519619843041254, "grad_norm": 0.7378301144776566, "learning_rate": 4.750819934405248e-06, "loss": 0.4443, "step": 5940 }, { "epoch": 0.047599619203046374, "grad_norm": 0.9051931133289314, "learning_rate": 4.758819294456444e-06, "loss": 0.4018, "step": 5950 }, { "epoch": 0.047679618563051494, "grad_norm": 0.7044298339856356, "learning_rate": 4.76681865450764e-06, "loss": 0.399, "step": 5960 }, { "epoch": 0.047759617923056615, "grad_norm": 0.695409116907069, "learning_rate": 4.774818014558836e-06, "loss": 0.4245, "step": 5970 }, { "epoch": 0.047839617283061735, "grad_norm": 0.6636800326886599, "learning_rate": 4.782817374610031e-06, "loss": 0.4023, "step": 5980 }, { "epoch": 0.047919616643066855, "grad_norm": 0.8216151070494467, "learning_rate": 4.790816734661228e-06, "loss": 0.4262, "step": 5990 }, { "epoch": 0.047999616003071975, "grad_norm": 0.8223759231547197, "learning_rate": 4.798816094712423e-06, "loss": 0.4398, "step": 6000 }, { "epoch": 0.048079615363077095, "grad_norm": 0.8033000887009285, "learning_rate": 4.8068154547636195e-06, "loss": 0.4158, "step": 6010 }, { "epoch": 0.048159614723082216, "grad_norm": 0.7342902983433318, "learning_rate": 4.814814814814815e-06, "loss": 0.4126, "step": 6020 }, { "epoch": 0.048239614083087336, "grad_norm": 0.7634000835388248, "learning_rate": 4.822814174866011e-06, "loss": 0.4352, "step": 6030 }, { "epoch": 0.048319613443092456, "grad_norm": 0.7248483852253137, "learning_rate": 4.830813534917207e-06, "loss": 0.3845, "step": 6040 }, { "epoch": 0.048399612803097576, "grad_norm": 0.8854204490346038, "learning_rate": 4.838812894968402e-06, "loss": 0.4019, "step": 6050 }, { "epoch": 0.048479612163102696, "grad_norm": 0.8747182560718072, "learning_rate": 4.846812255019599e-06, "loss": 0.4339, "step": 6060 }, { "epoch": 0.04855961152310782, "grad_norm": 0.8580183945201533, "learning_rate": 4.854811615070795e-06, "loss": 0.4173, "step": 6070 }, { "epoch": 0.04863961088311294, "grad_norm": 0.7762011122451777, "learning_rate": 4.8628109751219905e-06, "loss": 0.4289, "step": 6080 }, { "epoch": 0.04871961024311806, "grad_norm": 0.6391251991892131, "learning_rate": 4.870810335173187e-06, "loss": 0.4284, "step": 6090 }, { "epoch": 0.04879960960312318, "grad_norm": 1.004742911052753, "learning_rate": 4.878809695224382e-06, "loss": 0.4566, "step": 6100 }, { "epoch": 0.0488796089631283, "grad_norm": 0.8925421525049868, "learning_rate": 4.886809055275578e-06, "loss": 0.403, "step": 6110 }, { "epoch": 0.04895960832313342, "grad_norm": 0.6794827219927263, "learning_rate": 4.894808415326774e-06, "loss": 0.4147, "step": 6120 }, { "epoch": 0.04903960768313854, "grad_norm": 0.7499724194341333, "learning_rate": 4.9028077753779705e-06, "loss": 0.4175, "step": 6130 }, { "epoch": 0.04911960704314366, "grad_norm": 0.7400658598365715, "learning_rate": 4.910807135429166e-06, "loss": 0.4299, "step": 6140 }, { "epoch": 0.04919960640314878, "grad_norm": 1.7983549292526597, "learning_rate": 4.918806495480362e-06, "loss": 0.4116, "step": 6150 }, { "epoch": 0.04927960576315389, "grad_norm": 1.9938733421374386, "learning_rate": 4.926805855531558e-06, "loss": 0.4002, "step": 6160 }, { "epoch": 0.04935960512315901, "grad_norm": 0.9474486566622513, "learning_rate": 4.934805215582753e-06, "loss": 0.4008, "step": 6170 }, { "epoch": 0.04943960448316413, "grad_norm": 0.8307656516497727, "learning_rate": 4.94280457563395e-06, "loss": 0.4174, "step": 6180 }, { "epoch": 0.04951960384316925, "grad_norm": 0.6713444006588423, "learning_rate": 4.950803935685146e-06, "loss": 0.4392, "step": 6190 }, { "epoch": 0.04959960320317437, "grad_norm": 0.755564469281041, "learning_rate": 4.9588032957363415e-06, "loss": 0.428, "step": 6200 }, { "epoch": 0.04967960256317949, "grad_norm": 0.7536154744672062, "learning_rate": 4.966802655787538e-06, "loss": 0.4229, "step": 6210 }, { "epoch": 0.04975960192318461, "grad_norm": 0.8857434820048017, "learning_rate": 4.974802015838733e-06, "loss": 0.433, "step": 6220 }, { "epoch": 0.04983960128318973, "grad_norm": 0.7675174058350489, "learning_rate": 4.982801375889929e-06, "loss": 0.4136, "step": 6230 }, { "epoch": 0.04991960064319485, "grad_norm": 0.798037042114789, "learning_rate": 4.990800735941125e-06, "loss": 0.4163, "step": 6240 }, { "epoch": 0.04999960000319997, "grad_norm": 0.7516994857111614, "learning_rate": 4.9988000959923215e-06, "loss": 0.4143, "step": 6250 }, { "epoch": 0.05007959936320509, "grad_norm": 0.8302356173670787, "learning_rate": 5.006799456043517e-06, "loss": 0.4215, "step": 6260 }, { "epoch": 0.050159598723210214, "grad_norm": 0.797675966096359, "learning_rate": 5.0147988160947125e-06, "loss": 0.4195, "step": 6270 }, { "epoch": 0.050239598083215334, "grad_norm": 0.8644161647231903, "learning_rate": 5.022798176145909e-06, "loss": 0.4076, "step": 6280 }, { "epoch": 0.050319597443220454, "grad_norm": 0.8098154203555272, "learning_rate": 5.030797536197105e-06, "loss": 0.4534, "step": 6290 }, { "epoch": 0.050399596803225574, "grad_norm": 0.7056791640318798, "learning_rate": 5.038796896248301e-06, "loss": 0.4236, "step": 6300 }, { "epoch": 0.050479596163230694, "grad_norm": 0.8249471510785888, "learning_rate": 5.046796256299497e-06, "loss": 0.4291, "step": 6310 }, { "epoch": 0.050559595523235815, "grad_norm": 0.7222395321714273, "learning_rate": 5.0547956163506925e-06, "loss": 0.4233, "step": 6320 }, { "epoch": 0.050639594883240935, "grad_norm": 0.7057643939954904, "learning_rate": 5.062794976401888e-06, "loss": 0.4213, "step": 6330 }, { "epoch": 0.050719594243246055, "grad_norm": 0.8264437067679866, "learning_rate": 5.070794336453084e-06, "loss": 0.4077, "step": 6340 }, { "epoch": 0.050799593603251175, "grad_norm": 1.0388607693903051, "learning_rate": 5.078793696504281e-06, "loss": 0.4366, "step": 6350 }, { "epoch": 0.050879592963256295, "grad_norm": 0.8080012806728645, "learning_rate": 5.086793056555476e-06, "loss": 0.4225, "step": 6360 }, { "epoch": 0.050959592323261416, "grad_norm": 0.7476369787987285, "learning_rate": 5.094792416606672e-06, "loss": 0.407, "step": 6370 }, { "epoch": 0.051039591683266536, "grad_norm": 0.9679185864544443, "learning_rate": 5.102791776657868e-06, "loss": 0.426, "step": 6380 }, { "epoch": 0.051119591043271656, "grad_norm": 0.6875368812585673, "learning_rate": 5.1107911367090634e-06, "loss": 0.3896, "step": 6390 }, { "epoch": 0.051199590403276776, "grad_norm": 0.7386358027938409, "learning_rate": 5.11879049676026e-06, "loss": 0.417, "step": 6400 }, { "epoch": 0.051279589763281896, "grad_norm": 0.9381132321231814, "learning_rate": 5.126789856811456e-06, "loss": 0.4305, "step": 6410 }, { "epoch": 0.05135958912328702, "grad_norm": 0.8239862096758729, "learning_rate": 5.134789216862652e-06, "loss": 0.396, "step": 6420 }, { "epoch": 0.05143958848329214, "grad_norm": 0.7476020449143346, "learning_rate": 5.142788576913847e-06, "loss": 0.4171, "step": 6430 }, { "epoch": 0.05151958784329726, "grad_norm": 0.7242757879836423, "learning_rate": 5.1507879369650434e-06, "loss": 0.4125, "step": 6440 }, { "epoch": 0.05159958720330237, "grad_norm": 0.7861421746357671, "learning_rate": 5.158787297016239e-06, "loss": 0.4101, "step": 6450 }, { "epoch": 0.05167958656330749, "grad_norm": 0.824212383214468, "learning_rate": 5.166786657067435e-06, "loss": 0.4244, "step": 6460 }, { "epoch": 0.05175958592331261, "grad_norm": 0.7955206953941191, "learning_rate": 5.174786017118631e-06, "loss": 0.4216, "step": 6470 }, { "epoch": 0.05183958528331773, "grad_norm": 0.8316708836999984, "learning_rate": 5.182785377169826e-06, "loss": 0.4342, "step": 6480 }, { "epoch": 0.05191958464332285, "grad_norm": 0.8272258906930956, "learning_rate": 5.190784737221023e-06, "loss": 0.414, "step": 6490 }, { "epoch": 0.05199958400332797, "grad_norm": 0.6823043581615391, "learning_rate": 5.198784097272218e-06, "loss": 0.3984, "step": 6500 }, { "epoch": 0.05207958336333309, "grad_norm": 0.8817831373319156, "learning_rate": 5.206783457323414e-06, "loss": 0.4466, "step": 6510 }, { "epoch": 0.05215958272333821, "grad_norm": 0.746756423921006, "learning_rate": 5.214782817374611e-06, "loss": 0.4, "step": 6520 }, { "epoch": 0.05223958208334333, "grad_norm": 0.8254132469292718, "learning_rate": 5.222782177425806e-06, "loss": 0.4243, "step": 6530 }, { "epoch": 0.05231958144334845, "grad_norm": 0.7483453961652315, "learning_rate": 5.230781537477002e-06, "loss": 0.4029, "step": 6540 }, { "epoch": 0.05239958080335357, "grad_norm": 0.6692314580646245, "learning_rate": 5.238780897528198e-06, "loss": 0.3966, "step": 6550 }, { "epoch": 0.05247958016335869, "grad_norm": 0.8743442425248779, "learning_rate": 5.2467802575793936e-06, "loss": 0.3911, "step": 6560 }, { "epoch": 0.05255957952336381, "grad_norm": 0.7300020550439825, "learning_rate": 5.25477961763059e-06, "loss": 0.4129, "step": 6570 }, { "epoch": 0.05263957888336893, "grad_norm": 0.8276407888138613, "learning_rate": 5.262778977681786e-06, "loss": 0.4099, "step": 6580 }, { "epoch": 0.05271957824337405, "grad_norm": 0.8407987704851236, "learning_rate": 5.270778337732981e-06, "loss": 0.4128, "step": 6590 }, { "epoch": 0.05279957760337917, "grad_norm": 0.7184040081887624, "learning_rate": 5.278777697784177e-06, "loss": 0.4013, "step": 6600 }, { "epoch": 0.052879576963384294, "grad_norm": 0.7928520992660366, "learning_rate": 5.2867770578353736e-06, "loss": 0.4344, "step": 6610 }, { "epoch": 0.052959576323389414, "grad_norm": 0.7836033016591775, "learning_rate": 5.294776417886569e-06, "loss": 0.4232, "step": 6620 }, { "epoch": 0.053039575683394534, "grad_norm": 0.7842755586264969, "learning_rate": 5.302775777937765e-06, "loss": 0.414, "step": 6630 }, { "epoch": 0.053119575043399654, "grad_norm": 0.7662370492795886, "learning_rate": 5.310775137988962e-06, "loss": 0.4137, "step": 6640 }, { "epoch": 0.053199574403404774, "grad_norm": 0.7518825044624718, "learning_rate": 5.318774498040156e-06, "loss": 0.4172, "step": 6650 }, { "epoch": 0.053279573763409895, "grad_norm": 0.7486588083347894, "learning_rate": 5.326773858091353e-06, "loss": 0.4121, "step": 6660 }, { "epoch": 0.053359573123415015, "grad_norm": 0.7918996874502444, "learning_rate": 5.334773218142549e-06, "loss": 0.4354, "step": 6670 }, { "epoch": 0.053439572483420135, "grad_norm": 0.772870515190994, "learning_rate": 5.3427725781937446e-06, "loss": 0.4205, "step": 6680 }, { "epoch": 0.053519571843425255, "grad_norm": 0.7823289026439282, "learning_rate": 5.350771938244941e-06, "loss": 0.393, "step": 6690 }, { "epoch": 0.053599571203430375, "grad_norm": 0.7683507502459773, "learning_rate": 5.358771298296137e-06, "loss": 0.407, "step": 6700 }, { "epoch": 0.053679570563435496, "grad_norm": 0.7485237765758522, "learning_rate": 5.366770658347332e-06, "loss": 0.4232, "step": 6710 }, { "epoch": 0.053759569923440616, "grad_norm": 0.8048908909291014, "learning_rate": 5.374770018398528e-06, "loss": 0.4149, "step": 6720 }, { "epoch": 0.05383956928344573, "grad_norm": 0.8634183753228784, "learning_rate": 5.3827693784497246e-06, "loss": 0.4277, "step": 6730 }, { "epoch": 0.05391956864345085, "grad_norm": 0.8629550886011823, "learning_rate": 5.39076873850092e-06, "loss": 0.4211, "step": 6740 }, { "epoch": 0.05399956800345597, "grad_norm": 0.9926210686947936, "learning_rate": 5.398768098552116e-06, "loss": 0.4319, "step": 6750 }, { "epoch": 0.05407956736346109, "grad_norm": 0.9544796860209203, "learning_rate": 5.406767458603313e-06, "loss": 0.4144, "step": 6760 }, { "epoch": 0.05415956672346621, "grad_norm": 0.7141430119291869, "learning_rate": 5.414766818654507e-06, "loss": 0.4198, "step": 6770 }, { "epoch": 0.05423956608347133, "grad_norm": 0.8163443807047818, "learning_rate": 5.422766178705704e-06, "loss": 0.4282, "step": 6780 }, { "epoch": 0.05431956544347645, "grad_norm": 0.8980133033882974, "learning_rate": 5.4307655387569e-06, "loss": 0.4271, "step": 6790 }, { "epoch": 0.05439956480348157, "grad_norm": 0.7583205706681612, "learning_rate": 5.4387648988080955e-06, "loss": 0.4215, "step": 6800 }, { "epoch": 0.05447956416348669, "grad_norm": 0.8601786387256353, "learning_rate": 5.446764258859292e-06, "loss": 0.4287, "step": 6810 }, { "epoch": 0.05455956352349181, "grad_norm": 0.7146015163098371, "learning_rate": 5.454763618910488e-06, "loss": 0.4394, "step": 6820 }, { "epoch": 0.05463956288349693, "grad_norm": 0.7068819896578783, "learning_rate": 5.462762978961683e-06, "loss": 0.4141, "step": 6830 }, { "epoch": 0.05471956224350205, "grad_norm": 0.8181889256730985, "learning_rate": 5.470762339012879e-06, "loss": 0.4449, "step": 6840 }, { "epoch": 0.05479956160350717, "grad_norm": 0.8489011359815101, "learning_rate": 5.4787616990640755e-06, "loss": 0.4149, "step": 6850 }, { "epoch": 0.05487956096351229, "grad_norm": 0.7364447466975118, "learning_rate": 5.486761059115271e-06, "loss": 0.4218, "step": 6860 }, { "epoch": 0.05495956032351741, "grad_norm": 0.8275679029633717, "learning_rate": 5.494760419166467e-06, "loss": 0.4571, "step": 6870 }, { "epoch": 0.05503955968352253, "grad_norm": 1.375677984473118, "learning_rate": 5.502759779217664e-06, "loss": 0.4365, "step": 6880 }, { "epoch": 0.05511955904352765, "grad_norm": 0.8476301196477392, "learning_rate": 5.510759139268858e-06, "loss": 0.4052, "step": 6890 }, { "epoch": 0.05519955840353277, "grad_norm": 0.7646104536188305, "learning_rate": 5.518758499320055e-06, "loss": 0.4249, "step": 6900 }, { "epoch": 0.05527955776353789, "grad_norm": 0.7287145741603691, "learning_rate": 5.526757859371251e-06, "loss": 0.4264, "step": 6910 }, { "epoch": 0.05535955712354301, "grad_norm": 0.7258483395543417, "learning_rate": 5.5347572194224465e-06, "loss": 0.4057, "step": 6920 }, { "epoch": 0.05543955648354813, "grad_norm": 0.6949135968161559, "learning_rate": 5.542756579473643e-06, "loss": 0.4111, "step": 6930 }, { "epoch": 0.05551955584355325, "grad_norm": 0.7819325827595374, "learning_rate": 5.550755939524839e-06, "loss": 0.4278, "step": 6940 }, { "epoch": 0.05559955520355837, "grad_norm": 0.7316623226922514, "learning_rate": 5.558755299576034e-06, "loss": 0.4065, "step": 6950 }, { "epoch": 0.055679554563563494, "grad_norm": 0.7351588541656371, "learning_rate": 5.56675465962723e-06, "loss": 0.413, "step": 6960 }, { "epoch": 0.055759553923568614, "grad_norm": 0.8531297261307409, "learning_rate": 5.5747540196784265e-06, "loss": 0.414, "step": 6970 }, { "epoch": 0.055839553283573734, "grad_norm": 0.7091116241702139, "learning_rate": 5.582753379729622e-06, "loss": 0.4136, "step": 6980 }, { "epoch": 0.055919552643578854, "grad_norm": 0.6525508515636341, "learning_rate": 5.590752739780818e-06, "loss": 0.3999, "step": 6990 }, { "epoch": 0.055999552003583974, "grad_norm": 0.847349465679055, "learning_rate": 5.598752099832015e-06, "loss": 0.4132, "step": 7000 }, { "epoch": 0.056079551363589095, "grad_norm": 0.7255808216911677, "learning_rate": 5.606751459883209e-06, "loss": 0.4314, "step": 7010 }, { "epoch": 0.05615955072359421, "grad_norm": 0.7206170792907532, "learning_rate": 5.614750819934406e-06, "loss": 0.4096, "step": 7020 }, { "epoch": 0.05623955008359933, "grad_norm": 0.8038911680104806, "learning_rate": 5.622750179985602e-06, "loss": 0.4135, "step": 7030 }, { "epoch": 0.05631954944360445, "grad_norm": 0.8947816811192346, "learning_rate": 5.6307495400367975e-06, "loss": 0.4332, "step": 7040 }, { "epoch": 0.05639954880360957, "grad_norm": 0.7051515913986038, "learning_rate": 5.638748900087994e-06, "loss": 0.4231, "step": 7050 }, { "epoch": 0.05647954816361469, "grad_norm": 0.7580216965150411, "learning_rate": 5.64674826013919e-06, "loss": 0.4281, "step": 7060 }, { "epoch": 0.05655954752361981, "grad_norm": 0.799958668553628, "learning_rate": 5.654747620190385e-06, "loss": 0.4251, "step": 7070 }, { "epoch": 0.05663954688362493, "grad_norm": 0.7461140596993789, "learning_rate": 5.662746980241581e-06, "loss": 0.4151, "step": 7080 }, { "epoch": 0.05671954624363005, "grad_norm": 0.6817063734321613, "learning_rate": 5.6707463402927775e-06, "loss": 0.4034, "step": 7090 }, { "epoch": 0.05679954560363517, "grad_norm": 0.72588944939, "learning_rate": 5.678745700343973e-06, "loss": 0.411, "step": 7100 }, { "epoch": 0.05687954496364029, "grad_norm": 0.7367332583906436, "learning_rate": 5.686745060395169e-06, "loss": 0.4243, "step": 7110 }, { "epoch": 0.05695954432364541, "grad_norm": 0.8787659628868579, "learning_rate": 5.694744420446364e-06, "loss": 0.4282, "step": 7120 }, { "epoch": 0.05703954368365053, "grad_norm": 0.8024169914805899, "learning_rate": 5.70274378049756e-06, "loss": 0.4421, "step": 7130 }, { "epoch": 0.05711954304365565, "grad_norm": 0.7544464281314796, "learning_rate": 5.710743140548757e-06, "loss": 0.4132, "step": 7140 }, { "epoch": 0.05719954240366077, "grad_norm": 0.7753824059747692, "learning_rate": 5.718742500599952e-06, "loss": 0.4414, "step": 7150 }, { "epoch": 0.05727954176366589, "grad_norm": 0.7343234360571579, "learning_rate": 5.7267418606511485e-06, "loss": 0.4257, "step": 7160 }, { "epoch": 0.05735954112367101, "grad_norm": 0.677095555209141, "learning_rate": 5.734741220702345e-06, "loss": 0.4077, "step": 7170 }, { "epoch": 0.05743954048367613, "grad_norm": 0.7676771522640133, "learning_rate": 5.7427405807535395e-06, "loss": 0.4154, "step": 7180 }, { "epoch": 0.05751953984368125, "grad_norm": 0.8447271800633204, "learning_rate": 5.750739940804736e-06, "loss": 0.4241, "step": 7190 }, { "epoch": 0.05759953920368637, "grad_norm": 0.6573709296267352, "learning_rate": 5.758739300855932e-06, "loss": 0.403, "step": 7200 }, { "epoch": 0.05767953856369149, "grad_norm": 0.7705734564774043, "learning_rate": 5.766738660907128e-06, "loss": 0.4201, "step": 7210 }, { "epoch": 0.05775953792369661, "grad_norm": 0.7473288823419741, "learning_rate": 5.774738020958324e-06, "loss": 0.4305, "step": 7220 }, { "epoch": 0.05783953728370173, "grad_norm": 0.7821912721036804, "learning_rate": 5.78273738100952e-06, "loss": 0.4176, "step": 7230 }, { "epoch": 0.05791953664370685, "grad_norm": 0.7602793039426214, "learning_rate": 5.790736741060715e-06, "loss": 0.4248, "step": 7240 }, { "epoch": 0.05799953600371197, "grad_norm": 0.8373782328611198, "learning_rate": 5.798736101111911e-06, "loss": 0.4264, "step": 7250 }, { "epoch": 0.05807953536371709, "grad_norm": 0.7804225305237373, "learning_rate": 5.806735461163108e-06, "loss": 0.4226, "step": 7260 }, { "epoch": 0.05815953472372221, "grad_norm": 0.8876171315189683, "learning_rate": 5.814734821214303e-06, "loss": 0.428, "step": 7270 }, { "epoch": 0.05823953408372733, "grad_norm": 0.735610834080981, "learning_rate": 5.8227341812654995e-06, "loss": 0.4156, "step": 7280 }, { "epoch": 0.05831953344373245, "grad_norm": 0.7086727551777332, "learning_rate": 5.830733541316696e-06, "loss": 0.4062, "step": 7290 }, { "epoch": 0.058399532803737574, "grad_norm": 0.7337672348554692, "learning_rate": 5.8387329013678905e-06, "loss": 0.4216, "step": 7300 }, { "epoch": 0.05847953216374269, "grad_norm": 0.7172527018415951, "learning_rate": 5.846732261419087e-06, "loss": 0.42, "step": 7310 }, { "epoch": 0.05855953152374781, "grad_norm": 0.7294660657048117, "learning_rate": 5.854731621470283e-06, "loss": 0.4191, "step": 7320 }, { "epoch": 0.05863953088375293, "grad_norm": 0.767309902487694, "learning_rate": 5.862730981521479e-06, "loss": 0.417, "step": 7330 }, { "epoch": 0.05871953024375805, "grad_norm": 0.8294728316976088, "learning_rate": 5.870730341572675e-06, "loss": 0.4363, "step": 7340 }, { "epoch": 0.05879952960376317, "grad_norm": 0.6459025653133511, "learning_rate": 5.878729701623871e-06, "loss": 0.4306, "step": 7350 }, { "epoch": 0.05887952896376829, "grad_norm": 0.7250101248174007, "learning_rate": 5.886729061675066e-06, "loss": 0.4087, "step": 7360 }, { "epoch": 0.05895952832377341, "grad_norm": 0.759690291349007, "learning_rate": 5.894728421726262e-06, "loss": 0.4394, "step": 7370 }, { "epoch": 0.05903952768377853, "grad_norm": 0.7628869205141775, "learning_rate": 5.902727781777459e-06, "loss": 0.4041, "step": 7380 }, { "epoch": 0.05911952704378365, "grad_norm": 0.9931032154280071, "learning_rate": 5.910727141828654e-06, "loss": 0.4102, "step": 7390 }, { "epoch": 0.05919952640378877, "grad_norm": 0.8586084844634542, "learning_rate": 5.9187265018798505e-06, "loss": 0.4357, "step": 7400 }, { "epoch": 0.05927952576379389, "grad_norm": 0.7621882387099648, "learning_rate": 5.926725861931046e-06, "loss": 0.4154, "step": 7410 }, { "epoch": 0.05935952512379901, "grad_norm": 0.683718029375799, "learning_rate": 5.9347252219822414e-06, "loss": 0.4036, "step": 7420 }, { "epoch": 0.05943952448380413, "grad_norm": 0.8508081423243864, "learning_rate": 5.942724582033438e-06, "loss": 0.4112, "step": 7430 }, { "epoch": 0.05951952384380925, "grad_norm": 0.8091155003709047, "learning_rate": 5.950723942084634e-06, "loss": 0.4369, "step": 7440 }, { "epoch": 0.05959952320381437, "grad_norm": 0.8506390566289003, "learning_rate": 5.95872330213583e-06, "loss": 0.4432, "step": 7450 }, { "epoch": 0.05967952256381949, "grad_norm": 0.7981486686167805, "learning_rate": 5.966722662187026e-06, "loss": 0.4196, "step": 7460 }, { "epoch": 0.05975952192382461, "grad_norm": 0.6929189056928079, "learning_rate": 5.9747220222382214e-06, "loss": 0.4028, "step": 7470 }, { "epoch": 0.05983952128382973, "grad_norm": 0.8216084714470346, "learning_rate": 5.982721382289417e-06, "loss": 0.3906, "step": 7480 }, { "epoch": 0.05991952064383485, "grad_norm": 0.8514017002708969, "learning_rate": 5.990720742340613e-06, "loss": 0.4316, "step": 7490 }, { "epoch": 0.05999952000383997, "grad_norm": 0.812928699253907, "learning_rate": 5.99872010239181e-06, "loss": 0.4222, "step": 7500 }, { "epoch": 0.06007951936384509, "grad_norm": 0.8225758964829106, "learning_rate": 6.006719462443005e-06, "loss": 0.4023, "step": 7510 }, { "epoch": 0.06015951872385021, "grad_norm": 0.8062602176799216, "learning_rate": 6.0147188224942014e-06, "loss": 0.422, "step": 7520 }, { "epoch": 0.06023951808385533, "grad_norm": 0.7345493871563744, "learning_rate": 6.022718182545397e-06, "loss": 0.4254, "step": 7530 }, { "epoch": 0.06031951744386045, "grad_norm": 0.8137831796881624, "learning_rate": 6.0307175425965924e-06, "loss": 0.4181, "step": 7540 }, { "epoch": 0.06039951680386557, "grad_norm": 0.7546137284306326, "learning_rate": 6.038716902647789e-06, "loss": 0.4349, "step": 7550 }, { "epoch": 0.06047951616387069, "grad_norm": 0.7233085318056991, "learning_rate": 6.046716262698985e-06, "loss": 0.425, "step": 7560 }, { "epoch": 0.06055951552387581, "grad_norm": 0.8480044896419149, "learning_rate": 6.054715622750181e-06, "loss": 0.4253, "step": 7570 }, { "epoch": 0.06063951488388093, "grad_norm": 0.8651806318588238, "learning_rate": 6.062714982801376e-06, "loss": 0.4194, "step": 7580 }, { "epoch": 0.060719514243886046, "grad_norm": 1.0797032948467522, "learning_rate": 6.0707143428525724e-06, "loss": 0.4172, "step": 7590 }, { "epoch": 0.060799513603891166, "grad_norm": 0.8977844107337305, "learning_rate": 6.078713702903768e-06, "loss": 0.4045, "step": 7600 }, { "epoch": 0.060879512963896286, "grad_norm": 0.7750731063441698, "learning_rate": 6.086713062954964e-06, "loss": 0.4189, "step": 7610 }, { "epoch": 0.060959512323901406, "grad_norm": 0.7529900947517107, "learning_rate": 6.094712423006161e-06, "loss": 0.4105, "step": 7620 }, { "epoch": 0.061039511683906526, "grad_norm": 0.9631520983371807, "learning_rate": 6.102711783057356e-06, "loss": 0.4457, "step": 7630 }, { "epoch": 0.061119511043911647, "grad_norm": 0.6940463899451977, "learning_rate": 6.110711143108552e-06, "loss": 0.4444, "step": 7640 }, { "epoch": 0.06119951040391677, "grad_norm": 0.8589705783638818, "learning_rate": 6.118710503159748e-06, "loss": 0.4104, "step": 7650 }, { "epoch": 0.06127950976392189, "grad_norm": 0.8939848750346538, "learning_rate": 6.126709863210943e-06, "loss": 0.4278, "step": 7660 }, { "epoch": 0.06135950912392701, "grad_norm": 0.9330295945570575, "learning_rate": 6.13470922326214e-06, "loss": 0.4405, "step": 7670 }, { "epoch": 0.06143950848393213, "grad_norm": 0.8255506825271308, "learning_rate": 6.142708583313336e-06, "loss": 0.4725, "step": 7680 }, { "epoch": 0.06151950784393725, "grad_norm": 0.7721676031745077, "learning_rate": 6.150707943364531e-06, "loss": 0.3995, "step": 7690 }, { "epoch": 0.06159950720394237, "grad_norm": 0.8539653382332946, "learning_rate": 6.158707303415727e-06, "loss": 0.4112, "step": 7700 }, { "epoch": 0.06167950656394749, "grad_norm": 0.9080892628857296, "learning_rate": 6.166706663466923e-06, "loss": 0.4184, "step": 7710 }, { "epoch": 0.06175950592395261, "grad_norm": 0.7282918785259038, "learning_rate": 6.174706023518119e-06, "loss": 0.4193, "step": 7720 }, { "epoch": 0.06183950528395773, "grad_norm": 0.8290178427543562, "learning_rate": 6.182705383569315e-06, "loss": 0.4178, "step": 7730 }, { "epoch": 0.06191950464396285, "grad_norm": 0.8095281081355657, "learning_rate": 6.190704743620511e-06, "loss": 0.4129, "step": 7740 }, { "epoch": 0.06199950400396797, "grad_norm": 0.8688888938386247, "learning_rate": 6.198704103671706e-06, "loss": 0.4282, "step": 7750 }, { "epoch": 0.06207950336397309, "grad_norm": 0.8254926102029508, "learning_rate": 6.2067034637229026e-06, "loss": 0.4324, "step": 7760 }, { "epoch": 0.06215950272397821, "grad_norm": 0.852468858851944, "learning_rate": 6.214702823774098e-06, "loss": 0.4402, "step": 7770 }, { "epoch": 0.06223950208398333, "grad_norm": 2.1648865326662032, "learning_rate": 6.222702183825294e-06, "loss": 0.3994, "step": 7780 }, { "epoch": 0.06231950144398845, "grad_norm": 0.6971320239232389, "learning_rate": 6.230701543876491e-06, "loss": 0.4342, "step": 7790 }, { "epoch": 0.06239950080399357, "grad_norm": 0.848280582165616, "learning_rate": 6.238700903927685e-06, "loss": 0.4174, "step": 7800 }, { "epoch": 0.06247950016399869, "grad_norm": 0.6935359952991249, "learning_rate": 6.246700263978882e-06, "loss": 0.4047, "step": 7810 }, { "epoch": 0.0625594995240038, "grad_norm": 0.7523979790711425, "learning_rate": 6.254699624030078e-06, "loss": 0.4514, "step": 7820 }, { "epoch": 0.06263949888400892, "grad_norm": 0.8455736374815672, "learning_rate": 6.2626989840812736e-06, "loss": 0.4251, "step": 7830 }, { "epoch": 0.06271949824401404, "grad_norm": 0.7581838766183062, "learning_rate": 6.27069834413247e-06, "loss": 0.4129, "step": 7840 }, { "epoch": 0.06279949760401916, "grad_norm": 0.6778320482430824, "learning_rate": 6.278697704183666e-06, "loss": 0.4223, "step": 7850 }, { "epoch": 0.06287949696402428, "grad_norm": 0.9005644989526148, "learning_rate": 6.286697064234861e-06, "loss": 0.4126, "step": 7860 }, { "epoch": 0.0629594963240294, "grad_norm": 0.8149670790428409, "learning_rate": 6.294696424286057e-06, "loss": 0.423, "step": 7870 }, { "epoch": 0.06303949568403452, "grad_norm": 0.721998071534193, "learning_rate": 6.3026957843372536e-06, "loss": 0.4308, "step": 7880 }, { "epoch": 0.06311949504403964, "grad_norm": 0.7892502102762363, "learning_rate": 6.310695144388449e-06, "loss": 0.4267, "step": 7890 }, { "epoch": 0.06319949440404476, "grad_norm": 0.748367111359071, "learning_rate": 6.318694504439645e-06, "loss": 0.4258, "step": 7900 }, { "epoch": 0.06327949376404989, "grad_norm": 0.6882091672561046, "learning_rate": 6.326693864490842e-06, "loss": 0.462, "step": 7910 }, { "epoch": 0.063359493124055, "grad_norm": 0.7326353946870572, "learning_rate": 6.334693224542036e-06, "loss": 0.43, "step": 7920 }, { "epoch": 0.06343949248406013, "grad_norm": 0.7142600762530484, "learning_rate": 6.342692584593233e-06, "loss": 0.4135, "step": 7930 }, { "epoch": 0.06351949184406525, "grad_norm": 0.7469867282320235, "learning_rate": 6.350691944644429e-06, "loss": 0.4201, "step": 7940 }, { "epoch": 0.06359949120407037, "grad_norm": 0.7248617231857303, "learning_rate": 6.3586913046956245e-06, "loss": 0.4543, "step": 7950 }, { "epoch": 0.06367949056407549, "grad_norm": 0.7720128861416149, "learning_rate": 6.366690664746821e-06, "loss": 0.4238, "step": 7960 }, { "epoch": 0.0637594899240806, "grad_norm": 0.887664850533525, "learning_rate": 6.374690024798017e-06, "loss": 0.4173, "step": 7970 }, { "epoch": 0.06383948928408573, "grad_norm": 0.7659892959604853, "learning_rate": 6.382689384849212e-06, "loss": 0.4163, "step": 7980 }, { "epoch": 0.06391948864409085, "grad_norm": 0.6664494757054431, "learning_rate": 6.390688744900408e-06, "loss": 0.4276, "step": 7990 }, { "epoch": 0.06399948800409597, "grad_norm": 0.6920568608854232, "learning_rate": 6.3986881049516045e-06, "loss": 0.4024, "step": 8000 }, { "epoch": 0.06407948736410109, "grad_norm": 0.7756544848438256, "learning_rate": 6.4066874650028e-06, "loss": 0.4223, "step": 8010 }, { "epoch": 0.06415948672410621, "grad_norm": 0.7153514629020057, "learning_rate": 6.414686825053996e-06, "loss": 0.4441, "step": 8020 }, { "epoch": 0.06423948608411133, "grad_norm": 0.8388540933386982, "learning_rate": 6.422686185105193e-06, "loss": 0.425, "step": 8030 }, { "epoch": 0.06431948544411645, "grad_norm": 0.78667075627769, "learning_rate": 6.430685545156387e-06, "loss": 0.4297, "step": 8040 }, { "epoch": 0.06439948480412157, "grad_norm": 0.8907176550108923, "learning_rate": 6.438684905207584e-06, "loss": 0.4225, "step": 8050 }, { "epoch": 0.06447948416412669, "grad_norm": 0.7978696484540556, "learning_rate": 6.44668426525878e-06, "loss": 0.4308, "step": 8060 }, { "epoch": 0.06455948352413181, "grad_norm": 0.822704845147674, "learning_rate": 6.4546836253099755e-06, "loss": 0.4157, "step": 8070 }, { "epoch": 0.06463948288413693, "grad_norm": 0.8021713515013377, "learning_rate": 6.462682985361172e-06, "loss": 0.44, "step": 8080 }, { "epoch": 0.06471948224414205, "grad_norm": 0.5945777605311626, "learning_rate": 6.470682345412368e-06, "loss": 0.4087, "step": 8090 }, { "epoch": 0.06479948160414717, "grad_norm": 0.896634199657745, "learning_rate": 6.478681705463563e-06, "loss": 0.4365, "step": 8100 }, { "epoch": 0.06487948096415229, "grad_norm": 0.7125616215375814, "learning_rate": 6.486681065514759e-06, "loss": 0.3801, "step": 8110 }, { "epoch": 0.06495948032415741, "grad_norm": 0.7428642947318103, "learning_rate": 6.4946804255659555e-06, "loss": 0.4231, "step": 8120 }, { "epoch": 0.06503947968416253, "grad_norm": 0.7950906998073008, "learning_rate": 6.502679785617151e-06, "loss": 0.427, "step": 8130 }, { "epoch": 0.06511947904416765, "grad_norm": 0.7705933807721023, "learning_rate": 6.510679145668347e-06, "loss": 0.401, "step": 8140 }, { "epoch": 0.06519947840417277, "grad_norm": 0.7763146875784712, "learning_rate": 6.518678505719544e-06, "loss": 0.4272, "step": 8150 }, { "epoch": 0.06527947776417789, "grad_norm": 0.7777066028630303, "learning_rate": 6.526677865770738e-06, "loss": 0.418, "step": 8160 }, { "epoch": 0.06535947712418301, "grad_norm": 0.6660845332306095, "learning_rate": 6.534677225821935e-06, "loss": 0.4405, "step": 8170 }, { "epoch": 0.06543947648418813, "grad_norm": 0.8149098823689624, "learning_rate": 6.542676585873131e-06, "loss": 0.4377, "step": 8180 }, { "epoch": 0.06551947584419325, "grad_norm": 0.7145622294247337, "learning_rate": 6.5506759459243265e-06, "loss": 0.4276, "step": 8190 }, { "epoch": 0.06559947520419837, "grad_norm": 0.9079013711838755, "learning_rate": 6.558675305975523e-06, "loss": 0.415, "step": 8200 }, { "epoch": 0.06567947456420349, "grad_norm": 0.8203944873933887, "learning_rate": 6.566674666026719e-06, "loss": 0.426, "step": 8210 }, { "epoch": 0.06575947392420861, "grad_norm": 0.790973843508601, "learning_rate": 6.574674026077914e-06, "loss": 0.4163, "step": 8220 }, { "epoch": 0.06583947328421373, "grad_norm": 0.7211097772568297, "learning_rate": 6.58267338612911e-06, "loss": 0.4374, "step": 8230 }, { "epoch": 0.06591947264421885, "grad_norm": 0.7618728292966134, "learning_rate": 6.5906727461803065e-06, "loss": 0.4139, "step": 8240 }, { "epoch": 0.06599947200422397, "grad_norm": 0.7065179824311332, "learning_rate": 6.598672106231502e-06, "loss": 0.41, "step": 8250 }, { "epoch": 0.06607947136422909, "grad_norm": 0.844769495402148, "learning_rate": 6.606671466282698e-06, "loss": 0.4227, "step": 8260 }, { "epoch": 0.06615947072423421, "grad_norm": 0.8634593059917065, "learning_rate": 6.614670826333895e-06, "loss": 0.4124, "step": 8270 }, { "epoch": 0.06623947008423933, "grad_norm": 0.7751391423793837, "learning_rate": 6.622670186385089e-06, "loss": 0.4287, "step": 8280 }, { "epoch": 0.06631946944424445, "grad_norm": 0.7999977168540645, "learning_rate": 6.630669546436286e-06, "loss": 0.4236, "step": 8290 }, { "epoch": 0.06639946880424957, "grad_norm": 0.8321313814451414, "learning_rate": 6.638668906487482e-06, "loss": 0.3993, "step": 8300 }, { "epoch": 0.06647946816425468, "grad_norm": 0.803990400729381, "learning_rate": 6.6466682665386775e-06, "loss": 0.4211, "step": 8310 }, { "epoch": 0.0665594675242598, "grad_norm": 0.7060920922166406, "learning_rate": 6.654667626589874e-06, "loss": 0.4178, "step": 8320 }, { "epoch": 0.06663946688426492, "grad_norm": 0.8243689446316145, "learning_rate": 6.66266698664107e-06, "loss": 0.4279, "step": 8330 }, { "epoch": 0.06671946624427004, "grad_norm": 0.7543464574629886, "learning_rate": 6.670666346692265e-06, "loss": 0.4033, "step": 8340 }, { "epoch": 0.06679946560427516, "grad_norm": 0.7068963460217894, "learning_rate": 6.678665706743461e-06, "loss": 0.4123, "step": 8350 }, { "epoch": 0.06687946496428028, "grad_norm": 0.8157720950970228, "learning_rate": 6.686665066794657e-06, "loss": 0.4227, "step": 8360 }, { "epoch": 0.0669594643242854, "grad_norm": 0.8410263467537004, "learning_rate": 6.694664426845853e-06, "loss": 0.4166, "step": 8370 }, { "epoch": 0.06703946368429052, "grad_norm": 0.7448909681604488, "learning_rate": 6.702663786897049e-06, "loss": 0.423, "step": 8380 }, { "epoch": 0.06711946304429564, "grad_norm": 0.7377083762920874, "learning_rate": 6.710663146948244e-06, "loss": 0.4231, "step": 8390 }, { "epoch": 0.06719946240430076, "grad_norm": 0.6820370204418199, "learning_rate": 6.71866250699944e-06, "loss": 0.4112, "step": 8400 }, { "epoch": 0.06727946176430588, "grad_norm": 0.7780711674831293, "learning_rate": 6.726661867050637e-06, "loss": 0.4244, "step": 8410 }, { "epoch": 0.067359461124311, "grad_norm": 0.876196212005378, "learning_rate": 6.734661227101832e-06, "loss": 0.4182, "step": 8420 }, { "epoch": 0.06743946048431612, "grad_norm": 0.7866103346604862, "learning_rate": 6.7426605871530285e-06, "loss": 0.4409, "step": 8430 }, { "epoch": 0.06751945984432124, "grad_norm": 0.9071926221950564, "learning_rate": 6.750659947204225e-06, "loss": 0.4348, "step": 8440 }, { "epoch": 0.06759945920432636, "grad_norm": 0.7642496531476078, "learning_rate": 6.7586593072554195e-06, "loss": 0.4422, "step": 8450 }, { "epoch": 0.06767945856433148, "grad_norm": 0.8410874577701892, "learning_rate": 6.766658667306616e-06, "loss": 0.4196, "step": 8460 }, { "epoch": 0.0677594579243366, "grad_norm": 0.7745971541754758, "learning_rate": 6.774658027357812e-06, "loss": 0.4127, "step": 8470 }, { "epoch": 0.06783945728434172, "grad_norm": 0.7422159510242838, "learning_rate": 6.782657387409008e-06, "loss": 0.4418, "step": 8480 }, { "epoch": 0.06791945664434684, "grad_norm": 1.1887265679440804, "learning_rate": 6.790656747460204e-06, "loss": 0.4348, "step": 8490 }, { "epoch": 0.06799945600435196, "grad_norm": 0.7650340884369765, "learning_rate": 6.7986561075114e-06, "loss": 0.4063, "step": 8500 }, { "epoch": 0.06807945536435708, "grad_norm": 0.765636643563393, "learning_rate": 6.806655467562595e-06, "loss": 0.4342, "step": 8510 }, { "epoch": 0.0681594547243622, "grad_norm": 0.7308421602145869, "learning_rate": 6.814654827613791e-06, "loss": 0.3984, "step": 8520 }, { "epoch": 0.06823945408436732, "grad_norm": 0.9124259759717053, "learning_rate": 6.822654187664988e-06, "loss": 0.4178, "step": 8530 }, { "epoch": 0.06831945344437244, "grad_norm": 0.7901490429775138, "learning_rate": 6.830653547716183e-06, "loss": 0.4281, "step": 8540 }, { "epoch": 0.06839945280437756, "grad_norm": 0.6619288881557224, "learning_rate": 6.8386529077673795e-06, "loss": 0.4073, "step": 8550 }, { "epoch": 0.06847945216438268, "grad_norm": 0.8946635050390923, "learning_rate": 6.846652267818576e-06, "loss": 0.4292, "step": 8560 }, { "epoch": 0.0685594515243878, "grad_norm": 0.8362095694996007, "learning_rate": 6.8546516278697704e-06, "loss": 0.4376, "step": 8570 }, { "epoch": 0.06863945088439292, "grad_norm": 0.7499443153763287, "learning_rate": 6.862650987920967e-06, "loss": 0.4292, "step": 8580 }, { "epoch": 0.06871945024439804, "grad_norm": 0.7231686991305788, "learning_rate": 6.870650347972163e-06, "loss": 0.4014, "step": 8590 }, { "epoch": 0.06879944960440317, "grad_norm": 0.7234952194720571, "learning_rate": 6.878649708023359e-06, "loss": 0.3974, "step": 8600 }, { "epoch": 0.06887944896440829, "grad_norm": 0.8030701997367343, "learning_rate": 6.886649068074555e-06, "loss": 0.4276, "step": 8610 }, { "epoch": 0.0689594483244134, "grad_norm": 0.6931153034680378, "learning_rate": 6.8946484281257504e-06, "loss": 0.4004, "step": 8620 }, { "epoch": 0.06903944768441853, "grad_norm": 0.9679545736879995, "learning_rate": 6.902647788176946e-06, "loss": 0.4225, "step": 8630 }, { "epoch": 0.06911944704442365, "grad_norm": 0.7417189842039227, "learning_rate": 6.910647148228142e-06, "loss": 0.4153, "step": 8640 }, { "epoch": 0.06919944640442877, "grad_norm": 0.7946553594506627, "learning_rate": 6.918646508279339e-06, "loss": 0.4394, "step": 8650 }, { "epoch": 0.06927944576443389, "grad_norm": 0.7950605755730097, "learning_rate": 6.926645868330534e-06, "loss": 0.4282, "step": 8660 }, { "epoch": 0.069359445124439, "grad_norm": 0.6562382411775249, "learning_rate": 6.9346452283817304e-06, "loss": 0.4119, "step": 8670 }, { "epoch": 0.06943944448444413, "grad_norm": 0.7042178527891966, "learning_rate": 6.942644588432926e-06, "loss": 0.3935, "step": 8680 }, { "epoch": 0.06951944384444925, "grad_norm": 0.7829930319121265, "learning_rate": 6.9506439484841214e-06, "loss": 0.3893, "step": 8690 }, { "epoch": 0.06959944320445437, "grad_norm": 0.7823983125447515, "learning_rate": 6.958643308535318e-06, "loss": 0.4161, "step": 8700 }, { "epoch": 0.06967944256445949, "grad_norm": 0.7978003103731074, "learning_rate": 6.966642668586514e-06, "loss": 0.3883, "step": 8710 }, { "epoch": 0.06975944192446461, "grad_norm": 0.7746680680120757, "learning_rate": 6.97464202863771e-06, "loss": 0.418, "step": 8720 }, { "epoch": 0.06983944128446973, "grad_norm": 0.7550315142511637, "learning_rate": 6.982641388688905e-06, "loss": 0.4286, "step": 8730 }, { "epoch": 0.06991944064447485, "grad_norm": 0.7354010975137131, "learning_rate": 6.9906407487401014e-06, "loss": 0.4353, "step": 8740 }, { "epoch": 0.06999944000447997, "grad_norm": 0.7979516321209136, "learning_rate": 6.998640108791297e-06, "loss": 0.4178, "step": 8750 }, { "epoch": 0.07007943936448509, "grad_norm": 0.8348236701064392, "learning_rate": 7.006639468842493e-06, "loss": 0.4413, "step": 8760 }, { "epoch": 0.07015943872449021, "grad_norm": 0.7216544257587991, "learning_rate": 7.01463882889369e-06, "loss": 0.4148, "step": 8770 }, { "epoch": 0.07023943808449533, "grad_norm": 0.7728944103067048, "learning_rate": 7.022638188944885e-06, "loss": 0.4184, "step": 8780 }, { "epoch": 0.07031943744450045, "grad_norm": 0.8445452974807574, "learning_rate": 7.030637548996081e-06, "loss": 0.4132, "step": 8790 }, { "epoch": 0.07039943680450557, "grad_norm": 0.9185386117197132, "learning_rate": 7.038636909047277e-06, "loss": 0.4254, "step": 8800 }, { "epoch": 0.07047943616451069, "grad_norm": 0.8649896308243605, "learning_rate": 7.046636269098472e-06, "loss": 0.4314, "step": 8810 }, { "epoch": 0.07055943552451581, "grad_norm": 0.7909289512997125, "learning_rate": 7.054635629149669e-06, "loss": 0.4204, "step": 8820 }, { "epoch": 0.07063943488452093, "grad_norm": 0.6873900268108154, "learning_rate": 7.062634989200865e-06, "loss": 0.4089, "step": 8830 }, { "epoch": 0.07071943424452605, "grad_norm": 0.7687676857052521, "learning_rate": 7.07063434925206e-06, "loss": 0.4333, "step": 8840 }, { "epoch": 0.07079943360453117, "grad_norm": 0.9092831664043449, "learning_rate": 7.078633709303256e-06, "loss": 0.4092, "step": 8850 }, { "epoch": 0.07087943296453629, "grad_norm": 0.6890871684803186, "learning_rate": 7.086633069354452e-06, "loss": 0.4231, "step": 8860 }, { "epoch": 0.07095943232454141, "grad_norm": 0.8804434601100858, "learning_rate": 7.094632429405648e-06, "loss": 0.4257, "step": 8870 }, { "epoch": 0.07103943168454652, "grad_norm": 0.8353008374857295, "learning_rate": 7.102631789456844e-06, "loss": 0.424, "step": 8880 }, { "epoch": 0.07111943104455164, "grad_norm": 0.6821973178289632, "learning_rate": 7.110631149508041e-06, "loss": 0.4282, "step": 8890 }, { "epoch": 0.07119943040455676, "grad_norm": 0.7302679136881407, "learning_rate": 7.118630509559235e-06, "loss": 0.4122, "step": 8900 }, { "epoch": 0.07127942976456188, "grad_norm": 0.7326342110377058, "learning_rate": 7.1266298696104316e-06, "loss": 0.4266, "step": 8910 }, { "epoch": 0.071359429124567, "grad_norm": 0.6680112273214374, "learning_rate": 7.134629229661628e-06, "loss": 0.4298, "step": 8920 }, { "epoch": 0.07143942848457212, "grad_norm": 0.762152163416973, "learning_rate": 7.142628589712823e-06, "loss": 0.4279, "step": 8930 }, { "epoch": 0.07151942784457724, "grad_norm": 0.783244637369176, "learning_rate": 7.15062794976402e-06, "loss": 0.3926, "step": 8940 }, { "epoch": 0.07159942720458236, "grad_norm": 0.8433561523525562, "learning_rate": 7.158627309815216e-06, "loss": 0.4225, "step": 8950 }, { "epoch": 0.07167942656458748, "grad_norm": 0.7567491158182517, "learning_rate": 7.166626669866411e-06, "loss": 0.4106, "step": 8960 }, { "epoch": 0.0717594259245926, "grad_norm": 0.9182147638678375, "learning_rate": 7.174626029917607e-06, "loss": 0.4172, "step": 8970 }, { "epoch": 0.07183942528459772, "grad_norm": 0.8370337180028405, "learning_rate": 7.1826253899688026e-06, "loss": 0.4131, "step": 8980 }, { "epoch": 0.07191942464460284, "grad_norm": 0.7347721970892678, "learning_rate": 7.190624750019999e-06, "loss": 0.4334, "step": 8990 }, { "epoch": 0.07199942400460796, "grad_norm": 0.7128112896521479, "learning_rate": 7.198624110071195e-06, "loss": 0.4115, "step": 9000 }, { "epoch": 0.07207942336461308, "grad_norm": 0.7220660015545859, "learning_rate": 7.20662347012239e-06, "loss": 0.4201, "step": 9010 }, { "epoch": 0.0721594227246182, "grad_norm": 0.8753203682278846, "learning_rate": 7.214622830173586e-06, "loss": 0.4249, "step": 9020 }, { "epoch": 0.07223942208462332, "grad_norm": 0.7808932418724313, "learning_rate": 7.2226221902247826e-06, "loss": 0.4126, "step": 9030 }, { "epoch": 0.07231942144462844, "grad_norm": 0.8260175841945936, "learning_rate": 7.230621550275978e-06, "loss": 0.3875, "step": 9040 }, { "epoch": 0.07239942080463356, "grad_norm": 0.6976031543596856, "learning_rate": 7.238620910327174e-06, "loss": 0.4176, "step": 9050 }, { "epoch": 0.07247942016463868, "grad_norm": 0.7404074897782499, "learning_rate": 7.246620270378371e-06, "loss": 0.4392, "step": 9060 }, { "epoch": 0.0725594195246438, "grad_norm": 0.7912060591995853, "learning_rate": 7.254619630429565e-06, "loss": 0.4144, "step": 9070 }, { "epoch": 0.07263941888464892, "grad_norm": 0.6942669812874187, "learning_rate": 7.262618990480762e-06, "loss": 0.4114, "step": 9080 }, { "epoch": 0.07271941824465404, "grad_norm": 0.8732168275007478, "learning_rate": 7.270618350531958e-06, "loss": 0.4287, "step": 9090 }, { "epoch": 0.07279941760465916, "grad_norm": 0.9737932167690426, "learning_rate": 7.2786177105831535e-06, "loss": 0.4158, "step": 9100 }, { "epoch": 0.07287941696466428, "grad_norm": 0.812009631805305, "learning_rate": 7.28661707063435e-06, "loss": 0.4275, "step": 9110 }, { "epoch": 0.0729594163246694, "grad_norm": 0.7699639663973765, "learning_rate": 7.294616430685546e-06, "loss": 0.4215, "step": 9120 }, { "epoch": 0.07303941568467452, "grad_norm": 0.8748467298251865, "learning_rate": 7.302615790736741e-06, "loss": 0.445, "step": 9130 }, { "epoch": 0.07311941504467964, "grad_norm": 0.7742761915411642, "learning_rate": 7.310615150787937e-06, "loss": 0.4535, "step": 9140 }, { "epoch": 0.07319941440468476, "grad_norm": 0.7174657210038272, "learning_rate": 7.3186145108391335e-06, "loss": 0.4158, "step": 9150 }, { "epoch": 0.07327941376468988, "grad_norm": 0.7423978986609119, "learning_rate": 7.326613870890329e-06, "loss": 0.4252, "step": 9160 }, { "epoch": 0.073359413124695, "grad_norm": 0.7731417139594886, "learning_rate": 7.334613230941525e-06, "loss": 0.4298, "step": 9170 }, { "epoch": 0.07343941248470012, "grad_norm": 0.7404373573472962, "learning_rate": 7.342612590992722e-06, "loss": 0.4321, "step": 9180 }, { "epoch": 0.07351941184470524, "grad_norm": 0.7835644110590861, "learning_rate": 7.350611951043916e-06, "loss": 0.4179, "step": 9190 }, { "epoch": 0.07359941120471036, "grad_norm": 0.7589455937647834, "learning_rate": 7.358611311095113e-06, "loss": 0.388, "step": 9200 }, { "epoch": 0.07367941056471548, "grad_norm": 0.7618453444439949, "learning_rate": 7.366610671146309e-06, "loss": 0.4234, "step": 9210 }, { "epoch": 0.0737594099247206, "grad_norm": 0.73626098988893, "learning_rate": 7.3746100311975045e-06, "loss": 0.4143, "step": 9220 }, { "epoch": 0.07383940928472572, "grad_norm": 0.757596465351543, "learning_rate": 7.382609391248701e-06, "loss": 0.4338, "step": 9230 }, { "epoch": 0.07391940864473084, "grad_norm": 0.7761625064136773, "learning_rate": 7.390608751299897e-06, "loss": 0.388, "step": 9240 }, { "epoch": 0.07399940800473596, "grad_norm": 0.9472266058131255, "learning_rate": 7.398608111351092e-06, "loss": 0.4396, "step": 9250 }, { "epoch": 0.07407940736474108, "grad_norm": 0.7565455540083956, "learning_rate": 7.406607471402288e-06, "loss": 0.4252, "step": 9260 }, { "epoch": 0.0741594067247462, "grad_norm": 0.7075301963376511, "learning_rate": 7.4146068314534845e-06, "loss": 0.4134, "step": 9270 }, { "epoch": 0.07423940608475132, "grad_norm": 0.7746267916575363, "learning_rate": 7.42260619150468e-06, "loss": 0.4209, "step": 9280 }, { "epoch": 0.07431940544475645, "grad_norm": 0.6784177459846593, "learning_rate": 7.430605551555876e-06, "loss": 0.4257, "step": 9290 }, { "epoch": 0.07439940480476157, "grad_norm": 0.6673369378925459, "learning_rate": 7.438604911607073e-06, "loss": 0.4186, "step": 9300 }, { "epoch": 0.07447940416476669, "grad_norm": 0.7255658387073615, "learning_rate": 7.446604271658267e-06, "loss": 0.4259, "step": 9310 }, { "epoch": 0.0745594035247718, "grad_norm": 0.7574610328237186, "learning_rate": 7.454603631709464e-06, "loss": 0.4149, "step": 9320 }, { "epoch": 0.07463940288477693, "grad_norm": 0.7199711357069349, "learning_rate": 7.46260299176066e-06, "loss": 0.4293, "step": 9330 }, { "epoch": 0.07471940224478205, "grad_norm": 0.76311719714425, "learning_rate": 7.4706023518118555e-06, "loss": 0.4189, "step": 9340 }, { "epoch": 0.07479940160478717, "grad_norm": 0.6681083115188398, "learning_rate": 7.478601711863052e-06, "loss": 0.4186, "step": 9350 }, { "epoch": 0.07487940096479229, "grad_norm": 0.7723862189615095, "learning_rate": 7.486601071914248e-06, "loss": 0.4644, "step": 9360 }, { "epoch": 0.0749594003247974, "grad_norm": 0.7648192027019411, "learning_rate": 7.494600431965443e-06, "loss": 0.4371, "step": 9370 }, { "epoch": 0.07503939968480253, "grad_norm": 0.8695394302129262, "learning_rate": 7.502599792016639e-06, "loss": 0.4168, "step": 9380 }, { "epoch": 0.07511939904480765, "grad_norm": 0.7135752461177384, "learning_rate": 7.5105991520678355e-06, "loss": 0.4341, "step": 9390 }, { "epoch": 0.07519939840481277, "grad_norm": 0.7062441949950724, "learning_rate": 7.518598512119031e-06, "loss": 0.427, "step": 9400 }, { "epoch": 0.07527939776481789, "grad_norm": 0.807759258348696, "learning_rate": 7.526597872170227e-06, "loss": 0.4142, "step": 9410 }, { "epoch": 0.07535939712482301, "grad_norm": 0.7861933811623271, "learning_rate": 7.534597232221424e-06, "loss": 0.43, "step": 9420 }, { "epoch": 0.07543939648482813, "grad_norm": 0.8707481509484005, "learning_rate": 7.542596592272618e-06, "loss": 0.433, "step": 9430 }, { "epoch": 0.07551939584483325, "grad_norm": 0.7760284780108746, "learning_rate": 7.550595952323815e-06, "loss": 0.4369, "step": 9440 }, { "epoch": 0.07559939520483835, "grad_norm": 0.8271683368628876, "learning_rate": 7.558595312375011e-06, "loss": 0.4197, "step": 9450 }, { "epoch": 0.07567939456484347, "grad_norm": 0.7704001388110111, "learning_rate": 7.5665946724262065e-06, "loss": 0.4148, "step": 9460 }, { "epoch": 0.0757593939248486, "grad_norm": 0.7775794303452165, "learning_rate": 7.574594032477403e-06, "loss": 0.4312, "step": 9470 }, { "epoch": 0.07583939328485372, "grad_norm": 0.7831172024641054, "learning_rate": 7.582593392528599e-06, "loss": 0.4206, "step": 9480 }, { "epoch": 0.07591939264485884, "grad_norm": 0.6052944758339793, "learning_rate": 7.590592752579794e-06, "loss": 0.4059, "step": 9490 }, { "epoch": 0.07599939200486396, "grad_norm": 0.6753879576981862, "learning_rate": 7.59859211263099e-06, "loss": 0.4299, "step": 9500 }, { "epoch": 0.07607939136486908, "grad_norm": 0.8604355142939223, "learning_rate": 7.6065914726821865e-06, "loss": 0.4271, "step": 9510 }, { "epoch": 0.0761593907248742, "grad_norm": 0.7535557389209109, "learning_rate": 7.614590832733382e-06, "loss": 0.4214, "step": 9520 }, { "epoch": 0.07623939008487932, "grad_norm": 0.7536246085913865, "learning_rate": 7.622590192784578e-06, "loss": 0.4328, "step": 9530 }, { "epoch": 0.07631938944488444, "grad_norm": 0.8428182285823616, "learning_rate": 7.630589552835775e-06, "loss": 0.4163, "step": 9540 }, { "epoch": 0.07639938880488956, "grad_norm": 0.7537452203978798, "learning_rate": 7.63858891288697e-06, "loss": 0.4006, "step": 9550 }, { "epoch": 0.07647938816489468, "grad_norm": 2.1581625319615605, "learning_rate": 7.646588272938166e-06, "loss": 0.4215, "step": 9560 }, { "epoch": 0.0765593875248998, "grad_norm": 0.7302743297219303, "learning_rate": 7.654587632989362e-06, "loss": 0.4359, "step": 9570 }, { "epoch": 0.07663938688490492, "grad_norm": 0.7881840211837142, "learning_rate": 7.662586993040557e-06, "loss": 0.4215, "step": 9580 }, { "epoch": 0.07671938624491004, "grad_norm": 0.6968139833528485, "learning_rate": 7.670586353091753e-06, "loss": 0.4106, "step": 9590 }, { "epoch": 0.07679938560491516, "grad_norm": 0.7352058764306478, "learning_rate": 7.67858571314295e-06, "loss": 0.4255, "step": 9600 }, { "epoch": 0.07687938496492028, "grad_norm": 0.7383208962739828, "learning_rate": 7.686585073194146e-06, "loss": 0.4301, "step": 9610 }, { "epoch": 0.0769593843249254, "grad_norm": 0.8238810513924866, "learning_rate": 7.694584433245342e-06, "loss": 0.4111, "step": 9620 }, { "epoch": 0.07703938368493052, "grad_norm": 0.710621902710307, "learning_rate": 7.702583793296537e-06, "loss": 0.4249, "step": 9630 }, { "epoch": 0.07711938304493564, "grad_norm": 0.6945748312524411, "learning_rate": 7.710583153347733e-06, "loss": 0.4156, "step": 9640 }, { "epoch": 0.07719938240494076, "grad_norm": 0.7707194683960842, "learning_rate": 7.71858251339893e-06, "loss": 0.4552, "step": 9650 }, { "epoch": 0.07727938176494588, "grad_norm": 0.7638485142157124, "learning_rate": 7.726581873450124e-06, "loss": 0.4286, "step": 9660 }, { "epoch": 0.077359381124951, "grad_norm": 0.6738530037446632, "learning_rate": 7.73458123350132e-06, "loss": 0.4127, "step": 9670 }, { "epoch": 0.07743938048495612, "grad_norm": 0.7134549522343689, "learning_rate": 7.742580593552517e-06, "loss": 0.4096, "step": 9680 }, { "epoch": 0.07751937984496124, "grad_norm": 0.7267707333334443, "learning_rate": 7.750579953603711e-06, "loss": 0.4174, "step": 9690 }, { "epoch": 0.07759937920496636, "grad_norm": 0.759018369702987, "learning_rate": 7.758579313654908e-06, "loss": 0.4295, "step": 9700 }, { "epoch": 0.07767937856497148, "grad_norm": 0.7085029251112671, "learning_rate": 7.766578673706104e-06, "loss": 0.4406, "step": 9710 }, { "epoch": 0.0777593779249766, "grad_norm": 0.7244022332092789, "learning_rate": 7.7745780337573e-06, "loss": 0.4221, "step": 9720 }, { "epoch": 0.07783937728498172, "grad_norm": 0.7356818549691659, "learning_rate": 7.782577393808497e-06, "loss": 0.4237, "step": 9730 }, { "epoch": 0.07791937664498684, "grad_norm": 1.125237720592352, "learning_rate": 7.790576753859693e-06, "loss": 0.4367, "step": 9740 }, { "epoch": 0.07799937600499196, "grad_norm": 0.8308310004902963, "learning_rate": 7.798576113910888e-06, "loss": 0.4364, "step": 9750 }, { "epoch": 0.07807937536499708, "grad_norm": 0.7745767529165616, "learning_rate": 7.806575473962084e-06, "loss": 0.4431, "step": 9760 }, { "epoch": 0.0781593747250022, "grad_norm": 0.7561664429839915, "learning_rate": 7.81457483401328e-06, "loss": 0.4187, "step": 9770 }, { "epoch": 0.07823937408500732, "grad_norm": 0.7640131168542577, "learning_rate": 7.822574194064475e-06, "loss": 0.4248, "step": 9780 }, { "epoch": 0.07831937344501244, "grad_norm": 0.7197503924942193, "learning_rate": 7.830573554115671e-06, "loss": 0.4267, "step": 9790 }, { "epoch": 0.07839937280501756, "grad_norm": 0.7278151043282459, "learning_rate": 7.838572914166868e-06, "loss": 0.4359, "step": 9800 }, { "epoch": 0.07847937216502268, "grad_norm": 0.7452711506005507, "learning_rate": 7.846572274218062e-06, "loss": 0.4172, "step": 9810 }, { "epoch": 0.0785593715250278, "grad_norm": 0.7165980472556078, "learning_rate": 7.854571634269259e-06, "loss": 0.4299, "step": 9820 }, { "epoch": 0.07863937088503292, "grad_norm": 0.8418797221506509, "learning_rate": 7.862570994320455e-06, "loss": 0.4479, "step": 9830 }, { "epoch": 0.07871937024503804, "grad_norm": 1.0369061063955327, "learning_rate": 7.870570354371651e-06, "loss": 0.4485, "step": 9840 }, { "epoch": 0.07879936960504316, "grad_norm": 0.8712207777446473, "learning_rate": 7.878569714422848e-06, "loss": 0.4282, "step": 9850 }, { "epoch": 0.07887936896504828, "grad_norm": 0.7651295684183588, "learning_rate": 7.886569074474042e-06, "loss": 0.411, "step": 9860 }, { "epoch": 0.0789593683250534, "grad_norm": 0.6057054717841039, "learning_rate": 7.894568434525239e-06, "loss": 0.4346, "step": 9870 }, { "epoch": 0.07903936768505852, "grad_norm": 0.7461288218695842, "learning_rate": 7.902567794576435e-06, "loss": 0.4165, "step": 9880 }, { "epoch": 0.07911936704506364, "grad_norm": 0.7820177899079546, "learning_rate": 7.910567154627631e-06, "loss": 0.4307, "step": 9890 }, { "epoch": 0.07919936640506876, "grad_norm": 0.8796698783074814, "learning_rate": 7.918566514678826e-06, "loss": 0.4228, "step": 9900 }, { "epoch": 0.07927936576507388, "grad_norm": 0.6714560334086797, "learning_rate": 7.926565874730022e-06, "loss": 0.4022, "step": 9910 }, { "epoch": 0.079359365125079, "grad_norm": 0.7891326634193025, "learning_rate": 7.934565234781219e-06, "loss": 0.4, "step": 9920 }, { "epoch": 0.07943936448508412, "grad_norm": 0.7614942098734973, "learning_rate": 7.942564594832413e-06, "loss": 0.3959, "step": 9930 }, { "epoch": 0.07951936384508924, "grad_norm": 0.7782633381776749, "learning_rate": 7.95056395488361e-06, "loss": 0.4481, "step": 9940 }, { "epoch": 0.07959936320509436, "grad_norm": 0.7646716205945274, "learning_rate": 7.958563314934806e-06, "loss": 0.4506, "step": 9950 }, { "epoch": 0.07967936256509948, "grad_norm": 0.7271850996551853, "learning_rate": 7.966562674986002e-06, "loss": 0.416, "step": 9960 }, { "epoch": 0.0797593619251046, "grad_norm": 0.7078876115844411, "learning_rate": 7.974562035037197e-06, "loss": 0.3886, "step": 9970 }, { "epoch": 0.07983936128510973, "grad_norm": 0.6606874457122963, "learning_rate": 7.982561395088393e-06, "loss": 0.4006, "step": 9980 }, { "epoch": 0.07991936064511485, "grad_norm": 0.7729635400976105, "learning_rate": 7.99056075513959e-06, "loss": 0.4082, "step": 9990 }, { "epoch": 0.07999936000511997, "grad_norm": 0.7333685677151505, "learning_rate": 7.998560115190786e-06, "loss": 0.4149, "step": 10000 }, { "epoch": 0.08007935936512509, "grad_norm": 0.8362996144998345, "learning_rate": 8.006559475241982e-06, "loss": 0.4329, "step": 10010 }, { "epoch": 0.0801593587251302, "grad_norm": 0.8016170546033494, "learning_rate": 8.014558835293177e-06, "loss": 0.4312, "step": 10020 }, { "epoch": 0.08023935808513531, "grad_norm": 0.7556680756540225, "learning_rate": 8.022558195344373e-06, "loss": 0.4293, "step": 10030 }, { "epoch": 0.08031935744514043, "grad_norm": 0.79967280285087, "learning_rate": 8.03055755539557e-06, "loss": 0.411, "step": 10040 }, { "epoch": 0.08039935680514555, "grad_norm": 0.7530698891771462, "learning_rate": 8.038556915446764e-06, "loss": 0.4354, "step": 10050 }, { "epoch": 0.08047935616515067, "grad_norm": 0.833524062394011, "learning_rate": 8.04655627549796e-06, "loss": 0.4453, "step": 10060 }, { "epoch": 0.08055935552515579, "grad_norm": 0.9419326877711525, "learning_rate": 8.054555635549157e-06, "loss": 0.3992, "step": 10070 }, { "epoch": 0.08063935488516091, "grad_norm": 0.7203674798814277, "learning_rate": 8.062554995600352e-06, "loss": 0.4226, "step": 10080 }, { "epoch": 0.08071935424516603, "grad_norm": 0.6591542053386136, "learning_rate": 8.070554355651548e-06, "loss": 0.431, "step": 10090 }, { "epoch": 0.08079935360517115, "grad_norm": 0.6791497309598677, "learning_rate": 8.078553715702744e-06, "loss": 0.4026, "step": 10100 }, { "epoch": 0.08087935296517627, "grad_norm": 0.7647994564837595, "learning_rate": 8.08655307575394e-06, "loss": 0.4257, "step": 10110 }, { "epoch": 0.0809593523251814, "grad_norm": 0.822759228953568, "learning_rate": 8.094552435805137e-06, "loss": 0.4361, "step": 10120 }, { "epoch": 0.08103935168518651, "grad_norm": 0.7490074367709073, "learning_rate": 8.102551795856333e-06, "loss": 0.4134, "step": 10130 }, { "epoch": 0.08111935104519163, "grad_norm": 0.7292853826230042, "learning_rate": 8.110551155907528e-06, "loss": 0.4105, "step": 10140 }, { "epoch": 0.08119935040519675, "grad_norm": 0.8492381242063971, "learning_rate": 8.118550515958724e-06, "loss": 0.4218, "step": 10150 }, { "epoch": 0.08127934976520187, "grad_norm": 0.8627782401860304, "learning_rate": 8.12654987600992e-06, "loss": 0.4257, "step": 10160 }, { "epoch": 0.081359349125207, "grad_norm": 0.9916817541339781, "learning_rate": 8.134549236061115e-06, "loss": 0.4316, "step": 10170 }, { "epoch": 0.08143934848521212, "grad_norm": 0.8341211780355294, "learning_rate": 8.142548596112312e-06, "loss": 0.447, "step": 10180 }, { "epoch": 0.08151934784521724, "grad_norm": 2.325181905041473, "learning_rate": 8.150547956163508e-06, "loss": 0.3957, "step": 10190 }, { "epoch": 0.08159934720522236, "grad_norm": 0.8552717968031468, "learning_rate": 8.158547316214703e-06, "loss": 0.4279, "step": 10200 }, { "epoch": 0.08167934656522748, "grad_norm": 0.8239320840021814, "learning_rate": 8.166546676265899e-06, "loss": 0.4334, "step": 10210 }, { "epoch": 0.0817593459252326, "grad_norm": 0.87967217926997, "learning_rate": 8.174546036317095e-06, "loss": 0.4337, "step": 10220 }, { "epoch": 0.08183934528523772, "grad_norm": 0.7128598928859964, "learning_rate": 8.182545396368292e-06, "loss": 0.4262, "step": 10230 }, { "epoch": 0.08191934464524284, "grad_norm": 0.8013452752572597, "learning_rate": 8.190544756419488e-06, "loss": 0.4355, "step": 10240 }, { "epoch": 0.08199934400524796, "grad_norm": 0.7865942912260698, "learning_rate": 8.198544116470683e-06, "loss": 0.3968, "step": 10250 }, { "epoch": 0.08207934336525308, "grad_norm": 0.7165866552359123, "learning_rate": 8.206543476521879e-06, "loss": 0.4143, "step": 10260 }, { "epoch": 0.0821593427252582, "grad_norm": 0.6653862478131871, "learning_rate": 8.214542836573075e-06, "loss": 0.4327, "step": 10270 }, { "epoch": 0.08223934208526332, "grad_norm": 0.7591931555008371, "learning_rate": 8.22254219662427e-06, "loss": 0.4061, "step": 10280 }, { "epoch": 0.08231934144526844, "grad_norm": 0.7978998546660264, "learning_rate": 8.230541556675466e-06, "loss": 0.4132, "step": 10290 }, { "epoch": 0.08239934080527356, "grad_norm": 0.7065933848196233, "learning_rate": 8.238540916726663e-06, "loss": 0.4214, "step": 10300 }, { "epoch": 0.08247934016527868, "grad_norm": 0.8851532996892377, "learning_rate": 8.246540276777857e-06, "loss": 0.4323, "step": 10310 }, { "epoch": 0.0825593395252838, "grad_norm": 0.707277467538071, "learning_rate": 8.254539636829054e-06, "loss": 0.397, "step": 10320 }, { "epoch": 0.08263933888528892, "grad_norm": 0.6495200876069024, "learning_rate": 8.26253899688025e-06, "loss": 0.4144, "step": 10330 }, { "epoch": 0.08271933824529404, "grad_norm": 0.7105121047089369, "learning_rate": 8.270538356931446e-06, "loss": 0.4318, "step": 10340 }, { "epoch": 0.08279933760529916, "grad_norm": 0.7975431328016105, "learning_rate": 8.278537716982643e-06, "loss": 0.4364, "step": 10350 }, { "epoch": 0.08287933696530428, "grad_norm": 0.7889928848472462, "learning_rate": 8.286537077033839e-06, "loss": 0.4226, "step": 10360 }, { "epoch": 0.0829593363253094, "grad_norm": 0.8602261889738525, "learning_rate": 8.294536437085034e-06, "loss": 0.4248, "step": 10370 }, { "epoch": 0.08303933568531452, "grad_norm": 0.8691855158810055, "learning_rate": 8.30253579713623e-06, "loss": 0.4476, "step": 10380 }, { "epoch": 0.08311933504531964, "grad_norm": 0.6964635947262882, "learning_rate": 8.310535157187426e-06, "loss": 0.4414, "step": 10390 }, { "epoch": 0.08319933440532476, "grad_norm": 0.8884055169174275, "learning_rate": 8.31853451723862e-06, "loss": 0.4273, "step": 10400 }, { "epoch": 0.08327933376532988, "grad_norm": 0.6450978106641992, "learning_rate": 8.326533877289817e-06, "loss": 0.416, "step": 10410 }, { "epoch": 0.083359333125335, "grad_norm": 0.7517243186901964, "learning_rate": 8.334533237341014e-06, "loss": 0.4038, "step": 10420 }, { "epoch": 0.08343933248534012, "grad_norm": 0.7554595130828052, "learning_rate": 8.342532597392208e-06, "loss": 0.422, "step": 10430 }, { "epoch": 0.08351933184534524, "grad_norm": 0.7970520238014901, "learning_rate": 8.350531957443405e-06, "loss": 0.435, "step": 10440 }, { "epoch": 0.08359933120535036, "grad_norm": 0.6868611198158514, "learning_rate": 8.3585313174946e-06, "loss": 0.4182, "step": 10450 }, { "epoch": 0.08367933056535548, "grad_norm": 0.705922016641164, "learning_rate": 8.366530677545797e-06, "loss": 0.4095, "step": 10460 }, { "epoch": 0.0837593299253606, "grad_norm": 0.8047508705191073, "learning_rate": 8.374530037596994e-06, "loss": 0.4479, "step": 10470 }, { "epoch": 0.08383932928536572, "grad_norm": 0.7431088854481606, "learning_rate": 8.38252939764819e-06, "loss": 0.4371, "step": 10480 }, { "epoch": 0.08391932864537084, "grad_norm": 0.6966582351477671, "learning_rate": 8.390528757699385e-06, "loss": 0.418, "step": 10490 }, { "epoch": 0.08399932800537596, "grad_norm": 0.6463213718626103, "learning_rate": 8.39852811775058e-06, "loss": 0.4134, "step": 10500 }, { "epoch": 0.08407932736538108, "grad_norm": 0.8221831400482152, "learning_rate": 8.406527477801777e-06, "loss": 0.4392, "step": 10510 }, { "epoch": 0.0841593267253862, "grad_norm": 0.7917053276365509, "learning_rate": 8.414526837852972e-06, "loss": 0.4043, "step": 10520 }, { "epoch": 0.08423932608539132, "grad_norm": 0.7024888179507742, "learning_rate": 8.422526197904168e-06, "loss": 0.4179, "step": 10530 }, { "epoch": 0.08431932544539644, "grad_norm": 0.642044917326839, "learning_rate": 8.430525557955365e-06, "loss": 0.3991, "step": 10540 }, { "epoch": 0.08439932480540156, "grad_norm": 0.9296103975573807, "learning_rate": 8.438524918006559e-06, "loss": 0.4343, "step": 10550 }, { "epoch": 0.08447932416540668, "grad_norm": 0.8151410815232693, "learning_rate": 8.446524278057755e-06, "loss": 0.4338, "step": 10560 }, { "epoch": 0.0845593235254118, "grad_norm": 0.6909222025022131, "learning_rate": 8.454523638108952e-06, "loss": 0.4523, "step": 10570 }, { "epoch": 0.08463932288541692, "grad_norm": 0.7339369599988719, "learning_rate": 8.462522998160148e-06, "loss": 0.4237, "step": 10580 }, { "epoch": 0.08471932224542204, "grad_norm": 0.7729982452272108, "learning_rate": 8.470522358211345e-06, "loss": 0.4159, "step": 10590 }, { "epoch": 0.08479932160542715, "grad_norm": 0.7263934650408461, "learning_rate": 8.47852171826254e-06, "loss": 0.4299, "step": 10600 }, { "epoch": 0.08487932096543227, "grad_norm": 0.8711071207833823, "learning_rate": 8.486521078313735e-06, "loss": 0.4227, "step": 10610 }, { "epoch": 0.08495932032543739, "grad_norm": 0.7605649728312519, "learning_rate": 8.494520438364932e-06, "loss": 0.4056, "step": 10620 }, { "epoch": 0.08503931968544251, "grad_norm": 0.7070476131421068, "learning_rate": 8.502519798416128e-06, "loss": 0.4218, "step": 10630 }, { "epoch": 0.08511931904544763, "grad_norm": 0.8273653808730912, "learning_rate": 8.510519158467323e-06, "loss": 0.4562, "step": 10640 }, { "epoch": 0.08519931840545275, "grad_norm": 0.7290330658278465, "learning_rate": 8.518518518518519e-06, "loss": 0.4323, "step": 10650 }, { "epoch": 0.08527931776545787, "grad_norm": 0.7477934169003759, "learning_rate": 8.526517878569715e-06, "loss": 0.405, "step": 10660 }, { "epoch": 0.08535931712546299, "grad_norm": 0.7124707169199953, "learning_rate": 8.53451723862091e-06, "loss": 0.4278, "step": 10670 }, { "epoch": 0.08543931648546811, "grad_norm": 0.8192203662655624, "learning_rate": 8.542516598672106e-06, "loss": 0.4143, "step": 10680 }, { "epoch": 0.08551931584547323, "grad_norm": 0.6853933938229393, "learning_rate": 8.550515958723303e-06, "loss": 0.4137, "step": 10690 }, { "epoch": 0.08559931520547835, "grad_norm": 0.7880511507325222, "learning_rate": 8.5585153187745e-06, "loss": 0.4349, "step": 10700 }, { "epoch": 0.08567931456548347, "grad_norm": 0.6585250720116949, "learning_rate": 8.566514678825695e-06, "loss": 0.4108, "step": 10710 }, { "epoch": 0.08575931392548859, "grad_norm": 0.6845724010220955, "learning_rate": 8.574514038876892e-06, "loss": 0.3916, "step": 10720 }, { "epoch": 0.08583931328549371, "grad_norm": 0.6836423200658271, "learning_rate": 8.582513398928086e-06, "loss": 0.4264, "step": 10730 }, { "epoch": 0.08591931264549883, "grad_norm": 0.6947348864137265, "learning_rate": 8.590512758979283e-06, "loss": 0.4358, "step": 10740 }, { "epoch": 0.08599931200550395, "grad_norm": 0.7551059396096432, "learning_rate": 8.59851211903048e-06, "loss": 0.402, "step": 10750 }, { "epoch": 0.08607931136550907, "grad_norm": 0.8558226469421992, "learning_rate": 8.606511479081674e-06, "loss": 0.4281, "step": 10760 }, { "epoch": 0.0861593107255142, "grad_norm": 0.6557188478875992, "learning_rate": 8.61451083913287e-06, "loss": 0.4257, "step": 10770 }, { "epoch": 0.08623931008551931, "grad_norm": 0.7343863550885279, "learning_rate": 8.622510199184066e-06, "loss": 0.4167, "step": 10780 }, { "epoch": 0.08631930944552443, "grad_norm": 0.678564641776468, "learning_rate": 8.630509559235261e-06, "loss": 0.4156, "step": 10790 }, { "epoch": 0.08639930880552955, "grad_norm": 0.6765488698530058, "learning_rate": 8.638508919286457e-06, "loss": 0.404, "step": 10800 }, { "epoch": 0.08647930816553467, "grad_norm": 0.7308672648285914, "learning_rate": 8.646508279337654e-06, "loss": 0.4012, "step": 10810 }, { "epoch": 0.0865593075255398, "grad_norm": 0.6878776293772283, "learning_rate": 8.65450763938885e-06, "loss": 0.447, "step": 10820 }, { "epoch": 0.08663930688554491, "grad_norm": 0.7085673604329493, "learning_rate": 8.662506999440046e-06, "loss": 0.4291, "step": 10830 }, { "epoch": 0.08671930624555003, "grad_norm": 0.692779314418764, "learning_rate": 8.670506359491241e-06, "loss": 0.4329, "step": 10840 }, { "epoch": 0.08679930560555515, "grad_norm": 0.6927059149340964, "learning_rate": 8.678505719542437e-06, "loss": 0.4213, "step": 10850 }, { "epoch": 0.08687930496556028, "grad_norm": 0.702265717758418, "learning_rate": 8.686505079593634e-06, "loss": 0.463, "step": 10860 }, { "epoch": 0.0869593043255654, "grad_norm": 0.6796286879671983, "learning_rate": 8.694504439644828e-06, "loss": 0.4282, "step": 10870 }, { "epoch": 0.08703930368557052, "grad_norm": 0.7583256671591532, "learning_rate": 8.702503799696025e-06, "loss": 0.4081, "step": 10880 }, { "epoch": 0.08711930304557564, "grad_norm": 0.7339963876483099, "learning_rate": 8.710503159747221e-06, "loss": 0.4337, "step": 10890 }, { "epoch": 0.08719930240558076, "grad_norm": 0.786628367912159, "learning_rate": 8.718502519798416e-06, "loss": 0.4379, "step": 10900 }, { "epoch": 0.08727930176558588, "grad_norm": 0.7253843426233065, "learning_rate": 8.726501879849612e-06, "loss": 0.4323, "step": 10910 }, { "epoch": 0.087359301125591, "grad_norm": 0.7525132142622203, "learning_rate": 8.734501239900808e-06, "loss": 0.4271, "step": 10920 }, { "epoch": 0.08743930048559612, "grad_norm": 0.7306532457203765, "learning_rate": 8.742500599952005e-06, "loss": 0.3948, "step": 10930 }, { "epoch": 0.08751929984560124, "grad_norm": 0.7288381028680123, "learning_rate": 8.750499960003201e-06, "loss": 0.4169, "step": 10940 }, { "epoch": 0.08759929920560636, "grad_norm": 0.7452466352816842, "learning_rate": 8.758499320054397e-06, "loss": 0.4221, "step": 10950 }, { "epoch": 0.08767929856561148, "grad_norm": 0.7684047171576988, "learning_rate": 8.766498680105592e-06, "loss": 0.4696, "step": 10960 }, { "epoch": 0.0877592979256166, "grad_norm": 1.106001905794501, "learning_rate": 8.774498040156788e-06, "loss": 0.4389, "step": 10970 }, { "epoch": 0.08783929728562172, "grad_norm": 0.7188033526091617, "learning_rate": 8.782497400207985e-06, "loss": 0.4316, "step": 10980 }, { "epoch": 0.08791929664562684, "grad_norm": 0.7165732316195867, "learning_rate": 8.79049676025918e-06, "loss": 0.4254, "step": 10990 }, { "epoch": 0.08799929600563196, "grad_norm": 0.6928515295737864, "learning_rate": 8.798496120310376e-06, "loss": 0.4354, "step": 11000 }, { "epoch": 0.08807929536563708, "grad_norm": 0.735759503457832, "learning_rate": 8.806495480361572e-06, "loss": 0.4161, "step": 11010 }, { "epoch": 0.0881592947256422, "grad_norm": 0.745847701151922, "learning_rate": 8.814494840412767e-06, "loss": 0.4356, "step": 11020 }, { "epoch": 0.08823929408564732, "grad_norm": 0.7401576418885047, "learning_rate": 8.822494200463963e-06, "loss": 0.4059, "step": 11030 }, { "epoch": 0.08831929344565244, "grad_norm": 0.7150708003747663, "learning_rate": 8.83049356051516e-06, "loss": 0.4276, "step": 11040 }, { "epoch": 0.08839929280565756, "grad_norm": 0.7213102518806342, "learning_rate": 8.838492920566356e-06, "loss": 0.4352, "step": 11050 }, { "epoch": 0.08847929216566268, "grad_norm": 0.6599060098757383, "learning_rate": 8.846492280617552e-06, "loss": 0.4234, "step": 11060 }, { "epoch": 0.0885592915256678, "grad_norm": 0.8409157893672355, "learning_rate": 8.854491640668747e-06, "loss": 0.4216, "step": 11070 }, { "epoch": 0.08863929088567292, "grad_norm": 0.714587966725285, "learning_rate": 8.862491000719943e-06, "loss": 0.3902, "step": 11080 }, { "epoch": 0.08871929024567804, "grad_norm": 0.7815849463774848, "learning_rate": 8.87049036077114e-06, "loss": 0.419, "step": 11090 }, { "epoch": 0.08879928960568316, "grad_norm": 0.6789393458619897, "learning_rate": 8.878489720822336e-06, "loss": 0.4035, "step": 11100 }, { "epoch": 0.08887928896568828, "grad_norm": 0.7112225102387261, "learning_rate": 8.88648908087353e-06, "loss": 0.4009, "step": 11110 }, { "epoch": 0.0889592883256934, "grad_norm": 0.6973895981278818, "learning_rate": 8.894488440924727e-06, "loss": 0.4224, "step": 11120 }, { "epoch": 0.08903928768569852, "grad_norm": 0.7774098329637728, "learning_rate": 8.902487800975923e-06, "loss": 0.4098, "step": 11130 }, { "epoch": 0.08911928704570364, "grad_norm": 0.7658231203988602, "learning_rate": 8.910487161027118e-06, "loss": 0.4392, "step": 11140 }, { "epoch": 0.08919928640570876, "grad_norm": 0.7876157129039025, "learning_rate": 8.918486521078314e-06, "loss": 0.4186, "step": 11150 }, { "epoch": 0.08927928576571388, "grad_norm": 0.6937729561722094, "learning_rate": 8.92648588112951e-06, "loss": 0.4239, "step": 11160 }, { "epoch": 0.08935928512571899, "grad_norm": 0.6824480223369135, "learning_rate": 8.934485241180707e-06, "loss": 0.4191, "step": 11170 }, { "epoch": 0.08943928448572411, "grad_norm": 0.763102957342165, "learning_rate": 8.942484601231901e-06, "loss": 0.4238, "step": 11180 }, { "epoch": 0.08951928384572923, "grad_norm": 0.7074301309734028, "learning_rate": 8.950483961283098e-06, "loss": 0.4347, "step": 11190 }, { "epoch": 0.08959928320573435, "grad_norm": 2.8108315812837574, "learning_rate": 8.958483321334294e-06, "loss": 0.4158, "step": 11200 }, { "epoch": 0.08967928256573947, "grad_norm": 0.8158644198626702, "learning_rate": 8.96648268138549e-06, "loss": 0.4199, "step": 11210 }, { "epoch": 0.08975928192574459, "grad_norm": 0.8198554311861339, "learning_rate": 8.974482041436687e-06, "loss": 0.4187, "step": 11220 }, { "epoch": 0.08983928128574971, "grad_norm": 0.7441901464189733, "learning_rate": 8.982481401487881e-06, "loss": 0.4243, "step": 11230 }, { "epoch": 0.08991928064575483, "grad_norm": 0.9124606346607721, "learning_rate": 8.990480761539078e-06, "loss": 0.4367, "step": 11240 }, { "epoch": 0.08999928000575995, "grad_norm": 0.6975886252905562, "learning_rate": 8.998480121590274e-06, "loss": 0.4383, "step": 11250 }, { "epoch": 0.09007927936576507, "grad_norm": 0.6761285475593969, "learning_rate": 9.006479481641469e-06, "loss": 0.4113, "step": 11260 }, { "epoch": 0.09015927872577019, "grad_norm": 0.7336756203656983, "learning_rate": 9.014478841692665e-06, "loss": 0.4231, "step": 11270 }, { "epoch": 0.09023927808577531, "grad_norm": 0.7971291035507614, "learning_rate": 9.022478201743861e-06, "loss": 0.445, "step": 11280 }, { "epoch": 0.09031927744578043, "grad_norm": 0.7094222180225315, "learning_rate": 9.030477561795056e-06, "loss": 0.4164, "step": 11290 }, { "epoch": 0.09039927680578555, "grad_norm": 0.7441145183118785, "learning_rate": 9.038476921846252e-06, "loss": 0.4105, "step": 11300 }, { "epoch": 0.09047927616579067, "grad_norm": 0.8121338182799563, "learning_rate": 9.046476281897449e-06, "loss": 0.4325, "step": 11310 }, { "epoch": 0.09055927552579579, "grad_norm": 0.7654068498891533, "learning_rate": 9.054475641948645e-06, "loss": 0.4182, "step": 11320 }, { "epoch": 0.09063927488580091, "grad_norm": 0.6565792340624728, "learning_rate": 9.062475001999841e-06, "loss": 0.4353, "step": 11330 }, { "epoch": 0.09071927424580603, "grad_norm": 0.7639707679339371, "learning_rate": 9.070474362051038e-06, "loss": 0.4375, "step": 11340 }, { "epoch": 0.09079927360581115, "grad_norm": 1.1065307649096179, "learning_rate": 9.078473722102232e-06, "loss": 0.4042, "step": 11350 }, { "epoch": 0.09087927296581627, "grad_norm": 0.6836864969919944, "learning_rate": 9.086473082153429e-06, "loss": 0.4069, "step": 11360 }, { "epoch": 0.09095927232582139, "grad_norm": 0.6505179457584815, "learning_rate": 9.094472442204625e-06, "loss": 0.4233, "step": 11370 }, { "epoch": 0.09103927168582651, "grad_norm": 0.7864411447168694, "learning_rate": 9.10247180225582e-06, "loss": 0.4063, "step": 11380 }, { "epoch": 0.09111927104583163, "grad_norm": 0.81876647384435, "learning_rate": 9.110471162307016e-06, "loss": 0.4437, "step": 11390 }, { "epoch": 0.09119927040583675, "grad_norm": 0.7536510441193175, "learning_rate": 9.118470522358212e-06, "loss": 0.4457, "step": 11400 }, { "epoch": 0.09127926976584187, "grad_norm": 0.8242345661031574, "learning_rate": 9.126469882409407e-06, "loss": 0.4335, "step": 11410 }, { "epoch": 0.09135926912584699, "grad_norm": 0.8293010912996112, "learning_rate": 9.134469242460603e-06, "loss": 0.4246, "step": 11420 }, { "epoch": 0.09143926848585211, "grad_norm": 0.8525913074538886, "learning_rate": 9.1424686025118e-06, "loss": 0.4137, "step": 11430 }, { "epoch": 0.09151926784585723, "grad_norm": 0.823142430219861, "learning_rate": 9.150467962562996e-06, "loss": 0.4402, "step": 11440 }, { "epoch": 0.09159926720586235, "grad_norm": 0.796526317139337, "learning_rate": 9.158467322614192e-06, "loss": 0.4338, "step": 11450 }, { "epoch": 0.09167926656586747, "grad_norm": 0.6797611774276162, "learning_rate": 9.166466682665387e-06, "loss": 0.4033, "step": 11460 }, { "epoch": 0.0917592659258726, "grad_norm": 0.7739110407431978, "learning_rate": 9.174466042716583e-06, "loss": 0.4457, "step": 11470 }, { "epoch": 0.09183926528587771, "grad_norm": 0.6889786586847555, "learning_rate": 9.18246540276778e-06, "loss": 0.4256, "step": 11480 }, { "epoch": 0.09191926464588283, "grad_norm": 0.635528801129085, "learning_rate": 9.190464762818974e-06, "loss": 0.4094, "step": 11490 }, { "epoch": 0.09199926400588795, "grad_norm": 0.6715404045973435, "learning_rate": 9.19846412287017e-06, "loss": 0.4535, "step": 11500 }, { "epoch": 0.09207926336589307, "grad_norm": 0.9133726375929785, "learning_rate": 9.206463482921367e-06, "loss": 0.4038, "step": 11510 }, { "epoch": 0.0921592627258982, "grad_norm": 0.6910109579479943, "learning_rate": 9.214462842972562e-06, "loss": 0.4092, "step": 11520 }, { "epoch": 0.09223926208590331, "grad_norm": 0.6952573126841998, "learning_rate": 9.222462203023758e-06, "loss": 0.4139, "step": 11530 }, { "epoch": 0.09231926144590843, "grad_norm": 0.6535609454673438, "learning_rate": 9.230461563074954e-06, "loss": 0.4366, "step": 11540 }, { "epoch": 0.09239926080591356, "grad_norm": 0.7330114863255616, "learning_rate": 9.23846092312615e-06, "loss": 0.4007, "step": 11550 }, { "epoch": 0.09247926016591868, "grad_norm": 0.6761578694512926, "learning_rate": 9.246460283177347e-06, "loss": 0.4235, "step": 11560 }, { "epoch": 0.0925592595259238, "grad_norm": 1.0146605459519362, "learning_rate": 9.254459643228543e-06, "loss": 0.4088, "step": 11570 }, { "epoch": 0.09263925888592892, "grad_norm": 0.7637632560554473, "learning_rate": 9.262459003279738e-06, "loss": 0.4244, "step": 11580 }, { "epoch": 0.09271925824593404, "grad_norm": 0.7636711215154064, "learning_rate": 9.270458363330934e-06, "loss": 0.4268, "step": 11590 }, { "epoch": 0.09279925760593916, "grad_norm": 0.6554517795154312, "learning_rate": 9.27845772338213e-06, "loss": 0.431, "step": 11600 }, { "epoch": 0.09287925696594428, "grad_norm": 0.7432001209583402, "learning_rate": 9.286457083433325e-06, "loss": 0.4044, "step": 11610 }, { "epoch": 0.0929592563259494, "grad_norm": 0.7550661511146248, "learning_rate": 9.294456443484522e-06, "loss": 0.4277, "step": 11620 }, { "epoch": 0.09303925568595452, "grad_norm": 0.7348656703408412, "learning_rate": 9.302455803535718e-06, "loss": 0.4178, "step": 11630 }, { "epoch": 0.09311925504595964, "grad_norm": 0.6983278246825473, "learning_rate": 9.310455163586913e-06, "loss": 0.4303, "step": 11640 }, { "epoch": 0.09319925440596476, "grad_norm": 0.6583745223499466, "learning_rate": 9.318454523638109e-06, "loss": 0.4148, "step": 11650 }, { "epoch": 0.09327925376596988, "grad_norm": 0.8532367779643665, "learning_rate": 9.326453883689305e-06, "loss": 0.4116, "step": 11660 }, { "epoch": 0.093359253125975, "grad_norm": 0.6442139127745877, "learning_rate": 9.334453243740502e-06, "loss": 0.4062, "step": 11670 }, { "epoch": 0.09343925248598012, "grad_norm": 1.3385143343108732, "learning_rate": 9.342452603791698e-06, "loss": 0.435, "step": 11680 }, { "epoch": 0.09351925184598524, "grad_norm": 0.6779037071725426, "learning_rate": 9.350451963842894e-06, "loss": 0.4234, "step": 11690 }, { "epoch": 0.09359925120599036, "grad_norm": 0.7593951972542781, "learning_rate": 9.358451323894089e-06, "loss": 0.4017, "step": 11700 }, { "epoch": 0.09367925056599548, "grad_norm": 0.7112648691800992, "learning_rate": 9.366450683945285e-06, "loss": 0.4213, "step": 11710 }, { "epoch": 0.0937592499260006, "grad_norm": 0.7547692875337748, "learning_rate": 9.374450043996482e-06, "loss": 0.4178, "step": 11720 }, { "epoch": 0.09383924928600572, "grad_norm": 0.6789533790443413, "learning_rate": 9.382449404047676e-06, "loss": 0.4325, "step": 11730 }, { "epoch": 0.09391924864601083, "grad_norm": 0.5952264485901477, "learning_rate": 9.390448764098873e-06, "loss": 0.4404, "step": 11740 }, { "epoch": 0.09399924800601595, "grad_norm": 0.7666100035461162, "learning_rate": 9.398448124150069e-06, "loss": 0.4094, "step": 11750 }, { "epoch": 0.09407924736602107, "grad_norm": 0.8085263134870901, "learning_rate": 9.406447484201264e-06, "loss": 0.4409, "step": 11760 }, { "epoch": 0.09415924672602619, "grad_norm": 0.7275019727411437, "learning_rate": 9.41444684425246e-06, "loss": 0.4289, "step": 11770 }, { "epoch": 0.0942392460860313, "grad_norm": 0.7765367060017725, "learning_rate": 9.422446204303656e-06, "loss": 0.4157, "step": 11780 }, { "epoch": 0.09431924544603643, "grad_norm": 0.8280216319663867, "learning_rate": 9.430445564354853e-06, "loss": 0.459, "step": 11790 }, { "epoch": 0.09439924480604155, "grad_norm": 0.7427587496847495, "learning_rate": 9.438444924406049e-06, "loss": 0.4387, "step": 11800 }, { "epoch": 0.09447924416604667, "grad_norm": 0.6427172396573193, "learning_rate": 9.446444284457245e-06, "loss": 0.4176, "step": 11810 }, { "epoch": 0.09455924352605179, "grad_norm": 0.7265111423685158, "learning_rate": 9.45444364450844e-06, "loss": 0.4339, "step": 11820 }, { "epoch": 0.0946392428860569, "grad_norm": 0.7928384259415016, "learning_rate": 9.462443004559636e-06, "loss": 0.4244, "step": 11830 }, { "epoch": 0.09471924224606203, "grad_norm": 0.7953056790840066, "learning_rate": 9.470442364610833e-06, "loss": 0.4221, "step": 11840 }, { "epoch": 0.09479924160606715, "grad_norm": 0.7967319320184727, "learning_rate": 9.478441724662027e-06, "loss": 0.4159, "step": 11850 }, { "epoch": 0.09487924096607227, "grad_norm": 0.7094439636507324, "learning_rate": 9.486441084713224e-06, "loss": 0.4102, "step": 11860 }, { "epoch": 0.09495924032607739, "grad_norm": 0.7818734978773467, "learning_rate": 9.49444044476442e-06, "loss": 0.4202, "step": 11870 }, { "epoch": 0.09503923968608251, "grad_norm": 0.7353572637168984, "learning_rate": 9.502439804815615e-06, "loss": 0.4135, "step": 11880 }, { "epoch": 0.09511923904608763, "grad_norm": 0.6463009451504467, "learning_rate": 9.510439164866811e-06, "loss": 0.4088, "step": 11890 }, { "epoch": 0.09519923840609275, "grad_norm": 0.7814651963315116, "learning_rate": 9.518438524918007e-06, "loss": 0.4255, "step": 11900 }, { "epoch": 0.09527923776609787, "grad_norm": 0.6621498882551435, "learning_rate": 9.526437884969204e-06, "loss": 0.451, "step": 11910 }, { "epoch": 0.09535923712610299, "grad_norm": 0.6724469931269605, "learning_rate": 9.5344372450204e-06, "loss": 0.4128, "step": 11920 }, { "epoch": 0.09543923648610811, "grad_norm": 0.7128707087607582, "learning_rate": 9.542436605071596e-06, "loss": 0.41, "step": 11930 }, { "epoch": 0.09551923584611323, "grad_norm": 0.6855001356003686, "learning_rate": 9.550435965122791e-06, "loss": 0.4403, "step": 11940 }, { "epoch": 0.09559923520611835, "grad_norm": 0.7093252311276882, "learning_rate": 9.558435325173987e-06, "loss": 0.3888, "step": 11950 }, { "epoch": 0.09567923456612347, "grad_norm": 0.7389337793068324, "learning_rate": 9.566434685225184e-06, "loss": 0.4125, "step": 11960 }, { "epoch": 0.09575923392612859, "grad_norm": 0.7637255593855383, "learning_rate": 9.574434045276378e-06, "loss": 0.4282, "step": 11970 }, { "epoch": 0.09583923328613371, "grad_norm": 0.7007908519467984, "learning_rate": 9.582433405327575e-06, "loss": 0.4112, "step": 11980 }, { "epoch": 0.09591923264613883, "grad_norm": 0.76676374890212, "learning_rate": 9.590432765378771e-06, "loss": 0.4371, "step": 11990 }, { "epoch": 0.09599923200614395, "grad_norm": 0.7555781997041284, "learning_rate": 9.598432125429966e-06, "loss": 0.4226, "step": 12000 }, { "epoch": 0.09607923136614907, "grad_norm": 0.8502430978469868, "learning_rate": 9.606431485481162e-06, "loss": 0.4613, "step": 12010 }, { "epoch": 0.09615923072615419, "grad_norm": 0.6578754180440388, "learning_rate": 9.614430845532358e-06, "loss": 0.4263, "step": 12020 }, { "epoch": 0.09623923008615931, "grad_norm": 0.6996986016837791, "learning_rate": 9.622430205583555e-06, "loss": 0.4372, "step": 12030 }, { "epoch": 0.09631922944616443, "grad_norm": 0.6434988082947871, "learning_rate": 9.630429565634751e-06, "loss": 0.4335, "step": 12040 }, { "epoch": 0.09639922880616955, "grad_norm": 0.8898100124594418, "learning_rate": 9.638428925685946e-06, "loss": 0.45, "step": 12050 }, { "epoch": 0.09647922816617467, "grad_norm": 0.7379999717174638, "learning_rate": 9.646428285737142e-06, "loss": 0.4328, "step": 12060 }, { "epoch": 0.09655922752617979, "grad_norm": 0.6920407723604418, "learning_rate": 9.654427645788338e-06, "loss": 0.4304, "step": 12070 }, { "epoch": 0.09663922688618491, "grad_norm": 0.7249107271440087, "learning_rate": 9.662427005839533e-06, "loss": 0.4322, "step": 12080 }, { "epoch": 0.09671922624619003, "grad_norm": 0.602603780257296, "learning_rate": 9.67042636589073e-06, "loss": 0.4209, "step": 12090 }, { "epoch": 0.09679922560619515, "grad_norm": 0.7616351037057018, "learning_rate": 9.678425725941926e-06, "loss": 0.4382, "step": 12100 }, { "epoch": 0.09687922496620027, "grad_norm": 0.6060162549713525, "learning_rate": 9.68642508599312e-06, "loss": 0.3996, "step": 12110 }, { "epoch": 0.09695922432620539, "grad_norm": 0.8147642253854301, "learning_rate": 9.694424446044317e-06, "loss": 0.3938, "step": 12120 }, { "epoch": 0.09703922368621051, "grad_norm": 0.8279262205865401, "learning_rate": 9.702423806095513e-06, "loss": 0.422, "step": 12130 }, { "epoch": 0.09711922304621563, "grad_norm": 0.8102628616373332, "learning_rate": 9.71042316614671e-06, "loss": 0.4105, "step": 12140 }, { "epoch": 0.09719922240622075, "grad_norm": 0.6712635696218996, "learning_rate": 9.718422526197906e-06, "loss": 0.4412, "step": 12150 }, { "epoch": 0.09727922176622587, "grad_norm": 0.6591182316046312, "learning_rate": 9.726421886249102e-06, "loss": 0.4113, "step": 12160 }, { "epoch": 0.097359221126231, "grad_norm": 0.8261224184885669, "learning_rate": 9.734421246300297e-06, "loss": 0.4126, "step": 12170 }, { "epoch": 0.09743922048623611, "grad_norm": 0.686899131632395, "learning_rate": 9.742420606351493e-06, "loss": 0.4563, "step": 12180 }, { "epoch": 0.09751921984624123, "grad_norm": 0.7190791239306228, "learning_rate": 9.75041996640269e-06, "loss": 0.4044, "step": 12190 }, { "epoch": 0.09759921920624635, "grad_norm": 0.706838780283809, "learning_rate": 9.758419326453884e-06, "loss": 0.4374, "step": 12200 }, { "epoch": 0.09767921856625147, "grad_norm": 0.8801020116269548, "learning_rate": 9.76641868650508e-06, "loss": 0.4252, "step": 12210 }, { "epoch": 0.0977592179262566, "grad_norm": 0.7845144667463588, "learning_rate": 9.774418046556277e-06, "loss": 0.437, "step": 12220 }, { "epoch": 0.09783921728626171, "grad_norm": 0.7182256190740821, "learning_rate": 9.782417406607471e-06, "loss": 0.4325, "step": 12230 }, { "epoch": 0.09791921664626684, "grad_norm": 0.6275433532224614, "learning_rate": 9.790416766658668e-06, "loss": 0.4159, "step": 12240 }, { "epoch": 0.09799921600627196, "grad_norm": 0.7325550590206439, "learning_rate": 9.798416126709864e-06, "loss": 0.4136, "step": 12250 }, { "epoch": 0.09807921536627708, "grad_norm": 0.8156563378501891, "learning_rate": 9.80641548676106e-06, "loss": 0.4232, "step": 12260 }, { "epoch": 0.0981592147262822, "grad_norm": 0.8016294878044852, "learning_rate": 9.814414846812257e-06, "loss": 0.4369, "step": 12270 }, { "epoch": 0.09823921408628732, "grad_norm": 0.7244602387349577, "learning_rate": 9.822414206863451e-06, "loss": 0.4136, "step": 12280 }, { "epoch": 0.09831921344629244, "grad_norm": 0.7449774452893142, "learning_rate": 9.830413566914648e-06, "loss": 0.4479, "step": 12290 }, { "epoch": 0.09839921280629756, "grad_norm": 0.7822683175762138, "learning_rate": 9.838412926965844e-06, "loss": 0.4435, "step": 12300 }, { "epoch": 0.09847921216630268, "grad_norm": 0.7750690258305681, "learning_rate": 9.84641228701704e-06, "loss": 0.4258, "step": 12310 }, { "epoch": 0.09855921152630778, "grad_norm": 0.7860341769513675, "learning_rate": 9.854411647068235e-06, "loss": 0.4234, "step": 12320 }, { "epoch": 0.0986392108863129, "grad_norm": 0.7338288442264617, "learning_rate": 9.862411007119431e-06, "loss": 0.4266, "step": 12330 }, { "epoch": 0.09871921024631802, "grad_norm": 0.6773502041712129, "learning_rate": 9.870410367170628e-06, "loss": 0.4205, "step": 12340 }, { "epoch": 0.09879920960632314, "grad_norm": 0.6820746371134853, "learning_rate": 9.878409727221822e-06, "loss": 0.428, "step": 12350 }, { "epoch": 0.09887920896632826, "grad_norm": 0.6122951459571231, "learning_rate": 9.886409087273019e-06, "loss": 0.4192, "step": 12360 }, { "epoch": 0.09895920832633338, "grad_norm": 0.7625021496319138, "learning_rate": 9.894408447324215e-06, "loss": 0.4334, "step": 12370 }, { "epoch": 0.0990392076863385, "grad_norm": 0.706314737590307, "learning_rate": 9.902407807375411e-06, "loss": 0.4313, "step": 12380 }, { "epoch": 0.09911920704634362, "grad_norm": 0.6602793507120377, "learning_rate": 9.910407167426606e-06, "loss": 0.4107, "step": 12390 }, { "epoch": 0.09919920640634874, "grad_norm": 0.7239242123421675, "learning_rate": 9.918406527477802e-06, "loss": 0.4182, "step": 12400 }, { "epoch": 0.09927920576635386, "grad_norm": 0.7236288796299339, "learning_rate": 9.926405887528999e-06, "loss": 0.4287, "step": 12410 }, { "epoch": 0.09935920512635898, "grad_norm": 0.7323414168037194, "learning_rate": 9.934405247580195e-06, "loss": 0.4391, "step": 12420 }, { "epoch": 0.0994392044863641, "grad_norm": 0.6387064716799525, "learning_rate": 9.942404607631391e-06, "loss": 0.4117, "step": 12430 }, { "epoch": 0.09951920384636923, "grad_norm": 0.6642273425768288, "learning_rate": 9.950403967682586e-06, "loss": 0.4152, "step": 12440 }, { "epoch": 0.09959920320637435, "grad_norm": 0.722630197924363, "learning_rate": 9.958403327733782e-06, "loss": 0.4197, "step": 12450 }, { "epoch": 0.09967920256637947, "grad_norm": 0.8009999995639696, "learning_rate": 9.966402687784979e-06, "loss": 0.4176, "step": 12460 }, { "epoch": 0.09975920192638459, "grad_norm": 0.7563635124338784, "learning_rate": 9.974402047836173e-06, "loss": 0.4192, "step": 12470 }, { "epoch": 0.0998392012863897, "grad_norm": 0.7127787072825136, "learning_rate": 9.98240140788737e-06, "loss": 0.4218, "step": 12480 }, { "epoch": 0.09991920064639483, "grad_norm": 0.7006784792497183, "learning_rate": 9.990400767938566e-06, "loss": 0.446, "step": 12490 }, { "epoch": 0.09999920000639995, "grad_norm": 0.769984387588816, "learning_rate": 9.99840012798976e-06, "loss": 0.4249, "step": 12500 }, { "epoch": 0.10007919936640507, "grad_norm": 0.7036679460296063, "learning_rate": 9.999999875228707e-06, "loss": 0.4295, "step": 12510 }, { "epoch": 0.10015919872641019, "grad_norm": 0.6351700119634297, "learning_rate": 9.999999368345333e-06, "loss": 0.4037, "step": 12520 }, { "epoch": 0.10023919808641531, "grad_norm": 0.6816329025818101, "learning_rate": 9.999998471551712e-06, "loss": 0.4345, "step": 12530 }, { "epoch": 0.10031919744642043, "grad_norm": 0.6922625444611404, "learning_rate": 9.999997184847918e-06, "loss": 0.4238, "step": 12540 }, { "epoch": 0.10039919680642555, "grad_norm": 0.723161454673616, "learning_rate": 9.999995508234048e-06, "loss": 0.4345, "step": 12550 }, { "epoch": 0.10047919616643067, "grad_norm": 0.7287591477770445, "learning_rate": 9.999993441710235e-06, "loss": 0.4384, "step": 12560 }, { "epoch": 0.10055919552643579, "grad_norm": 0.698975228187669, "learning_rate": 9.999990985276636e-06, "loss": 0.424, "step": 12570 }, { "epoch": 0.10063919488644091, "grad_norm": 0.6364019826757156, "learning_rate": 9.999988138933445e-06, "loss": 0.4261, "step": 12580 }, { "epoch": 0.10071919424644603, "grad_norm": 0.7881903404118865, "learning_rate": 9.999984902680886e-06, "loss": 0.391, "step": 12590 }, { "epoch": 0.10079919360645115, "grad_norm": 0.7947123041317522, "learning_rate": 9.999981276519209e-06, "loss": 0.4292, "step": 12600 }, { "epoch": 0.10087919296645627, "grad_norm": 0.8833522887829013, "learning_rate": 9.999977260448697e-06, "loss": 0.4268, "step": 12610 }, { "epoch": 0.10095919232646139, "grad_norm": 0.6302767720268487, "learning_rate": 9.999972854469664e-06, "loss": 0.4241, "step": 12620 }, { "epoch": 0.10103919168646651, "grad_norm": 0.756058766851497, "learning_rate": 9.999968058582452e-06, "loss": 0.3913, "step": 12630 }, { "epoch": 0.10111919104647163, "grad_norm": 0.8096715663799439, "learning_rate": 9.999962872787437e-06, "loss": 0.4389, "step": 12640 }, { "epoch": 0.10119919040647675, "grad_norm": 0.816281428209869, "learning_rate": 9.999957297085022e-06, "loss": 0.4469, "step": 12650 }, { "epoch": 0.10127918976648187, "grad_norm": 0.7773327172181268, "learning_rate": 9.999951331475644e-06, "loss": 0.4188, "step": 12660 }, { "epoch": 0.10135918912648699, "grad_norm": 0.8298186386739432, "learning_rate": 9.999944975959764e-06, "loss": 0.4378, "step": 12670 }, { "epoch": 0.10143918848649211, "grad_norm": 0.6256601445147305, "learning_rate": 9.999938230537882e-06, "loss": 0.4101, "step": 12680 }, { "epoch": 0.10151918784649723, "grad_norm": 0.6357548441036565, "learning_rate": 9.99993109521052e-06, "loss": 0.4135, "step": 12690 }, { "epoch": 0.10159918720650235, "grad_norm": 0.5867022622665143, "learning_rate": 9.999923569978238e-06, "loss": 0.3912, "step": 12700 }, { "epoch": 0.10167918656650747, "grad_norm": 0.7757109623689213, "learning_rate": 9.99991565484162e-06, "loss": 0.4333, "step": 12710 }, { "epoch": 0.10175918592651259, "grad_norm": 0.7587053995990342, "learning_rate": 9.999907349801286e-06, "loss": 0.429, "step": 12720 }, { "epoch": 0.10183918528651771, "grad_norm": 0.7184906748106201, "learning_rate": 9.999898654857882e-06, "loss": 0.4187, "step": 12730 }, { "epoch": 0.10191918464652283, "grad_norm": 0.6513634789334192, "learning_rate": 9.999889570012086e-06, "loss": 0.3777, "step": 12740 }, { "epoch": 0.10199918400652795, "grad_norm": 0.6861628819778759, "learning_rate": 9.999880095264607e-06, "loss": 0.41, "step": 12750 }, { "epoch": 0.10207918336653307, "grad_norm": 0.7703414255648229, "learning_rate": 9.999870230616184e-06, "loss": 0.4363, "step": 12760 }, { "epoch": 0.10215918272653819, "grad_norm": 0.6054715966413567, "learning_rate": 9.999859976067587e-06, "loss": 0.4313, "step": 12770 }, { "epoch": 0.10223918208654331, "grad_norm": 0.8657052631359575, "learning_rate": 9.999849331619614e-06, "loss": 0.4296, "step": 12780 }, { "epoch": 0.10231918144654843, "grad_norm": 0.7045143898768585, "learning_rate": 9.999838297273093e-06, "loss": 0.4066, "step": 12790 }, { "epoch": 0.10239918080655355, "grad_norm": 0.7373548453148813, "learning_rate": 9.99982687302889e-06, "loss": 0.4244, "step": 12800 }, { "epoch": 0.10247918016655867, "grad_norm": 0.8495721225976693, "learning_rate": 9.999815058887892e-06, "loss": 0.4454, "step": 12810 }, { "epoch": 0.10255917952656379, "grad_norm": 0.7144118843656745, "learning_rate": 9.99980285485102e-06, "loss": 0.4253, "step": 12820 }, { "epoch": 0.10263917888656891, "grad_norm": 0.7174836292288956, "learning_rate": 9.99979026091923e-06, "loss": 0.4355, "step": 12830 }, { "epoch": 0.10271917824657403, "grad_norm": 0.8513635161170304, "learning_rate": 9.999777277093497e-06, "loss": 0.4205, "step": 12840 }, { "epoch": 0.10279917760657915, "grad_norm": 0.7619023035830205, "learning_rate": 9.99976390337484e-06, "loss": 0.4431, "step": 12850 }, { "epoch": 0.10287917696658427, "grad_norm": 0.6984679533392772, "learning_rate": 9.999750139764299e-06, "loss": 0.3985, "step": 12860 }, { "epoch": 0.1029591763265894, "grad_norm": 0.669221392028639, "learning_rate": 9.999735986262949e-06, "loss": 0.4134, "step": 12870 }, { "epoch": 0.10303917568659451, "grad_norm": 0.8272841132710705, "learning_rate": 9.999721442871892e-06, "loss": 0.4107, "step": 12880 }, { "epoch": 0.10311917504659962, "grad_norm": 0.8070336838024695, "learning_rate": 9.999706509592261e-06, "loss": 0.4322, "step": 12890 }, { "epoch": 0.10319917440660474, "grad_norm": 0.6502111750838412, "learning_rate": 9.999691186425225e-06, "loss": 0.4132, "step": 12900 }, { "epoch": 0.10327917376660986, "grad_norm": 0.713289341709735, "learning_rate": 9.999675473371974e-06, "loss": 0.4009, "step": 12910 }, { "epoch": 0.10335917312661498, "grad_norm": 0.6303547689683647, "learning_rate": 9.999659370433737e-06, "loss": 0.4111, "step": 12920 }, { "epoch": 0.1034391724866201, "grad_norm": 0.7096350567529363, "learning_rate": 9.999642877611768e-06, "loss": 0.4184, "step": 12930 }, { "epoch": 0.10351917184662522, "grad_norm": 0.7149827943059521, "learning_rate": 9.999625994907351e-06, "loss": 0.4418, "step": 12940 }, { "epoch": 0.10359917120663034, "grad_norm": 0.7128945682435563, "learning_rate": 9.999608722321808e-06, "loss": 0.428, "step": 12950 }, { "epoch": 0.10367917056663546, "grad_norm": 0.691750802161614, "learning_rate": 9.999591059856481e-06, "loss": 0.4219, "step": 12960 }, { "epoch": 0.10375916992664058, "grad_norm": 0.811218889692887, "learning_rate": 9.999573007512751e-06, "loss": 0.4342, "step": 12970 }, { "epoch": 0.1038391692866457, "grad_norm": 0.7982027206656489, "learning_rate": 9.999554565292022e-06, "loss": 0.4221, "step": 12980 }, { "epoch": 0.10391916864665082, "grad_norm": 0.7555905817340733, "learning_rate": 9.999535733195735e-06, "loss": 0.4262, "step": 12990 }, { "epoch": 0.10399916800665594, "grad_norm": 0.6825006474967331, "learning_rate": 9.999516511225357e-06, "loss": 0.4118, "step": 13000 }, { "epoch": 0.10407916736666106, "grad_norm": 0.6764882304977192, "learning_rate": 9.999496899382388e-06, "loss": 0.4154, "step": 13010 }, { "epoch": 0.10415916672666618, "grad_norm": 0.6901445347265379, "learning_rate": 9.999476897668356e-06, "loss": 0.4416, "step": 13020 }, { "epoch": 0.1042391660866713, "grad_norm": 0.6469273750192113, "learning_rate": 9.999456506084823e-06, "loss": 0.4352, "step": 13030 }, { "epoch": 0.10431916544667642, "grad_norm": 0.7132170974765063, "learning_rate": 9.99943572463338e-06, "loss": 0.4243, "step": 13040 }, { "epoch": 0.10439916480668154, "grad_norm": 0.6859123030222153, "learning_rate": 9.999414553315643e-06, "loss": 0.4244, "step": 13050 }, { "epoch": 0.10447916416668666, "grad_norm": 0.6133357633110745, "learning_rate": 9.999392992133265e-06, "loss": 0.3955, "step": 13060 }, { "epoch": 0.10455916352669178, "grad_norm": 0.6899403784993949, "learning_rate": 9.99937104108793e-06, "loss": 0.4474, "step": 13070 }, { "epoch": 0.1046391628866969, "grad_norm": 0.6633844867667744, "learning_rate": 9.999348700181347e-06, "loss": 0.4212, "step": 13080 }, { "epoch": 0.10471916224670202, "grad_norm": 0.6526667513234635, "learning_rate": 9.999325969415259e-06, "loss": 0.426, "step": 13090 }, { "epoch": 0.10479916160670714, "grad_norm": 1.2137764375717273, "learning_rate": 9.999302848791437e-06, "loss": 0.4304, "step": 13100 }, { "epoch": 0.10487916096671226, "grad_norm": 0.666014396380386, "learning_rate": 9.999279338311686e-06, "loss": 0.4383, "step": 13110 }, { "epoch": 0.10495916032671739, "grad_norm": 0.754200115534547, "learning_rate": 9.99925543797784e-06, "loss": 0.456, "step": 13120 }, { "epoch": 0.1050391596867225, "grad_norm": 0.6968706171853778, "learning_rate": 9.999231147791761e-06, "loss": 0.432, "step": 13130 }, { "epoch": 0.10511915904672763, "grad_norm": 0.7548497635097798, "learning_rate": 9.999206467755345e-06, "loss": 0.4196, "step": 13140 }, { "epoch": 0.10519915840673275, "grad_norm": 0.7556310241368107, "learning_rate": 9.999181397870514e-06, "loss": 0.4169, "step": 13150 }, { "epoch": 0.10527915776673787, "grad_norm": 0.6423424392083549, "learning_rate": 9.999155938139224e-06, "loss": 0.4193, "step": 13160 }, { "epoch": 0.10535915712674299, "grad_norm": 0.699908150234164, "learning_rate": 9.999130088563462e-06, "loss": 0.4214, "step": 13170 }, { "epoch": 0.1054391564867481, "grad_norm": 0.6908244737501557, "learning_rate": 9.999103849145243e-06, "loss": 0.414, "step": 13180 }, { "epoch": 0.10551915584675323, "grad_norm": 0.7031913714026168, "learning_rate": 9.999077219886613e-06, "loss": 0.4442, "step": 13190 }, { "epoch": 0.10559915520675835, "grad_norm": 0.6100354000257456, "learning_rate": 9.999050200789648e-06, "loss": 0.4283, "step": 13200 }, { "epoch": 0.10567915456676347, "grad_norm": 0.7478309737669325, "learning_rate": 9.999022791856456e-06, "loss": 0.4166, "step": 13210 }, { "epoch": 0.10575915392676859, "grad_norm": 0.7221605555970813, "learning_rate": 9.998994993089173e-06, "loss": 0.4339, "step": 13220 }, { "epoch": 0.10583915328677371, "grad_norm": 0.7801908967312815, "learning_rate": 9.998966804489968e-06, "loss": 0.42, "step": 13230 }, { "epoch": 0.10591915264677883, "grad_norm": 0.7653810384769054, "learning_rate": 9.99893822606104e-06, "loss": 0.423, "step": 13240 }, { "epoch": 0.10599915200678395, "grad_norm": 0.7372437344111843, "learning_rate": 9.998909257804615e-06, "loss": 0.4302, "step": 13250 }, { "epoch": 0.10607915136678907, "grad_norm": 0.7895314761292663, "learning_rate": 9.998879899722953e-06, "loss": 0.4274, "step": 13260 }, { "epoch": 0.10615915072679419, "grad_norm": 0.6856857190855432, "learning_rate": 9.998850151818345e-06, "loss": 0.4326, "step": 13270 }, { "epoch": 0.10623915008679931, "grad_norm": 0.7040209338535802, "learning_rate": 9.99882001409311e-06, "loss": 0.4131, "step": 13280 }, { "epoch": 0.10631914944680443, "grad_norm": 0.76566747364696, "learning_rate": 9.998789486549599e-06, "loss": 0.45, "step": 13290 }, { "epoch": 0.10639914880680955, "grad_norm": 0.7620861952330595, "learning_rate": 9.99875856919019e-06, "loss": 0.4353, "step": 13300 }, { "epoch": 0.10647914816681467, "grad_norm": 0.6844380052807055, "learning_rate": 9.998727262017294e-06, "loss": 0.429, "step": 13310 }, { "epoch": 0.10655914752681979, "grad_norm": 0.6871473587488564, "learning_rate": 9.998695565033356e-06, "loss": 0.4337, "step": 13320 }, { "epoch": 0.10663914688682491, "grad_norm": 0.6467821160465275, "learning_rate": 9.998663478240847e-06, "loss": 0.4087, "step": 13330 }, { "epoch": 0.10671914624683003, "grad_norm": 0.7445046121403667, "learning_rate": 9.998631001642266e-06, "loss": 0.419, "step": 13340 }, { "epoch": 0.10679914560683515, "grad_norm": 0.6999045373622794, "learning_rate": 9.998598135240147e-06, "loss": 0.4393, "step": 13350 }, { "epoch": 0.10687914496684027, "grad_norm": 0.7065438852653798, "learning_rate": 9.998564879037055e-06, "loss": 0.441, "step": 13360 }, { "epoch": 0.10695914432684539, "grad_norm": 0.7289400207108464, "learning_rate": 9.998531233035581e-06, "loss": 0.4288, "step": 13370 }, { "epoch": 0.10703914368685051, "grad_norm": 0.7979155077750588, "learning_rate": 9.99849719723835e-06, "loss": 0.4023, "step": 13380 }, { "epoch": 0.10711914304685563, "grad_norm": 0.6234469970434322, "learning_rate": 9.998462771648016e-06, "loss": 0.4231, "step": 13390 }, { "epoch": 0.10719914240686075, "grad_norm": 0.6280613896045786, "learning_rate": 9.998427956267263e-06, "loss": 0.4162, "step": 13400 }, { "epoch": 0.10727914176686587, "grad_norm": 0.8074198989774983, "learning_rate": 9.998392751098806e-06, "loss": 0.4215, "step": 13410 }, { "epoch": 0.10735914112687099, "grad_norm": 0.6602759534138636, "learning_rate": 9.998357156145392e-06, "loss": 0.4188, "step": 13420 }, { "epoch": 0.10743914048687611, "grad_norm": 0.747098954958143, "learning_rate": 9.998321171409795e-06, "loss": 0.4383, "step": 13430 }, { "epoch": 0.10751913984688123, "grad_norm": 0.74912187638577, "learning_rate": 9.99828479689482e-06, "loss": 0.4184, "step": 13440 }, { "epoch": 0.10759913920688635, "grad_norm": 0.7897243997531418, "learning_rate": 9.998248032603308e-06, "loss": 0.4144, "step": 13450 }, { "epoch": 0.10767913856689146, "grad_norm": 0.7435396072745026, "learning_rate": 9.998210878538124e-06, "loss": 0.42, "step": 13460 }, { "epoch": 0.10775913792689658, "grad_norm": 0.6506815221863534, "learning_rate": 9.998173334702162e-06, "loss": 0.4184, "step": 13470 }, { "epoch": 0.1078391372869017, "grad_norm": 0.6545567537990024, "learning_rate": 9.998135401098353e-06, "loss": 0.4054, "step": 13480 }, { "epoch": 0.10791913664690682, "grad_norm": 0.6910303162659658, "learning_rate": 9.998097077729656e-06, "loss": 0.4398, "step": 13490 }, { "epoch": 0.10799913600691194, "grad_norm": 0.6493686450810902, "learning_rate": 9.998058364599054e-06, "loss": 0.411, "step": 13500 }, { "epoch": 0.10807913536691706, "grad_norm": 0.6767329882407203, "learning_rate": 9.998019261709574e-06, "loss": 0.426, "step": 13510 }, { "epoch": 0.10815913472692218, "grad_norm": 0.7824989697597401, "learning_rate": 9.99797976906426e-06, "loss": 0.4419, "step": 13520 }, { "epoch": 0.1082391340869273, "grad_norm": 0.6727211507569748, "learning_rate": 9.997939886666193e-06, "loss": 0.4377, "step": 13530 }, { "epoch": 0.10831913344693242, "grad_norm": 0.6292571427473298, "learning_rate": 9.997899614518483e-06, "loss": 0.4036, "step": 13540 }, { "epoch": 0.10839913280693754, "grad_norm": 0.7263780061677583, "learning_rate": 9.99785895262427e-06, "loss": 0.4152, "step": 13550 }, { "epoch": 0.10847913216694266, "grad_norm": 0.7012123879496253, "learning_rate": 9.997817900986726e-06, "loss": 0.4123, "step": 13560 }, { "epoch": 0.10855913152694778, "grad_norm": 0.7720491610755343, "learning_rate": 9.997776459609053e-06, "loss": 0.3893, "step": 13570 }, { "epoch": 0.1086391308869529, "grad_norm": 0.7549036482374943, "learning_rate": 9.997734628494478e-06, "loss": 0.4574, "step": 13580 }, { "epoch": 0.10871913024695802, "grad_norm": 0.8285192739063386, "learning_rate": 9.997692407646268e-06, "loss": 0.4228, "step": 13590 }, { "epoch": 0.10879912960696314, "grad_norm": 0.6781889218529991, "learning_rate": 9.997649797067717e-06, "loss": 0.3979, "step": 13600 }, { "epoch": 0.10887912896696826, "grad_norm": 0.7418890349266019, "learning_rate": 9.997606796762142e-06, "loss": 0.3951, "step": 13610 }, { "epoch": 0.10895912832697338, "grad_norm": 0.6656008660430992, "learning_rate": 9.9975634067329e-06, "loss": 0.4484, "step": 13620 }, { "epoch": 0.1090391276869785, "grad_norm": 0.7926849320520285, "learning_rate": 9.997519626983373e-06, "loss": 0.418, "step": 13630 }, { "epoch": 0.10911912704698362, "grad_norm": 0.7309435362880802, "learning_rate": 9.997475457516976e-06, "loss": 0.4399, "step": 13640 }, { "epoch": 0.10919912640698874, "grad_norm": 0.8033662452766288, "learning_rate": 9.997430898337154e-06, "loss": 0.4302, "step": 13650 }, { "epoch": 0.10927912576699386, "grad_norm": 0.599419389437075, "learning_rate": 9.99738594944738e-06, "loss": 0.4398, "step": 13660 }, { "epoch": 0.10935912512699898, "grad_norm": 0.6901691347744769, "learning_rate": 9.997340610851162e-06, "loss": 0.4391, "step": 13670 }, { "epoch": 0.1094391244870041, "grad_norm": 0.6482135408501111, "learning_rate": 9.997294882552032e-06, "loss": 0.4218, "step": 13680 }, { "epoch": 0.10951912384700922, "grad_norm": 0.7404007968037052, "learning_rate": 9.997248764553559e-06, "loss": 0.422, "step": 13690 }, { "epoch": 0.10959912320701434, "grad_norm": 0.6003811745977393, "learning_rate": 9.997202256859338e-06, "loss": 0.431, "step": 13700 }, { "epoch": 0.10967912256701946, "grad_norm": 0.6784774862306612, "learning_rate": 9.997155359472995e-06, "loss": 0.4161, "step": 13710 }, { "epoch": 0.10975912192702458, "grad_norm": 0.7230278975122312, "learning_rate": 9.99710807239819e-06, "loss": 0.4281, "step": 13720 }, { "epoch": 0.1098391212870297, "grad_norm": 0.7005100694034989, "learning_rate": 9.997060395638607e-06, "loss": 0.4395, "step": 13730 }, { "epoch": 0.10991912064703482, "grad_norm": 0.8204439642759143, "learning_rate": 9.997012329197966e-06, "loss": 0.434, "step": 13740 }, { "epoch": 0.10999912000703994, "grad_norm": 2.4541180505157554, "learning_rate": 9.996963873080015e-06, "loss": 0.4245, "step": 13750 }, { "epoch": 0.11007911936704506, "grad_norm": 0.6600760757055205, "learning_rate": 9.996915027288532e-06, "loss": 0.4081, "step": 13760 }, { "epoch": 0.11015911872705018, "grad_norm": 0.7640706928695541, "learning_rate": 9.996865791827328e-06, "loss": 0.4241, "step": 13770 }, { "epoch": 0.1102391180870553, "grad_norm": 0.6726442335462584, "learning_rate": 9.99681616670024e-06, "loss": 0.4087, "step": 13780 }, { "epoch": 0.11031911744706042, "grad_norm": 0.6550275140598685, "learning_rate": 9.99676615191114e-06, "loss": 0.4454, "step": 13790 }, { "epoch": 0.11039911680706554, "grad_norm": 0.6108194056396166, "learning_rate": 9.996715747463928e-06, "loss": 0.408, "step": 13800 }, { "epoch": 0.11047911616707067, "grad_norm": 0.6588443687966232, "learning_rate": 9.996664953362533e-06, "loss": 0.3989, "step": 13810 }, { "epoch": 0.11055911552707579, "grad_norm": 0.7421137405146544, "learning_rate": 9.996613769610916e-06, "loss": 0.4171, "step": 13820 }, { "epoch": 0.1106391148870809, "grad_norm": 0.7108672059841729, "learning_rate": 9.996562196213071e-06, "loss": 0.4451, "step": 13830 }, { "epoch": 0.11071911424708603, "grad_norm": 0.6661569871416377, "learning_rate": 9.996510233173018e-06, "loss": 0.422, "step": 13840 }, { "epoch": 0.11079911360709115, "grad_norm": 0.6920780459478041, "learning_rate": 9.996457880494808e-06, "loss": 0.4401, "step": 13850 }, { "epoch": 0.11087911296709627, "grad_norm": 0.7278789840427938, "learning_rate": 9.996405138182527e-06, "loss": 0.4009, "step": 13860 }, { "epoch": 0.11095911232710139, "grad_norm": 0.6586156373762244, "learning_rate": 9.996352006240285e-06, "loss": 0.4322, "step": 13870 }, { "epoch": 0.1110391116871065, "grad_norm": 0.5926926699245251, "learning_rate": 9.996298484672226e-06, "loss": 0.4405, "step": 13880 }, { "epoch": 0.11111911104711163, "grad_norm": 0.6816009246459803, "learning_rate": 9.996244573482525e-06, "loss": 0.4388, "step": 13890 }, { "epoch": 0.11119911040711675, "grad_norm": 0.7027787835665336, "learning_rate": 9.996190272675383e-06, "loss": 0.4288, "step": 13900 }, { "epoch": 0.11127910976712187, "grad_norm": 0.7378587330830044, "learning_rate": 9.996135582255039e-06, "loss": 0.4502, "step": 13910 }, { "epoch": 0.11135910912712699, "grad_norm": 0.7653503976640814, "learning_rate": 9.996080502225753e-06, "loss": 0.4541, "step": 13920 }, { "epoch": 0.11143910848713211, "grad_norm": 0.9384234587755037, "learning_rate": 9.996025032591824e-06, "loss": 0.4502, "step": 13930 }, { "epoch": 0.11151910784713723, "grad_norm": 0.7901628593642558, "learning_rate": 9.995969173357579e-06, "loss": 0.4356, "step": 13940 }, { "epoch": 0.11159910720714235, "grad_norm": 0.7242045226537073, "learning_rate": 9.995912924527368e-06, "loss": 0.4272, "step": 13950 }, { "epoch": 0.11167910656714747, "grad_norm": 0.7873535840385736, "learning_rate": 9.995856286105581e-06, "loss": 0.4503, "step": 13960 }, { "epoch": 0.11175910592715259, "grad_norm": 0.7526657124651991, "learning_rate": 9.995799258096635e-06, "loss": 0.4103, "step": 13970 }, { "epoch": 0.11183910528715771, "grad_norm": 0.6863004762825042, "learning_rate": 9.995741840504978e-06, "loss": 0.4355, "step": 13980 }, { "epoch": 0.11191910464716283, "grad_norm": 0.6126957606848471, "learning_rate": 9.995684033335084e-06, "loss": 0.4157, "step": 13990 }, { "epoch": 0.11199910400716795, "grad_norm": 0.713475223808405, "learning_rate": 9.995625836591465e-06, "loss": 0.4197, "step": 14000 }, { "epoch": 0.11207910336717307, "grad_norm": 0.7595316958561168, "learning_rate": 9.995567250278657e-06, "loss": 0.4285, "step": 14010 }, { "epoch": 0.11215910272717819, "grad_norm": 0.835426360840669, "learning_rate": 9.995508274401229e-06, "loss": 0.4395, "step": 14020 }, { "epoch": 0.1122391020871833, "grad_norm": 0.7547739353783572, "learning_rate": 9.99544890896378e-06, "loss": 0.4037, "step": 14030 }, { "epoch": 0.11231910144718842, "grad_norm": 0.8038340835465744, "learning_rate": 9.99538915397094e-06, "loss": 0.4283, "step": 14040 }, { "epoch": 0.11239910080719354, "grad_norm": 0.6681474862967685, "learning_rate": 9.995329009427368e-06, "loss": 0.4241, "step": 14050 }, { "epoch": 0.11247910016719866, "grad_norm": 0.7610244089119994, "learning_rate": 9.995268475337754e-06, "loss": 0.445, "step": 14060 }, { "epoch": 0.11255909952720378, "grad_norm": 0.6569576694887086, "learning_rate": 9.99520755170682e-06, "loss": 0.4158, "step": 14070 }, { "epoch": 0.1126390988872089, "grad_norm": 0.666303149149089, "learning_rate": 9.995146238539317e-06, "loss": 0.4089, "step": 14080 }, { "epoch": 0.11271909824721402, "grad_norm": 0.7063184706470155, "learning_rate": 9.995084535840025e-06, "loss": 0.4264, "step": 14090 }, { "epoch": 0.11279909760721914, "grad_norm": 0.6836149165172635, "learning_rate": 9.995022443613757e-06, "loss": 0.4278, "step": 14100 }, { "epoch": 0.11287909696722426, "grad_norm": 0.7438352441537195, "learning_rate": 9.994959961865354e-06, "loss": 0.4265, "step": 14110 }, { "epoch": 0.11295909632722938, "grad_norm": 0.735980480294309, "learning_rate": 9.994897090599688e-06, "loss": 0.4359, "step": 14120 }, { "epoch": 0.1130390956872345, "grad_norm": 0.7055312215229406, "learning_rate": 9.994833829821661e-06, "loss": 0.438, "step": 14130 }, { "epoch": 0.11311909504723962, "grad_norm": 0.6805835889052743, "learning_rate": 9.99477017953621e-06, "loss": 0.4246, "step": 14140 }, { "epoch": 0.11319909440724474, "grad_norm": 0.6519631605914298, "learning_rate": 9.994706139748296e-06, "loss": 0.4271, "step": 14150 }, { "epoch": 0.11327909376724986, "grad_norm": 0.6689991115757256, "learning_rate": 9.994641710462913e-06, "loss": 0.4428, "step": 14160 }, { "epoch": 0.11335909312725498, "grad_norm": 0.6590404521992209, "learning_rate": 9.994576891685085e-06, "loss": 0.4367, "step": 14170 }, { "epoch": 0.1134390924872601, "grad_norm": 0.7009685758992132, "learning_rate": 9.994511683419869e-06, "loss": 0.4412, "step": 14180 }, { "epoch": 0.11351909184726522, "grad_norm": 0.8292667378687076, "learning_rate": 9.994446085672349e-06, "loss": 0.4004, "step": 14190 }, { "epoch": 0.11359909120727034, "grad_norm": 0.6351899924826423, "learning_rate": 9.994380098447638e-06, "loss": 0.4374, "step": 14200 }, { "epoch": 0.11367909056727546, "grad_norm": 0.7334932992260778, "learning_rate": 9.994313721750885e-06, "loss": 0.4391, "step": 14210 }, { "epoch": 0.11375908992728058, "grad_norm": 0.7053502647338515, "learning_rate": 9.994246955587264e-06, "loss": 0.4203, "step": 14220 }, { "epoch": 0.1138390892872857, "grad_norm": 0.6483237020949655, "learning_rate": 9.994179799961982e-06, "loss": 0.417, "step": 14230 }, { "epoch": 0.11391908864729082, "grad_norm": 0.7107548505469662, "learning_rate": 9.994112254880277e-06, "loss": 0.4287, "step": 14240 }, { "epoch": 0.11399908800729594, "grad_norm": 0.7059340155402544, "learning_rate": 9.994044320347415e-06, "loss": 0.4531, "step": 14250 }, { "epoch": 0.11407908736730106, "grad_norm": 0.7243355960943725, "learning_rate": 9.993975996368697e-06, "loss": 0.4286, "step": 14260 }, { "epoch": 0.11415908672730618, "grad_norm": 0.6951367330862728, "learning_rate": 9.993907282949447e-06, "loss": 0.4361, "step": 14270 }, { "epoch": 0.1142390860873113, "grad_norm": 0.7578093970025269, "learning_rate": 9.993838180095023e-06, "loss": 0.4263, "step": 14280 }, { "epoch": 0.11431908544731642, "grad_norm": 0.7830730519522279, "learning_rate": 9.993768687810818e-06, "loss": 0.4184, "step": 14290 }, { "epoch": 0.11439908480732154, "grad_norm": 0.8180960424776961, "learning_rate": 9.993698806102249e-06, "loss": 0.4301, "step": 14300 }, { "epoch": 0.11447908416732666, "grad_norm": 0.7236884509690854, "learning_rate": 9.993628534974761e-06, "loss": 0.4423, "step": 14310 }, { "epoch": 0.11455908352733178, "grad_norm": 0.7598443174577645, "learning_rate": 9.993557874433842e-06, "loss": 0.4165, "step": 14320 }, { "epoch": 0.1146390828873369, "grad_norm": 0.5761024668937649, "learning_rate": 9.993486824484998e-06, "loss": 0.4249, "step": 14330 }, { "epoch": 0.11471908224734202, "grad_norm": 0.6548137841386595, "learning_rate": 9.99341538513377e-06, "loss": 0.4071, "step": 14340 }, { "epoch": 0.11479908160734714, "grad_norm": 0.7257640524131996, "learning_rate": 9.993343556385728e-06, "loss": 0.4262, "step": 14350 }, { "epoch": 0.11487908096735226, "grad_norm": 0.809928702424015, "learning_rate": 9.993271338246475e-06, "loss": 0.4211, "step": 14360 }, { "epoch": 0.11495908032735738, "grad_norm": 0.731037670146136, "learning_rate": 9.993198730721643e-06, "loss": 0.4294, "step": 14370 }, { "epoch": 0.1150390796873625, "grad_norm": 0.854465746546888, "learning_rate": 9.993125733816891e-06, "loss": 0.4396, "step": 14380 }, { "epoch": 0.11511907904736762, "grad_norm": 0.588859103645739, "learning_rate": 9.993052347537916e-06, "loss": 0.4231, "step": 14390 }, { "epoch": 0.11519907840737274, "grad_norm": 0.6915958875781678, "learning_rate": 9.992978571890438e-06, "loss": 0.4227, "step": 14400 }, { "epoch": 0.11527907776737786, "grad_norm": 0.6838951608376179, "learning_rate": 9.99290440688021e-06, "loss": 0.4318, "step": 14410 }, { "epoch": 0.11535907712738298, "grad_norm": 0.7680148002212801, "learning_rate": 9.992829852513015e-06, "loss": 0.4285, "step": 14420 }, { "epoch": 0.1154390764873881, "grad_norm": 0.6958127641282632, "learning_rate": 9.992754908794669e-06, "loss": 0.4236, "step": 14430 }, { "epoch": 0.11551907584739322, "grad_norm": 0.8557641469570435, "learning_rate": 9.992679575731018e-06, "loss": 0.4063, "step": 14440 }, { "epoch": 0.11559907520739834, "grad_norm": 0.7687180005128482, "learning_rate": 9.99260385332793e-06, "loss": 0.4392, "step": 14450 }, { "epoch": 0.11567907456740346, "grad_norm": 0.8129302768210965, "learning_rate": 9.992527741591316e-06, "loss": 0.4319, "step": 14460 }, { "epoch": 0.11575907392740858, "grad_norm": 0.7301401343626619, "learning_rate": 9.992451240527108e-06, "loss": 0.4324, "step": 14470 }, { "epoch": 0.1158390732874137, "grad_norm": 0.6505577461364395, "learning_rate": 9.992374350141274e-06, "loss": 0.4227, "step": 14480 }, { "epoch": 0.11591907264741882, "grad_norm": 0.6509443836843094, "learning_rate": 9.99229707043981e-06, "loss": 0.4175, "step": 14490 }, { "epoch": 0.11599907200742395, "grad_norm": 0.6701312951483592, "learning_rate": 9.99221940142874e-06, "loss": 0.4177, "step": 14500 }, { "epoch": 0.11607907136742907, "grad_norm": 0.8456315060089477, "learning_rate": 9.992141343114122e-06, "loss": 0.4656, "step": 14510 }, { "epoch": 0.11615907072743419, "grad_norm": 0.6557994166407984, "learning_rate": 9.992062895502044e-06, "loss": 0.4167, "step": 14520 }, { "epoch": 0.1162390700874393, "grad_norm": 0.7583998319873114, "learning_rate": 9.991984058598623e-06, "loss": 0.4337, "step": 14530 }, { "epoch": 0.11631906944744443, "grad_norm": 0.6432284072945021, "learning_rate": 9.991904832410009e-06, "loss": 0.4373, "step": 14540 }, { "epoch": 0.11639906880744955, "grad_norm": 0.8143472823565598, "learning_rate": 9.991825216942375e-06, "loss": 0.4165, "step": 14550 }, { "epoch": 0.11647906816745467, "grad_norm": 0.7534721877602231, "learning_rate": 9.991745212201932e-06, "loss": 0.4504, "step": 14560 }, { "epoch": 0.11655906752745979, "grad_norm": 0.6960491140446398, "learning_rate": 9.991664818194922e-06, "loss": 0.4171, "step": 14570 }, { "epoch": 0.1166390668874649, "grad_norm": 0.8107229611138136, "learning_rate": 9.991584034927613e-06, "loss": 0.4274, "step": 14580 }, { "epoch": 0.11671906624747003, "grad_norm": 0.7397956874473819, "learning_rate": 9.9915028624063e-06, "loss": 0.4097, "step": 14590 }, { "epoch": 0.11679906560747515, "grad_norm": 0.6590713680150964, "learning_rate": 9.991421300637319e-06, "loss": 0.4121, "step": 14600 }, { "epoch": 0.11687906496748025, "grad_norm": 0.7041051634123378, "learning_rate": 9.991339349627027e-06, "loss": 0.4102, "step": 14610 }, { "epoch": 0.11695906432748537, "grad_norm": 0.8561085986833785, "learning_rate": 9.991257009381814e-06, "loss": 0.4264, "step": 14620 }, { "epoch": 0.1170390636874905, "grad_norm": 0.6886120654715369, "learning_rate": 9.991174279908105e-06, "loss": 0.4452, "step": 14630 }, { "epoch": 0.11711906304749561, "grad_norm": 0.7396942580337291, "learning_rate": 9.991091161212349e-06, "loss": 0.4201, "step": 14640 }, { "epoch": 0.11719906240750073, "grad_norm": 0.6323272480289289, "learning_rate": 9.991007653301026e-06, "loss": 0.4271, "step": 14650 }, { "epoch": 0.11727906176750585, "grad_norm": 0.7393558439166057, "learning_rate": 9.99092375618065e-06, "loss": 0.4252, "step": 14660 }, { "epoch": 0.11735906112751097, "grad_norm": 0.6620763655858679, "learning_rate": 9.990839469857764e-06, "loss": 0.4303, "step": 14670 }, { "epoch": 0.1174390604875161, "grad_norm": 0.680908855057083, "learning_rate": 9.990754794338941e-06, "loss": 0.4105, "step": 14680 }, { "epoch": 0.11751905984752122, "grad_norm": 0.7317958838030338, "learning_rate": 9.990669729630783e-06, "loss": 0.4063, "step": 14690 }, { "epoch": 0.11759905920752634, "grad_norm": 0.8936995718775301, "learning_rate": 9.990584275739924e-06, "loss": 0.423, "step": 14700 }, { "epoch": 0.11767905856753146, "grad_norm": 0.7030863885649662, "learning_rate": 9.990498432673028e-06, "loss": 0.4203, "step": 14710 }, { "epoch": 0.11775905792753658, "grad_norm": 0.6734890347217362, "learning_rate": 9.990412200436787e-06, "loss": 0.4269, "step": 14720 }, { "epoch": 0.1178390572875417, "grad_norm": 0.7278831939749703, "learning_rate": 9.99032557903793e-06, "loss": 0.4398, "step": 14730 }, { "epoch": 0.11791905664754682, "grad_norm": 0.617230022090087, "learning_rate": 9.990238568483208e-06, "loss": 0.4115, "step": 14740 }, { "epoch": 0.11799905600755194, "grad_norm": 0.605530322308733, "learning_rate": 9.99015116877941e-06, "loss": 0.4113, "step": 14750 }, { "epoch": 0.11807905536755706, "grad_norm": 0.7609912959650694, "learning_rate": 9.990063379933345e-06, "loss": 0.4325, "step": 14760 }, { "epoch": 0.11815905472756218, "grad_norm": 0.7484942054023486, "learning_rate": 9.989975201951866e-06, "loss": 0.4439, "step": 14770 }, { "epoch": 0.1182390540875673, "grad_norm": 0.6507569404432064, "learning_rate": 9.989886634841849e-06, "loss": 0.4058, "step": 14780 }, { "epoch": 0.11831905344757242, "grad_norm": 0.6344390219442255, "learning_rate": 9.989797678610194e-06, "loss": 0.41, "step": 14790 }, { "epoch": 0.11839905280757754, "grad_norm": 0.6926100304340502, "learning_rate": 9.989708333263845e-06, "loss": 0.4349, "step": 14800 }, { "epoch": 0.11847905216758266, "grad_norm": 0.6619863849170571, "learning_rate": 9.989618598809765e-06, "loss": 0.4026, "step": 14810 }, { "epoch": 0.11855905152758778, "grad_norm": 0.6380245801441544, "learning_rate": 9.989528475254953e-06, "loss": 0.4317, "step": 14820 }, { "epoch": 0.1186390508875929, "grad_norm": 0.7346059238291454, "learning_rate": 9.989437962606438e-06, "loss": 0.4359, "step": 14830 }, { "epoch": 0.11871905024759802, "grad_norm": 0.8759096061909492, "learning_rate": 9.989347060871279e-06, "loss": 0.4347, "step": 14840 }, { "epoch": 0.11879904960760314, "grad_norm": 0.6357001507400389, "learning_rate": 9.989255770056561e-06, "loss": 0.4118, "step": 14850 }, { "epoch": 0.11887904896760826, "grad_norm": 0.7520326159708226, "learning_rate": 9.989164090169406e-06, "loss": 0.4202, "step": 14860 }, { "epoch": 0.11895904832761338, "grad_norm": 0.7114703942330928, "learning_rate": 9.989072021216963e-06, "loss": 0.4283, "step": 14870 }, { "epoch": 0.1190390476876185, "grad_norm": 1.6847769403359465, "learning_rate": 9.988979563206413e-06, "loss": 0.4076, "step": 14880 }, { "epoch": 0.11911904704762362, "grad_norm": 0.6432715778706811, "learning_rate": 9.988886716144963e-06, "loss": 0.4288, "step": 14890 }, { "epoch": 0.11919904640762874, "grad_norm": 0.7904898855295737, "learning_rate": 9.988793480039855e-06, "loss": 0.4387, "step": 14900 }, { "epoch": 0.11927904576763386, "grad_norm": 0.69691557822379, "learning_rate": 9.98869985489836e-06, "loss": 0.4149, "step": 14910 }, { "epoch": 0.11935904512763898, "grad_norm": 0.6330243824425752, "learning_rate": 9.988605840727778e-06, "loss": 0.4395, "step": 14920 }, { "epoch": 0.1194390444876441, "grad_norm": 0.6208366937543087, "learning_rate": 9.988511437535443e-06, "loss": 0.3972, "step": 14930 }, { "epoch": 0.11951904384764922, "grad_norm": 0.7206309968046777, "learning_rate": 9.988416645328713e-06, "loss": 0.4107, "step": 14940 }, { "epoch": 0.11959904320765434, "grad_norm": 0.7217281755502427, "learning_rate": 9.988321464114983e-06, "loss": 0.4202, "step": 14950 }, { "epoch": 0.11967904256765946, "grad_norm": 0.7589331591230956, "learning_rate": 9.988225893901674e-06, "loss": 0.4388, "step": 14960 }, { "epoch": 0.11975904192766458, "grad_norm": 0.6615942551634113, "learning_rate": 9.98812993469624e-06, "loss": 0.4377, "step": 14970 }, { "epoch": 0.1198390412876697, "grad_norm": 0.6441419403554683, "learning_rate": 9.988033586506164e-06, "loss": 0.4199, "step": 14980 }, { "epoch": 0.11991904064767482, "grad_norm": 0.6941075123084262, "learning_rate": 9.987936849338957e-06, "loss": 0.4079, "step": 14990 }, { "epoch": 0.11999904000767994, "grad_norm": 0.6133403879727068, "learning_rate": 9.987839723202164e-06, "loss": 0.4416, "step": 15000 }, { "epoch": 0.12007903936768506, "grad_norm": 0.7903052610823281, "learning_rate": 9.987742208103362e-06, "loss": 0.4097, "step": 15010 }, { "epoch": 0.12015903872769018, "grad_norm": 0.7601806380082867, "learning_rate": 9.987644304050152e-06, "loss": 0.4132, "step": 15020 }, { "epoch": 0.1202390380876953, "grad_norm": 0.6836087843269174, "learning_rate": 9.98754601105017e-06, "loss": 0.431, "step": 15030 }, { "epoch": 0.12031903744770042, "grad_norm": 0.6052512814353624, "learning_rate": 9.98744732911108e-06, "loss": 0.4286, "step": 15040 }, { "epoch": 0.12039903680770554, "grad_norm": 0.675950771581173, "learning_rate": 9.987348258240579e-06, "loss": 0.4414, "step": 15050 }, { "epoch": 0.12047903616771066, "grad_norm": 0.659360788786191, "learning_rate": 9.987248798446391e-06, "loss": 0.4197, "step": 15060 }, { "epoch": 0.12055903552771578, "grad_norm": 0.6319385313113254, "learning_rate": 9.987148949736276e-06, "loss": 0.4358, "step": 15070 }, { "epoch": 0.1206390348877209, "grad_norm": 0.6628407312118545, "learning_rate": 9.987048712118014e-06, "loss": 0.3892, "step": 15080 }, { "epoch": 0.12071903424772602, "grad_norm": 0.7332393908994967, "learning_rate": 9.986948085599429e-06, "loss": 0.4212, "step": 15090 }, { "epoch": 0.12079903360773114, "grad_norm": 0.6751550158005795, "learning_rate": 9.986847070188361e-06, "loss": 0.3908, "step": 15100 }, { "epoch": 0.12087903296773626, "grad_norm": 0.6585656740517051, "learning_rate": 9.986745665892692e-06, "loss": 0.4234, "step": 15110 }, { "epoch": 0.12095903232774138, "grad_norm": 0.614883022688145, "learning_rate": 9.986643872720329e-06, "loss": 0.4094, "step": 15120 }, { "epoch": 0.1210390316877465, "grad_norm": 0.639083430935872, "learning_rate": 9.98654169067921e-06, "loss": 0.4065, "step": 15130 }, { "epoch": 0.12111903104775162, "grad_norm": 0.6862331365508938, "learning_rate": 9.986439119777301e-06, "loss": 0.4356, "step": 15140 }, { "epoch": 0.12119903040775674, "grad_norm": 0.794537332108475, "learning_rate": 9.986336160022604e-06, "loss": 0.4193, "step": 15150 }, { "epoch": 0.12127902976776186, "grad_norm": 0.9216123275635871, "learning_rate": 9.986232811423147e-06, "loss": 0.4192, "step": 15160 }, { "epoch": 0.12135902912776698, "grad_norm": 0.7451922484421855, "learning_rate": 9.986129073986988e-06, "loss": 0.4032, "step": 15170 }, { "epoch": 0.12143902848777209, "grad_norm": 0.8199947240557665, "learning_rate": 9.986024947722216e-06, "loss": 0.4155, "step": 15180 }, { "epoch": 0.12151902784777721, "grad_norm": 0.6880035778742486, "learning_rate": 9.985920432636955e-06, "loss": 0.4167, "step": 15190 }, { "epoch": 0.12159902720778233, "grad_norm": 0.7125995714219299, "learning_rate": 9.985815528739353e-06, "loss": 0.4278, "step": 15200 }, { "epoch": 0.12167902656778745, "grad_norm": 0.7358379594074334, "learning_rate": 9.98571023603759e-06, "loss": 0.4154, "step": 15210 }, { "epoch": 0.12175902592779257, "grad_norm": 0.7818218532095671, "learning_rate": 9.985604554539876e-06, "loss": 0.4267, "step": 15220 }, { "epoch": 0.12183902528779769, "grad_norm": 0.6702743920601784, "learning_rate": 9.985498484254454e-06, "loss": 0.4354, "step": 15230 }, { "epoch": 0.12191902464780281, "grad_norm": 0.7189566889385206, "learning_rate": 9.985392025189597e-06, "loss": 0.4236, "step": 15240 }, { "epoch": 0.12199902400780793, "grad_norm": 0.7328541799904194, "learning_rate": 9.985285177353605e-06, "loss": 0.41, "step": 15250 }, { "epoch": 0.12207902336781305, "grad_norm": 0.7471221439409887, "learning_rate": 9.98517794075481e-06, "loss": 0.4229, "step": 15260 }, { "epoch": 0.12215902272781817, "grad_norm": 0.7122453557991535, "learning_rate": 9.985070315401573e-06, "loss": 0.4023, "step": 15270 }, { "epoch": 0.12223902208782329, "grad_norm": 0.7170515218411437, "learning_rate": 9.98496230130229e-06, "loss": 0.4202, "step": 15280 }, { "epoch": 0.12231902144782841, "grad_norm": 0.8165288439123618, "learning_rate": 9.984853898465384e-06, "loss": 0.4409, "step": 15290 }, { "epoch": 0.12239902080783353, "grad_norm": 0.8086138438705647, "learning_rate": 9.984745106899304e-06, "loss": 0.4174, "step": 15300 }, { "epoch": 0.12247902016783865, "grad_norm": 0.6769408741170373, "learning_rate": 9.984635926612541e-06, "loss": 0.4469, "step": 15310 }, { "epoch": 0.12255901952784377, "grad_norm": 0.7289428607934879, "learning_rate": 9.984526357613604e-06, "loss": 0.4122, "step": 15320 }, { "epoch": 0.1226390188878489, "grad_norm": 0.6912743050747314, "learning_rate": 9.984416399911039e-06, "loss": 0.4348, "step": 15330 }, { "epoch": 0.12271901824785401, "grad_norm": 0.7366886093413917, "learning_rate": 9.984306053513419e-06, "loss": 0.4349, "step": 15340 }, { "epoch": 0.12279901760785913, "grad_norm": 0.7396704170304423, "learning_rate": 9.984195318429353e-06, "loss": 0.4185, "step": 15350 }, { "epoch": 0.12287901696786425, "grad_norm": 0.7429863020694563, "learning_rate": 9.984084194667471e-06, "loss": 0.443, "step": 15360 }, { "epoch": 0.12295901632786937, "grad_norm": 0.7258320859981011, "learning_rate": 9.983972682236443e-06, "loss": 0.4142, "step": 15370 }, { "epoch": 0.1230390156878745, "grad_norm": 0.7293540810287862, "learning_rate": 9.983860781144963e-06, "loss": 0.447, "step": 15380 }, { "epoch": 0.12311901504787962, "grad_norm": 0.7407123830861086, "learning_rate": 9.983748491401758e-06, "loss": 0.426, "step": 15390 }, { "epoch": 0.12319901440788474, "grad_norm": 0.711014233278983, "learning_rate": 9.983635813015585e-06, "loss": 0.4574, "step": 15400 }, { "epoch": 0.12327901376788986, "grad_norm": 0.6019508285908702, "learning_rate": 9.98352274599523e-06, "loss": 0.4376, "step": 15410 }, { "epoch": 0.12335901312789498, "grad_norm": 0.7112534440108637, "learning_rate": 9.983409290349509e-06, "loss": 0.4198, "step": 15420 }, { "epoch": 0.1234390124879001, "grad_norm": 0.6202924967396243, "learning_rate": 9.983295446087272e-06, "loss": 0.4144, "step": 15430 }, { "epoch": 0.12351901184790522, "grad_norm": 0.670500160638767, "learning_rate": 9.983181213217396e-06, "loss": 0.4184, "step": 15440 }, { "epoch": 0.12359901120791034, "grad_norm": 0.6470403480213167, "learning_rate": 9.983066591748789e-06, "loss": 0.4129, "step": 15450 }, { "epoch": 0.12367901056791546, "grad_norm": 0.580402957339901, "learning_rate": 9.982951581690388e-06, "loss": 0.399, "step": 15460 }, { "epoch": 0.12375900992792058, "grad_norm": 0.6681931671404, "learning_rate": 9.982836183051163e-06, "loss": 0.4196, "step": 15470 }, { "epoch": 0.1238390092879257, "grad_norm": 0.6482929248429685, "learning_rate": 9.982720395840114e-06, "loss": 0.4491, "step": 15480 }, { "epoch": 0.12391900864793082, "grad_norm": 0.6851093681295868, "learning_rate": 9.982604220066269e-06, "loss": 0.4546, "step": 15490 }, { "epoch": 0.12399900800793594, "grad_norm": 0.7542976443051863, "learning_rate": 9.982487655738688e-06, "loss": 0.4228, "step": 15500 }, { "epoch": 0.12407900736794106, "grad_norm": 0.6306764986149577, "learning_rate": 9.98237070286646e-06, "loss": 0.419, "step": 15510 }, { "epoch": 0.12415900672794618, "grad_norm": 0.6940681892117998, "learning_rate": 9.982253361458707e-06, "loss": 0.4088, "step": 15520 }, { "epoch": 0.1242390060879513, "grad_norm": 0.6609706403615433, "learning_rate": 9.982135631524578e-06, "loss": 0.4499, "step": 15530 }, { "epoch": 0.12431900544795642, "grad_norm": 0.6079520903469678, "learning_rate": 9.982017513073255e-06, "loss": 0.4162, "step": 15540 }, { "epoch": 0.12439900480796154, "grad_norm": 0.6666069803104857, "learning_rate": 9.981899006113949e-06, "loss": 0.4428, "step": 15550 }, { "epoch": 0.12447900416796666, "grad_norm": 0.7390171819839055, "learning_rate": 9.981780110655902e-06, "loss": 0.42, "step": 15560 }, { "epoch": 0.12455900352797178, "grad_norm": 0.6795431272937315, "learning_rate": 9.981660826708382e-06, "loss": 0.4336, "step": 15570 }, { "epoch": 0.1246390028879769, "grad_norm": 0.6198404250920456, "learning_rate": 9.981541154280693e-06, "loss": 0.4347, "step": 15580 }, { "epoch": 0.12471900224798202, "grad_norm": 0.7545592687089479, "learning_rate": 9.98142109338217e-06, "loss": 0.4346, "step": 15590 }, { "epoch": 0.12479900160798714, "grad_norm": 0.7157484691627801, "learning_rate": 9.981300644022174e-06, "loss": 0.4109, "step": 15600 }, { "epoch": 0.12487900096799226, "grad_norm": 0.6488308301334241, "learning_rate": 9.981179806210097e-06, "loss": 0.4272, "step": 15610 }, { "epoch": 0.12495900032799738, "grad_norm": 0.6217552659114982, "learning_rate": 9.981058579955363e-06, "loss": 0.4018, "step": 15620 }, { "epoch": 0.1250389996880025, "grad_norm": 0.7362393464694206, "learning_rate": 9.980936965267425e-06, "loss": 0.4212, "step": 15630 }, { "epoch": 0.1251189990480076, "grad_norm": 0.7718636743397164, "learning_rate": 9.980814962155766e-06, "loss": 0.4269, "step": 15640 }, { "epoch": 0.12519899840801274, "grad_norm": 0.7242609954326427, "learning_rate": 9.980692570629902e-06, "loss": 0.4089, "step": 15650 }, { "epoch": 0.12527899776801785, "grad_norm": 0.8573761156822443, "learning_rate": 9.980569790699375e-06, "loss": 0.4232, "step": 15660 }, { "epoch": 0.12535899712802298, "grad_norm": 0.6504101243526491, "learning_rate": 9.980446622373763e-06, "loss": 0.4005, "step": 15670 }, { "epoch": 0.1254389964880281, "grad_norm": 0.6963786233700641, "learning_rate": 9.980323065662668e-06, "loss": 0.4172, "step": 15680 }, { "epoch": 0.12551899584803322, "grad_norm": 0.6589240668306598, "learning_rate": 9.980199120575725e-06, "loss": 0.4173, "step": 15690 }, { "epoch": 0.12559899520803833, "grad_norm": 0.6864081029670148, "learning_rate": 9.9800747871226e-06, "loss": 0.4483, "step": 15700 }, { "epoch": 0.12567899456804346, "grad_norm": 0.7399547660376369, "learning_rate": 9.97995006531299e-06, "loss": 0.4265, "step": 15710 }, { "epoch": 0.12575899392804857, "grad_norm": 0.6943286734779276, "learning_rate": 9.979824955156623e-06, "loss": 0.3959, "step": 15720 }, { "epoch": 0.1258389932880537, "grad_norm": 0.6512677267141797, "learning_rate": 9.97969945666325e-06, "loss": 0.4236, "step": 15730 }, { "epoch": 0.1259189926480588, "grad_norm": 0.7575841625622318, "learning_rate": 9.979573569842662e-06, "loss": 0.4168, "step": 15740 }, { "epoch": 0.12599899200806394, "grad_norm": 0.6225444879488412, "learning_rate": 9.979447294704672e-06, "loss": 0.4231, "step": 15750 }, { "epoch": 0.12607899136806905, "grad_norm": 0.6830294839478451, "learning_rate": 9.979320631259131e-06, "loss": 0.416, "step": 15760 }, { "epoch": 0.12615899072807418, "grad_norm": 0.741447388123293, "learning_rate": 9.979193579515916e-06, "loss": 0.417, "step": 15770 }, { "epoch": 0.1262389900880793, "grad_norm": 0.6433284986689924, "learning_rate": 9.979066139484931e-06, "loss": 0.3957, "step": 15780 }, { "epoch": 0.12631898944808442, "grad_norm": 0.6726569244986157, "learning_rate": 9.978938311176118e-06, "loss": 0.4211, "step": 15790 }, { "epoch": 0.12639898880808953, "grad_norm": 0.7728688115423255, "learning_rate": 9.978810094599444e-06, "loss": 0.419, "step": 15800 }, { "epoch": 0.12647898816809466, "grad_norm": 0.7871680140218066, "learning_rate": 9.97868148976491e-06, "loss": 0.4319, "step": 15810 }, { "epoch": 0.12655898752809977, "grad_norm": 0.7489695879600826, "learning_rate": 9.97855249668254e-06, "loss": 0.4436, "step": 15820 }, { "epoch": 0.1266389868881049, "grad_norm": 0.7003021505882135, "learning_rate": 9.978423115362396e-06, "loss": 0.4016, "step": 15830 }, { "epoch": 0.12671898624811, "grad_norm": 0.6558644563504775, "learning_rate": 9.978293345814567e-06, "loss": 0.4323, "step": 15840 }, { "epoch": 0.12679898560811514, "grad_norm": 0.6712745730917415, "learning_rate": 9.978163188049172e-06, "loss": 0.4309, "step": 15850 }, { "epoch": 0.12687898496812025, "grad_norm": 0.6118941325399052, "learning_rate": 9.978032642076364e-06, "loss": 0.4212, "step": 15860 }, { "epoch": 0.12695898432812538, "grad_norm": 0.6470332550595835, "learning_rate": 9.97790170790632e-06, "loss": 0.4101, "step": 15870 }, { "epoch": 0.1270389836881305, "grad_norm": 0.6774571554597068, "learning_rate": 9.977770385549251e-06, "loss": 0.4304, "step": 15880 }, { "epoch": 0.12711898304813563, "grad_norm": 0.7278432951604766, "learning_rate": 9.9776386750154e-06, "loss": 0.418, "step": 15890 }, { "epoch": 0.12719898240814073, "grad_norm": 0.6363471373153042, "learning_rate": 9.977506576315034e-06, "loss": 0.439, "step": 15900 }, { "epoch": 0.12727898176814587, "grad_norm": 0.731603139506807, "learning_rate": 9.977374089458457e-06, "loss": 0.41, "step": 15910 }, { "epoch": 0.12735898112815097, "grad_norm": 0.6784394245288766, "learning_rate": 9.977241214456002e-06, "loss": 0.4286, "step": 15920 }, { "epoch": 0.1274389804881561, "grad_norm": 0.6710184175973648, "learning_rate": 9.977107951318028e-06, "loss": 0.4401, "step": 15930 }, { "epoch": 0.1275189798481612, "grad_norm": 0.6511219128465641, "learning_rate": 9.976974300054928e-06, "loss": 0.4158, "step": 15940 }, { "epoch": 0.12759897920816635, "grad_norm": 0.7134455834606865, "learning_rate": 9.976840260677126e-06, "loss": 0.416, "step": 15950 }, { "epoch": 0.12767897856817145, "grad_norm": 0.6274358378516481, "learning_rate": 9.976705833195073e-06, "loss": 0.4351, "step": 15960 }, { "epoch": 0.1277589779281766, "grad_norm": 0.6233543324603799, "learning_rate": 9.976571017619252e-06, "loss": 0.4051, "step": 15970 }, { "epoch": 0.1278389772881817, "grad_norm": 0.7689917454674953, "learning_rate": 9.976435813960175e-06, "loss": 0.427, "step": 15980 }, { "epoch": 0.12791897664818683, "grad_norm": 0.7037029174374836, "learning_rate": 9.97630022222839e-06, "loss": 0.4051, "step": 15990 }, { "epoch": 0.12799897600819193, "grad_norm": 0.7355124074445999, "learning_rate": 9.976164242434464e-06, "loss": 0.4385, "step": 16000 }, { "epoch": 0.12807897536819707, "grad_norm": 0.7373586231201685, "learning_rate": 9.976027874589009e-06, "loss": 0.4399, "step": 16010 }, { "epoch": 0.12815897472820217, "grad_norm": 0.6705774192271041, "learning_rate": 9.975891118702653e-06, "loss": 0.4133, "step": 16020 }, { "epoch": 0.1282389740882073, "grad_norm": 0.7187404065172107, "learning_rate": 9.97575397478606e-06, "loss": 0.4248, "step": 16030 }, { "epoch": 0.12831897344821241, "grad_norm": 0.8175441831994867, "learning_rate": 9.97561644284993e-06, "loss": 0.4054, "step": 16040 }, { "epoch": 0.12839897280821752, "grad_norm": 0.6714855496316486, "learning_rate": 9.975478522904985e-06, "loss": 0.4355, "step": 16050 }, { "epoch": 0.12847897216822265, "grad_norm": 0.7175651052788368, "learning_rate": 9.97534021496198e-06, "loss": 0.4285, "step": 16060 }, { "epoch": 0.12855897152822776, "grad_norm": 0.6626746913167869, "learning_rate": 9.9752015190317e-06, "loss": 0.4404, "step": 16070 }, { "epoch": 0.1286389708882329, "grad_norm": 0.6373844296826675, "learning_rate": 9.975062435124962e-06, "loss": 0.4065, "step": 16080 }, { "epoch": 0.128718970248238, "grad_norm": 0.6556354006821312, "learning_rate": 9.974922963252614e-06, "loss": 0.4227, "step": 16090 }, { "epoch": 0.12879896960824314, "grad_norm": 0.728033894624986, "learning_rate": 9.974783103425526e-06, "loss": 0.4351, "step": 16100 }, { "epoch": 0.12887896896824824, "grad_norm": 0.8698239007031705, "learning_rate": 9.974642855654612e-06, "loss": 0.4103, "step": 16110 }, { "epoch": 0.12895896832825338, "grad_norm": 0.6156764020748962, "learning_rate": 9.974502219950805e-06, "loss": 0.4087, "step": 16120 }, { "epoch": 0.12903896768825848, "grad_norm": 0.6875760497236818, "learning_rate": 9.974361196325073e-06, "loss": 0.4482, "step": 16130 }, { "epoch": 0.12911896704826362, "grad_norm": 0.8910887256168029, "learning_rate": 9.974219784788413e-06, "loss": 0.421, "step": 16140 }, { "epoch": 0.12919896640826872, "grad_norm": 0.7091507785110248, "learning_rate": 9.974077985351852e-06, "loss": 0.4156, "step": 16150 }, { "epoch": 0.12927896576827386, "grad_norm": 0.5767386146831242, "learning_rate": 9.973935798026449e-06, "loss": 0.4011, "step": 16160 }, { "epoch": 0.12935896512827896, "grad_norm": 0.6411707689141054, "learning_rate": 9.97379322282329e-06, "loss": 0.3954, "step": 16170 }, { "epoch": 0.1294389644882841, "grad_norm": 0.6894343362780373, "learning_rate": 9.973650259753496e-06, "loss": 0.4347, "step": 16180 }, { "epoch": 0.1295189638482892, "grad_norm": 0.763972177943864, "learning_rate": 9.973506908828212e-06, "loss": 0.4188, "step": 16190 }, { "epoch": 0.12959896320829434, "grad_norm": 0.7209911922246054, "learning_rate": 9.97336317005862e-06, "loss": 0.4381, "step": 16200 }, { "epoch": 0.12967896256829944, "grad_norm": 0.6110199086292246, "learning_rate": 9.973219043455928e-06, "loss": 0.4408, "step": 16210 }, { "epoch": 0.12975896192830458, "grad_norm": 0.6489180022971337, "learning_rate": 9.973074529031376e-06, "loss": 0.4398, "step": 16220 }, { "epoch": 0.12983896128830968, "grad_norm": 0.5765846553946111, "learning_rate": 9.972929626796234e-06, "loss": 0.4199, "step": 16230 }, { "epoch": 0.12991896064831482, "grad_norm": 0.7365227927097997, "learning_rate": 9.972784336761799e-06, "loss": 0.4279, "step": 16240 }, { "epoch": 0.12999896000831992, "grad_norm": 0.8056475819243906, "learning_rate": 9.972638658939404e-06, "loss": 0.4458, "step": 16250 }, { "epoch": 0.13007895936832506, "grad_norm": 0.6642137241754639, "learning_rate": 9.972492593340406e-06, "loss": 0.4142, "step": 16260 }, { "epoch": 0.13015895872833017, "grad_norm": 0.6992017427142256, "learning_rate": 9.972346139976198e-06, "loss": 0.4216, "step": 16270 }, { "epoch": 0.1302389580883353, "grad_norm": 0.7081521761770049, "learning_rate": 9.972199298858201e-06, "loss": 0.4056, "step": 16280 }, { "epoch": 0.1303189574483404, "grad_norm": 0.645120443362932, "learning_rate": 9.972052069997864e-06, "loss": 0.4214, "step": 16290 }, { "epoch": 0.13039895680834554, "grad_norm": 0.7280080907171033, "learning_rate": 9.97190445340667e-06, "loss": 0.4516, "step": 16300 }, { "epoch": 0.13047895616835065, "grad_norm": 0.7209813240822915, "learning_rate": 9.971756449096131e-06, "loss": 0.4239, "step": 16310 }, { "epoch": 0.13055895552835578, "grad_norm": 0.6666891330684156, "learning_rate": 9.971608057077786e-06, "loss": 0.4044, "step": 16320 }, { "epoch": 0.1306389548883609, "grad_norm": 0.6725698695157024, "learning_rate": 9.971459277363207e-06, "loss": 0.4171, "step": 16330 }, { "epoch": 0.13071895424836602, "grad_norm": 0.667630820820871, "learning_rate": 9.971310109963999e-06, "loss": 0.4147, "step": 16340 }, { "epoch": 0.13079895360837113, "grad_norm": 0.734895372277806, "learning_rate": 9.971160554891793e-06, "loss": 0.4394, "step": 16350 }, { "epoch": 0.13087895296837626, "grad_norm": 0.6339323330620656, "learning_rate": 9.971010612158252e-06, "loss": 0.4264, "step": 16360 }, { "epoch": 0.13095895232838137, "grad_norm": 0.7184877114561069, "learning_rate": 9.970860281775068e-06, "loss": 0.4175, "step": 16370 }, { "epoch": 0.1310389516883865, "grad_norm": 0.734440745009853, "learning_rate": 9.970709563753965e-06, "loss": 0.4195, "step": 16380 }, { "epoch": 0.1311189510483916, "grad_norm": 0.7144142101700475, "learning_rate": 9.970558458106695e-06, "loss": 0.4314, "step": 16390 }, { "epoch": 0.13119895040839674, "grad_norm": 0.7814264810795553, "learning_rate": 9.97040696484504e-06, "loss": 0.4194, "step": 16400 }, { "epoch": 0.13127894976840185, "grad_norm": 0.6311509675123381, "learning_rate": 9.97025508398082e-06, "loss": 0.4188, "step": 16410 }, { "epoch": 0.13135894912840698, "grad_norm": 0.7556482805182179, "learning_rate": 9.970102815525873e-06, "loss": 0.3911, "step": 16420 }, { "epoch": 0.1314389484884121, "grad_norm": 0.6295783619046341, "learning_rate": 9.969950159492075e-06, "loss": 0.4327, "step": 16430 }, { "epoch": 0.13151894784841722, "grad_norm": 0.6600338486089969, "learning_rate": 9.969797115891332e-06, "loss": 0.413, "step": 16440 }, { "epoch": 0.13159894720842233, "grad_norm": 1.20446685137683, "learning_rate": 9.969643684735574e-06, "loss": 0.4151, "step": 16450 }, { "epoch": 0.13167894656842746, "grad_norm": 0.6009765755253456, "learning_rate": 9.969489866036773e-06, "loss": 0.4227, "step": 16460 }, { "epoch": 0.13175894592843257, "grad_norm": 0.7241040821748824, "learning_rate": 9.96933565980692e-06, "loss": 0.433, "step": 16470 }, { "epoch": 0.1318389452884377, "grad_norm": 0.7594488490574846, "learning_rate": 9.969181066058039e-06, "loss": 0.4304, "step": 16480 }, { "epoch": 0.1319189446484428, "grad_norm": 0.6269055221529369, "learning_rate": 9.969026084802188e-06, "loss": 0.4221, "step": 16490 }, { "epoch": 0.13199894400844794, "grad_norm": 0.7810796668030976, "learning_rate": 9.96887071605145e-06, "loss": 0.434, "step": 16500 }, { "epoch": 0.13207894336845305, "grad_norm": 0.6115091403577689, "learning_rate": 9.968714959817944e-06, "loss": 0.3772, "step": 16510 }, { "epoch": 0.13215894272845818, "grad_norm": 0.5963152559146718, "learning_rate": 9.968558816113815e-06, "loss": 0.4055, "step": 16520 }, { "epoch": 0.1322389420884633, "grad_norm": 0.5959211749887702, "learning_rate": 9.968402284951241e-06, "loss": 0.4076, "step": 16530 }, { "epoch": 0.13231894144846842, "grad_norm": 0.5913953978467206, "learning_rate": 9.968245366342426e-06, "loss": 0.4078, "step": 16540 }, { "epoch": 0.13239894080847353, "grad_norm": 0.6817936533948004, "learning_rate": 9.968088060299607e-06, "loss": 0.4173, "step": 16550 }, { "epoch": 0.13247894016847866, "grad_norm": 0.7510489425253001, "learning_rate": 9.967930366835053e-06, "loss": 0.4076, "step": 16560 }, { "epoch": 0.13255893952848377, "grad_norm": 0.7629794144615467, "learning_rate": 9.967772285961062e-06, "loss": 0.4384, "step": 16570 }, { "epoch": 0.1326389388884889, "grad_norm": 0.6293070946995492, "learning_rate": 9.967613817689955e-06, "loss": 0.4236, "step": 16580 }, { "epoch": 0.132718938248494, "grad_norm": 0.6720771492870758, "learning_rate": 9.967454962034099e-06, "loss": 0.4229, "step": 16590 }, { "epoch": 0.13279893760849915, "grad_norm": 0.6638072867949891, "learning_rate": 9.967295719005876e-06, "loss": 0.446, "step": 16600 }, { "epoch": 0.13287893696850425, "grad_norm": 0.65142197980075, "learning_rate": 9.967136088617703e-06, "loss": 0.4209, "step": 16610 }, { "epoch": 0.13295893632850936, "grad_norm": 0.6985634983059252, "learning_rate": 9.966976070882033e-06, "loss": 0.4301, "step": 16620 }, { "epoch": 0.1330389356885145, "grad_norm": 0.6911334977041661, "learning_rate": 9.966815665811343e-06, "loss": 0.4429, "step": 16630 }, { "epoch": 0.1331189350485196, "grad_norm": 0.7039576142214207, "learning_rate": 9.966654873418139e-06, "loss": 0.4237, "step": 16640 }, { "epoch": 0.13319893440852473, "grad_norm": 0.695562463296846, "learning_rate": 9.966493693714962e-06, "loss": 0.4051, "step": 16650 }, { "epoch": 0.13327893376852984, "grad_norm": 0.6191772475776949, "learning_rate": 9.966332126714381e-06, "loss": 0.4191, "step": 16660 }, { "epoch": 0.13335893312853497, "grad_norm": 0.7073305805542554, "learning_rate": 9.966170172428995e-06, "loss": 0.4267, "step": 16670 }, { "epoch": 0.13343893248854008, "grad_norm": 0.665834458674945, "learning_rate": 9.966007830871437e-06, "loss": 0.3996, "step": 16680 }, { "epoch": 0.1335189318485452, "grad_norm": 0.6445338756290879, "learning_rate": 9.965845102054358e-06, "loss": 0.4196, "step": 16690 }, { "epoch": 0.13359893120855032, "grad_norm": 0.6350756319376837, "learning_rate": 9.965681985990459e-06, "loss": 0.446, "step": 16700 }, { "epoch": 0.13367893056855545, "grad_norm": 0.6913186662199368, "learning_rate": 9.96551848269245e-06, "loss": 0.4278, "step": 16710 }, { "epoch": 0.13375892992856056, "grad_norm": 0.6481764588696087, "learning_rate": 9.965354592173089e-06, "loss": 0.414, "step": 16720 }, { "epoch": 0.1338389292885657, "grad_norm": 0.735395294851404, "learning_rate": 9.965190314445152e-06, "loss": 0.3964, "step": 16730 }, { "epoch": 0.1339189286485708, "grad_norm": 0.6593254509990137, "learning_rate": 9.965025649521451e-06, "loss": 0.4491, "step": 16740 }, { "epoch": 0.13399892800857593, "grad_norm": 0.6954385078370502, "learning_rate": 9.964860597414829e-06, "loss": 0.4268, "step": 16750 }, { "epoch": 0.13407892736858104, "grad_norm": 0.7688463667970202, "learning_rate": 9.964695158138154e-06, "loss": 0.4221, "step": 16760 }, { "epoch": 0.13415892672858618, "grad_norm": 0.5684812280236109, "learning_rate": 9.964529331704328e-06, "loss": 0.4346, "step": 16770 }, { "epoch": 0.13423892608859128, "grad_norm": 0.6287180292549858, "learning_rate": 9.964363118126284e-06, "loss": 0.4485, "step": 16780 }, { "epoch": 0.13431892544859642, "grad_norm": 0.7559892436445406, "learning_rate": 9.964196517416982e-06, "loss": 0.4269, "step": 16790 }, { "epoch": 0.13439892480860152, "grad_norm": 0.6839715377542765, "learning_rate": 9.964029529589414e-06, "loss": 0.4059, "step": 16800 }, { "epoch": 0.13447892416860666, "grad_norm": 0.6727295659165289, "learning_rate": 9.963862154656602e-06, "loss": 0.4045, "step": 16810 }, { "epoch": 0.13455892352861176, "grad_norm": 0.7171288957560812, "learning_rate": 9.9636943926316e-06, "loss": 0.4253, "step": 16820 }, { "epoch": 0.1346389228886169, "grad_norm": 0.7008575294234646, "learning_rate": 9.96352624352749e-06, "loss": 0.4035, "step": 16830 }, { "epoch": 0.134718922248622, "grad_norm": 0.6563715326667081, "learning_rate": 9.963357707357381e-06, "loss": 0.4003, "step": 16840 }, { "epoch": 0.13479892160862714, "grad_norm": 0.7640464317888754, "learning_rate": 9.96318878413442e-06, "loss": 0.4171, "step": 16850 }, { "epoch": 0.13487892096863224, "grad_norm": 0.7020065078389928, "learning_rate": 9.96301947387178e-06, "loss": 0.4164, "step": 16860 }, { "epoch": 0.13495892032863738, "grad_norm": 0.6279049760055165, "learning_rate": 9.962849776582662e-06, "loss": 0.4322, "step": 16870 }, { "epoch": 0.13503891968864248, "grad_norm": 0.6661450357412667, "learning_rate": 9.9626796922803e-06, "loss": 0.4267, "step": 16880 }, { "epoch": 0.13511891904864762, "grad_norm": 0.5900077044461228, "learning_rate": 9.962509220977958e-06, "loss": 0.3974, "step": 16890 }, { "epoch": 0.13519891840865272, "grad_norm": 0.6717050453002483, "learning_rate": 9.962338362688929e-06, "loss": 0.4322, "step": 16900 }, { "epoch": 0.13527891776865786, "grad_norm": 0.7235337592522526, "learning_rate": 9.962167117426538e-06, "loss": 0.4598, "step": 16910 }, { "epoch": 0.13535891712866296, "grad_norm": 0.7189693414556314, "learning_rate": 9.961995485204137e-06, "loss": 0.4399, "step": 16920 }, { "epoch": 0.1354389164886681, "grad_norm": 0.7310067194286954, "learning_rate": 9.961823466035111e-06, "loss": 0.4255, "step": 16930 }, { "epoch": 0.1355189158486732, "grad_norm": 0.6229519370730912, "learning_rate": 9.961651059932876e-06, "loss": 0.3859, "step": 16940 }, { "epoch": 0.13559891520867834, "grad_norm": 0.6485839037154872, "learning_rate": 9.961478266910875e-06, "loss": 0.4293, "step": 16950 }, { "epoch": 0.13567891456868345, "grad_norm": 0.7380641045925007, "learning_rate": 9.961305086982585e-06, "loss": 0.4399, "step": 16960 }, { "epoch": 0.13575891392868858, "grad_norm": 0.6679573030220546, "learning_rate": 9.961131520161507e-06, "loss": 0.416, "step": 16970 }, { "epoch": 0.13583891328869369, "grad_norm": 0.6158763552946297, "learning_rate": 9.96095756646118e-06, "loss": 0.4186, "step": 16980 }, { "epoch": 0.13591891264869882, "grad_norm": 0.7754829489751865, "learning_rate": 9.960783225895166e-06, "loss": 0.4148, "step": 16990 }, { "epoch": 0.13599891200870393, "grad_norm": 0.666175589329955, "learning_rate": 9.960608498477063e-06, "loss": 0.4175, "step": 17000 }, { "epoch": 0.13607891136870906, "grad_norm": 0.6942622898488557, "learning_rate": 9.960433384220497e-06, "loss": 0.429, "step": 17010 }, { "epoch": 0.13615891072871417, "grad_norm": 0.7092552386159049, "learning_rate": 9.96025788313912e-06, "loss": 0.4028, "step": 17020 }, { "epoch": 0.1362389100887193, "grad_norm": 0.6744631849085897, "learning_rate": 9.96008199524662e-06, "loss": 0.4097, "step": 17030 }, { "epoch": 0.1363189094487244, "grad_norm": 0.8137755081491165, "learning_rate": 9.959905720556715e-06, "loss": 0.41, "step": 17040 }, { "epoch": 0.13639890880872954, "grad_norm": 0.6580656342764415, "learning_rate": 9.959729059083148e-06, "loss": 0.419, "step": 17050 }, { "epoch": 0.13647890816873465, "grad_norm": 0.6449297899587194, "learning_rate": 9.9595520108397e-06, "loss": 0.4311, "step": 17060 }, { "epoch": 0.13655890752873978, "grad_norm": 0.6066084500321643, "learning_rate": 9.95937457584017e-06, "loss": 0.4374, "step": 17070 }, { "epoch": 0.1366389068887449, "grad_norm": 0.6766096005660803, "learning_rate": 9.959196754098403e-06, "loss": 0.4183, "step": 17080 }, { "epoch": 0.13671890624875002, "grad_norm": 0.6808587066730529, "learning_rate": 9.959018545628264e-06, "loss": 0.4121, "step": 17090 }, { "epoch": 0.13679890560875513, "grad_norm": 0.6719453016733253, "learning_rate": 9.958839950443644e-06, "loss": 0.4417, "step": 17100 }, { "epoch": 0.13687890496876026, "grad_norm": 0.6963173197437312, "learning_rate": 9.958660968558478e-06, "loss": 0.4008, "step": 17110 }, { "epoch": 0.13695890432876537, "grad_norm": 2.281046334225392, "learning_rate": 9.958481599986717e-06, "loss": 0.4262, "step": 17120 }, { "epoch": 0.1370389036887705, "grad_norm": 0.7449279921879521, "learning_rate": 9.958301844742354e-06, "loss": 0.4144, "step": 17130 }, { "epoch": 0.1371189030487756, "grad_norm": 0.6204827112125751, "learning_rate": 9.958121702839403e-06, "loss": 0.4162, "step": 17140 }, { "epoch": 0.13719890240878074, "grad_norm": 0.7063695009582045, "learning_rate": 9.957941174291913e-06, "loss": 0.4025, "step": 17150 }, { "epoch": 0.13727890176878585, "grad_norm": 0.7041079269742345, "learning_rate": 9.957760259113964e-06, "loss": 0.4174, "step": 17160 }, { "epoch": 0.13735890112879098, "grad_norm": 0.6534485678557924, "learning_rate": 9.95757895731966e-06, "loss": 0.4111, "step": 17170 }, { "epoch": 0.1374389004887961, "grad_norm": 0.7280201968114527, "learning_rate": 9.957397268923141e-06, "loss": 0.4247, "step": 17180 }, { "epoch": 0.1375188998488012, "grad_norm": 0.6824310023782809, "learning_rate": 9.957215193938577e-06, "loss": 0.4462, "step": 17190 }, { "epoch": 0.13759889920880633, "grad_norm": 0.7228267975089621, "learning_rate": 9.957032732380166e-06, "loss": 0.4245, "step": 17200 }, { "epoch": 0.13767889856881144, "grad_norm": 0.6546245974157573, "learning_rate": 9.956849884262136e-06, "loss": 0.4184, "step": 17210 }, { "epoch": 0.13775889792881657, "grad_norm": 0.6375481407149561, "learning_rate": 9.956666649598745e-06, "loss": 0.4052, "step": 17220 }, { "epoch": 0.13783889728882168, "grad_norm": 0.7743068841639801, "learning_rate": 9.956483028404285e-06, "loss": 0.4198, "step": 17230 }, { "epoch": 0.1379188966488268, "grad_norm": 0.6352378486704557, "learning_rate": 9.956299020693071e-06, "loss": 0.4027, "step": 17240 }, { "epoch": 0.13799889600883192, "grad_norm": 0.8247477320503045, "learning_rate": 9.956114626479457e-06, "loss": 0.4339, "step": 17250 }, { "epoch": 0.13807889536883705, "grad_norm": 0.6993796016025633, "learning_rate": 9.955929845777817e-06, "loss": 0.4139, "step": 17260 }, { "epoch": 0.13815889472884216, "grad_norm": 0.7422250678974723, "learning_rate": 9.955744678602566e-06, "loss": 0.4162, "step": 17270 }, { "epoch": 0.1382388940888473, "grad_norm": 0.6611853374118389, "learning_rate": 9.95555912496814e-06, "loss": 0.41, "step": 17280 }, { "epoch": 0.1383188934488524, "grad_norm": 0.8120611280300498, "learning_rate": 9.95537318488901e-06, "loss": 0.4278, "step": 17290 }, { "epoch": 0.13839889280885753, "grad_norm": 0.6281970740950243, "learning_rate": 9.955186858379678e-06, "loss": 0.4412, "step": 17300 }, { "epoch": 0.13847889216886264, "grad_norm": 0.66310501186557, "learning_rate": 9.955000145454672e-06, "loss": 0.424, "step": 17310 }, { "epoch": 0.13855889152886777, "grad_norm": 0.6767832641516772, "learning_rate": 9.954813046128551e-06, "loss": 0.4245, "step": 17320 }, { "epoch": 0.13863889088887288, "grad_norm": 0.7349397435851933, "learning_rate": 9.954625560415907e-06, "loss": 0.4502, "step": 17330 }, { "epoch": 0.138718890248878, "grad_norm": 0.6643500430732979, "learning_rate": 9.95443768833136e-06, "loss": 0.413, "step": 17340 }, { "epoch": 0.13879888960888312, "grad_norm": 0.6826891789679187, "learning_rate": 9.954249429889562e-06, "loss": 0.4259, "step": 17350 }, { "epoch": 0.13887888896888825, "grad_norm": 0.7801169431056139, "learning_rate": 9.954060785105194e-06, "loss": 0.421, "step": 17360 }, { "epoch": 0.13895888832889336, "grad_norm": 0.6746399458442625, "learning_rate": 9.953871753992963e-06, "loss": 0.4316, "step": 17370 }, { "epoch": 0.1390388876888985, "grad_norm": 0.6633115969325047, "learning_rate": 9.953682336567615e-06, "loss": 0.4263, "step": 17380 }, { "epoch": 0.1391188870489036, "grad_norm": 0.6596676296748596, "learning_rate": 9.953492532843918e-06, "loss": 0.4505, "step": 17390 }, { "epoch": 0.13919888640890873, "grad_norm": 0.7851229273004637, "learning_rate": 9.953302342836674e-06, "loss": 0.4359, "step": 17400 }, { "epoch": 0.13927888576891384, "grad_norm": 0.663994576682789, "learning_rate": 9.953111766560717e-06, "loss": 0.4346, "step": 17410 }, { "epoch": 0.13935888512891897, "grad_norm": 0.7163112912513424, "learning_rate": 9.952920804030903e-06, "loss": 0.4207, "step": 17420 }, { "epoch": 0.13943888448892408, "grad_norm": 0.6560386471701628, "learning_rate": 9.952729455262128e-06, "loss": 0.3959, "step": 17430 }, { "epoch": 0.13951888384892921, "grad_norm": 0.6209575299933962, "learning_rate": 9.952537720269315e-06, "loss": 0.4236, "step": 17440 }, { "epoch": 0.13959888320893432, "grad_norm": 0.7607298452265604, "learning_rate": 9.95234559906741e-06, "loss": 0.4455, "step": 17450 }, { "epoch": 0.13967888256893946, "grad_norm": 0.6232561865125329, "learning_rate": 9.9521530916714e-06, "loss": 0.448, "step": 17460 }, { "epoch": 0.13975888192894456, "grad_norm": 0.6642202450043118, "learning_rate": 9.951960198096296e-06, "loss": 0.4226, "step": 17470 }, { "epoch": 0.1398388812889497, "grad_norm": 0.6610201852347611, "learning_rate": 9.95176691835714e-06, "loss": 0.4209, "step": 17480 }, { "epoch": 0.1399188806489548, "grad_norm": 0.734808813537641, "learning_rate": 9.951573252469003e-06, "loss": 0.4185, "step": 17490 }, { "epoch": 0.13999888000895994, "grad_norm": 0.627595390658135, "learning_rate": 9.95137920044699e-06, "loss": 0.3983, "step": 17500 }, { "epoch": 0.14007887936896504, "grad_norm": 0.6367424148558828, "learning_rate": 9.95118476230623e-06, "loss": 0.4173, "step": 17510 }, { "epoch": 0.14015887872897018, "grad_norm": 0.6387188872513514, "learning_rate": 9.95098993806189e-06, "loss": 0.4254, "step": 17520 }, { "epoch": 0.14023887808897528, "grad_norm": 0.654200120441543, "learning_rate": 9.950794727729161e-06, "loss": 0.4072, "step": 17530 }, { "epoch": 0.14031887744898042, "grad_norm": 0.6760902214698423, "learning_rate": 9.950599131323265e-06, "loss": 0.4177, "step": 17540 }, { "epoch": 0.14039887680898552, "grad_norm": 0.612141027869031, "learning_rate": 9.950403148859456e-06, "loss": 0.4119, "step": 17550 }, { "epoch": 0.14047887616899066, "grad_norm": 0.6078691358575782, "learning_rate": 9.950206780353016e-06, "loss": 0.4161, "step": 17560 }, { "epoch": 0.14055887552899576, "grad_norm": 0.6780476221983842, "learning_rate": 9.950010025819259e-06, "loss": 0.4232, "step": 17570 }, { "epoch": 0.1406388748890009, "grad_norm": 0.683830207063501, "learning_rate": 9.949812885273529e-06, "loss": 0.4195, "step": 17580 }, { "epoch": 0.140718874249006, "grad_norm": 0.6712848393015484, "learning_rate": 9.949615358731199e-06, "loss": 0.4098, "step": 17590 }, { "epoch": 0.14079887360901114, "grad_norm": 0.6918921360014099, "learning_rate": 9.949417446207671e-06, "loss": 0.4072, "step": 17600 }, { "epoch": 0.14087887296901624, "grad_norm": 0.6754848594423383, "learning_rate": 9.94921914771838e-06, "loss": 0.4067, "step": 17610 }, { "epoch": 0.14095887232902138, "grad_norm": 0.7166776761679865, "learning_rate": 9.949020463278791e-06, "loss": 0.4221, "step": 17620 }, { "epoch": 0.14103887168902648, "grad_norm": 0.6191287836079775, "learning_rate": 9.948821392904396e-06, "loss": 0.4145, "step": 17630 }, { "epoch": 0.14111887104903162, "grad_norm": 3.7742430058774494, "learning_rate": 9.948621936610719e-06, "loss": 0.3951, "step": 17640 }, { "epoch": 0.14119887040903673, "grad_norm": 0.6341946450489819, "learning_rate": 9.948422094413314e-06, "loss": 0.3965, "step": 17650 }, { "epoch": 0.14127886976904186, "grad_norm": 0.6387630767091315, "learning_rate": 9.948221866327765e-06, "loss": 0.4169, "step": 17660 }, { "epoch": 0.14135886912904697, "grad_norm": 0.6570574101636678, "learning_rate": 9.948021252369688e-06, "loss": 0.4152, "step": 17670 }, { "epoch": 0.1414388684890521, "grad_norm": 0.8009691261071905, "learning_rate": 9.947820252554726e-06, "loss": 0.4401, "step": 17680 }, { "epoch": 0.1415188678490572, "grad_norm": 0.7490427729255769, "learning_rate": 9.947618866898554e-06, "loss": 0.4198, "step": 17690 }, { "epoch": 0.14159886720906234, "grad_norm": 0.7121998025606829, "learning_rate": 9.947417095416873e-06, "loss": 0.4029, "step": 17700 }, { "epoch": 0.14167886656906745, "grad_norm": 0.8848395483020253, "learning_rate": 9.947214938125422e-06, "loss": 0.4313, "step": 17710 }, { "epoch": 0.14175886592907258, "grad_norm": 0.7222057249128577, "learning_rate": 9.947012395039966e-06, "loss": 0.4394, "step": 17720 }, { "epoch": 0.1418388652890777, "grad_norm": 0.7677011609837037, "learning_rate": 9.946809466176294e-06, "loss": 0.4364, "step": 17730 }, { "epoch": 0.14191886464908282, "grad_norm": 0.7244356835765315, "learning_rate": 9.946606151550237e-06, "loss": 0.3968, "step": 17740 }, { "epoch": 0.14199886400908793, "grad_norm": 0.8014649450378192, "learning_rate": 9.946402451177646e-06, "loss": 0.4093, "step": 17750 }, { "epoch": 0.14207886336909303, "grad_norm": 0.7091094688771357, "learning_rate": 9.946198365074408e-06, "loss": 0.4185, "step": 17760 }, { "epoch": 0.14215886272909817, "grad_norm": 0.6213023427229329, "learning_rate": 9.94599389325644e-06, "loss": 0.435, "step": 17770 }, { "epoch": 0.14223886208910327, "grad_norm": 0.6673370100597944, "learning_rate": 9.945789035739682e-06, "loss": 0.4142, "step": 17780 }, { "epoch": 0.1423188614491084, "grad_norm": 0.6269113138553218, "learning_rate": 9.945583792540113e-06, "loss": 0.4026, "step": 17790 }, { "epoch": 0.14239886080911351, "grad_norm": 0.7140000722512789, "learning_rate": 9.945378163673736e-06, "loss": 0.4358, "step": 17800 }, { "epoch": 0.14247886016911865, "grad_norm": 0.7076529930565503, "learning_rate": 9.945172149156587e-06, "loss": 0.4392, "step": 17810 }, { "epoch": 0.14255885952912375, "grad_norm": 0.8011994507316866, "learning_rate": 9.944965749004733e-06, "loss": 0.4489, "step": 17820 }, { "epoch": 0.1426388588891289, "grad_norm": 0.6197273068064891, "learning_rate": 9.944758963234267e-06, "loss": 0.4356, "step": 17830 }, { "epoch": 0.142718858249134, "grad_norm": 0.8338344070952848, "learning_rate": 9.944551791861316e-06, "loss": 0.4233, "step": 17840 }, { "epoch": 0.14279885760913913, "grad_norm": 0.654989926260727, "learning_rate": 9.944344234902038e-06, "loss": 0.39, "step": 17850 }, { "epoch": 0.14287885696914424, "grad_norm": 0.6770813726555264, "learning_rate": 9.944136292372614e-06, "loss": 0.4195, "step": 17860 }, { "epoch": 0.14295885632914937, "grad_norm": 0.6890662622795616, "learning_rate": 9.943927964289263e-06, "loss": 0.4266, "step": 17870 }, { "epoch": 0.14303885568915448, "grad_norm": 0.7032725032099638, "learning_rate": 9.943719250668231e-06, "loss": 0.4288, "step": 17880 }, { "epoch": 0.1431188550491596, "grad_norm": 0.707783849932999, "learning_rate": 9.943510151525793e-06, "loss": 0.4184, "step": 17890 }, { "epoch": 0.14319885440916472, "grad_norm": 0.6570768565295626, "learning_rate": 9.943300666878254e-06, "loss": 0.4094, "step": 17900 }, { "epoch": 0.14327885376916985, "grad_norm": 0.7861410948471093, "learning_rate": 9.94309079674195e-06, "loss": 0.4204, "step": 17910 }, { "epoch": 0.14335885312917496, "grad_norm": 0.7050491707514325, "learning_rate": 9.942880541133249e-06, "loss": 0.4277, "step": 17920 }, { "epoch": 0.1434388524891801, "grad_norm": 0.7052111509508628, "learning_rate": 9.942669900068547e-06, "loss": 0.4178, "step": 17930 }, { "epoch": 0.1435188518491852, "grad_norm": 0.7462753917985003, "learning_rate": 9.942458873564269e-06, "loss": 0.4467, "step": 17940 }, { "epoch": 0.14359885120919033, "grad_norm": 0.6769161699953002, "learning_rate": 9.942247461636871e-06, "loss": 0.398, "step": 17950 }, { "epoch": 0.14367885056919544, "grad_norm": 0.5752539109908564, "learning_rate": 9.942035664302841e-06, "loss": 0.4191, "step": 17960 }, { "epoch": 0.14375884992920057, "grad_norm": 0.8312637620151997, "learning_rate": 9.941823481578695e-06, "loss": 0.4167, "step": 17970 }, { "epoch": 0.14383884928920568, "grad_norm": 0.6166459621143665, "learning_rate": 9.941610913480977e-06, "loss": 0.4255, "step": 17980 }, { "epoch": 0.1439188486492108, "grad_norm": 0.609636131318356, "learning_rate": 9.941397960026266e-06, "loss": 0.4371, "step": 17990 }, { "epoch": 0.14399884800921592, "grad_norm": 0.6532302678231348, "learning_rate": 9.941184621231169e-06, "loss": 0.4256, "step": 18000 }, { "epoch": 0.14407884736922105, "grad_norm": 0.6943535677213631, "learning_rate": 9.940970897112318e-06, "loss": 0.4064, "step": 18010 }, { "epoch": 0.14415884672922616, "grad_norm": 0.6516865600865885, "learning_rate": 9.940756787686388e-06, "loss": 0.415, "step": 18020 }, { "epoch": 0.1442388460892313, "grad_norm": 0.6913145430206034, "learning_rate": 9.940542292970067e-06, "loss": 0.436, "step": 18030 }, { "epoch": 0.1443188454492364, "grad_norm": 0.6781833539543296, "learning_rate": 9.94032741298009e-06, "loss": 0.4304, "step": 18040 }, { "epoch": 0.14439884480924153, "grad_norm": 0.7428842438078624, "learning_rate": 9.940112147733204e-06, "loss": 0.4047, "step": 18050 }, { "epoch": 0.14447884416924664, "grad_norm": 0.7475338201286139, "learning_rate": 9.939896497246204e-06, "loss": 0.4373, "step": 18060 }, { "epoch": 0.14455884352925177, "grad_norm": 0.6266685090630157, "learning_rate": 9.939680461535906e-06, "loss": 0.4053, "step": 18070 }, { "epoch": 0.14463884288925688, "grad_norm": 0.6334195938175069, "learning_rate": 9.939464040619152e-06, "loss": 0.4425, "step": 18080 }, { "epoch": 0.14471884224926201, "grad_norm": 0.6082704070764767, "learning_rate": 9.939247234512823e-06, "loss": 0.4294, "step": 18090 }, { "epoch": 0.14479884160926712, "grad_norm": 0.8072838338820967, "learning_rate": 9.939030043233827e-06, "loss": 0.4403, "step": 18100 }, { "epoch": 0.14487884096927225, "grad_norm": 0.7446430835885324, "learning_rate": 9.938812466799095e-06, "loss": 0.4079, "step": 18110 }, { "epoch": 0.14495884032927736, "grad_norm": 0.6476819460333887, "learning_rate": 9.9385945052256e-06, "loss": 0.4405, "step": 18120 }, { "epoch": 0.1450388396892825, "grad_norm": 0.6108412065820912, "learning_rate": 9.938376158530338e-06, "loss": 0.4024, "step": 18130 }, { "epoch": 0.1451188390492876, "grad_norm": 0.6623134497215775, "learning_rate": 9.938157426730333e-06, "loss": 0.4179, "step": 18140 }, { "epoch": 0.14519883840929274, "grad_norm": 0.812572800613546, "learning_rate": 9.937938309842647e-06, "loss": 0.4229, "step": 18150 }, { "epoch": 0.14527883776929784, "grad_norm": 0.6658149568777475, "learning_rate": 9.937718807884362e-06, "loss": 0.3868, "step": 18160 }, { "epoch": 0.14535883712930298, "grad_norm": 0.6434995773129977, "learning_rate": 9.937498920872601e-06, "loss": 0.4236, "step": 18170 }, { "epoch": 0.14543883648930808, "grad_norm": 0.6539297250155948, "learning_rate": 9.937278648824505e-06, "loss": 0.4146, "step": 18180 }, { "epoch": 0.14551883584931322, "grad_norm": 0.6651784866755535, "learning_rate": 9.937057991757257e-06, "loss": 0.4163, "step": 18190 }, { "epoch": 0.14559883520931832, "grad_norm": 0.7105082505461652, "learning_rate": 9.93683694968806e-06, "loss": 0.4018, "step": 18200 }, { "epoch": 0.14567883456932346, "grad_norm": 0.750421008612481, "learning_rate": 9.936615522634152e-06, "loss": 0.4653, "step": 18210 }, { "epoch": 0.14575883392932856, "grad_norm": 0.808206588850391, "learning_rate": 9.936393710612803e-06, "loss": 0.4506, "step": 18220 }, { "epoch": 0.1458388332893337, "grad_norm": 0.7385039770828116, "learning_rate": 9.936171513641308e-06, "loss": 0.4306, "step": 18230 }, { "epoch": 0.1459188326493388, "grad_norm": 0.6833994507451365, "learning_rate": 9.935948931736995e-06, "loss": 0.4149, "step": 18240 }, { "epoch": 0.14599883200934394, "grad_norm": 0.7017748948505835, "learning_rate": 9.935725964917222e-06, "loss": 0.4004, "step": 18250 }, { "epoch": 0.14607883136934904, "grad_norm": 0.6477785506867206, "learning_rate": 9.935502613199376e-06, "loss": 0.4105, "step": 18260 }, { "epoch": 0.14615883072935418, "grad_norm": 0.6463874018944588, "learning_rate": 9.935278876600871e-06, "loss": 0.4121, "step": 18270 }, { "epoch": 0.14623883008935928, "grad_norm": 0.6169087780400192, "learning_rate": 9.93505475513916e-06, "loss": 0.3891, "step": 18280 }, { "epoch": 0.14631882944936442, "grad_norm": 0.7440745798538062, "learning_rate": 9.934830248831718e-06, "loss": 0.4336, "step": 18290 }, { "epoch": 0.14639882880936952, "grad_norm": 0.6929374264327003, "learning_rate": 9.934605357696054e-06, "loss": 0.4304, "step": 18300 }, { "epoch": 0.14647882816937466, "grad_norm": 0.6869158468924783, "learning_rate": 9.934380081749702e-06, "loss": 0.4434, "step": 18310 }, { "epoch": 0.14655882752937976, "grad_norm": 0.8074358014929749, "learning_rate": 9.934154421010233e-06, "loss": 0.4556, "step": 18320 }, { "epoch": 0.14663882688938487, "grad_norm": 0.5852491475537283, "learning_rate": 9.933928375495242e-06, "loss": 0.402, "step": 18330 }, { "epoch": 0.14671882624939, "grad_norm": 0.6061899588880231, "learning_rate": 9.933701945222359e-06, "loss": 0.4074, "step": 18340 }, { "epoch": 0.1467988256093951, "grad_norm": 0.6407094077859289, "learning_rate": 9.933475130209238e-06, "loss": 0.4293, "step": 18350 }, { "epoch": 0.14687882496940025, "grad_norm": 0.6014810190226926, "learning_rate": 9.933247930473569e-06, "loss": 0.4132, "step": 18360 }, { "epoch": 0.14695882432940535, "grad_norm": 0.6963921746335737, "learning_rate": 9.933020346033069e-06, "loss": 0.4193, "step": 18370 }, { "epoch": 0.14703882368941049, "grad_norm": 0.6655268880887207, "learning_rate": 9.932792376905486e-06, "loss": 0.4359, "step": 18380 }, { "epoch": 0.1471188230494156, "grad_norm": 0.5762303618537354, "learning_rate": 9.932564023108597e-06, "loss": 0.4167, "step": 18390 }, { "epoch": 0.14719882240942073, "grad_norm": 0.6395666788014646, "learning_rate": 9.93233528466021e-06, "loss": 0.4119, "step": 18400 }, { "epoch": 0.14727882176942583, "grad_norm": 0.5711130227368134, "learning_rate": 9.932106161578161e-06, "loss": 0.4097, "step": 18410 }, { "epoch": 0.14735882112943097, "grad_norm": 0.7766791353504565, "learning_rate": 9.93187665388032e-06, "loss": 0.4148, "step": 18420 }, { "epoch": 0.14743882048943607, "grad_norm": 0.6583668949356443, "learning_rate": 9.931646761584581e-06, "loss": 0.3827, "step": 18430 }, { "epoch": 0.1475188198494412, "grad_norm": 0.7789970345891685, "learning_rate": 9.931416484708876e-06, "loss": 0.4326, "step": 18440 }, { "epoch": 0.1475988192094463, "grad_norm": 0.6521048501944368, "learning_rate": 9.931185823271157e-06, "loss": 0.4492, "step": 18450 }, { "epoch": 0.14767881856945145, "grad_norm": 0.6543341431783585, "learning_rate": 9.930954777289416e-06, "loss": 0.4266, "step": 18460 }, { "epoch": 0.14775881792945655, "grad_norm": 0.6410525276911092, "learning_rate": 9.93072334678167e-06, "loss": 0.382, "step": 18470 }, { "epoch": 0.1478388172894617, "grad_norm": 0.646303912080973, "learning_rate": 9.930491531765964e-06, "loss": 0.441, "step": 18480 }, { "epoch": 0.1479188166494668, "grad_norm": 0.8305929965592703, "learning_rate": 9.930259332260376e-06, "loss": 0.4034, "step": 18490 }, { "epoch": 0.14799881600947193, "grad_norm": 0.6481954426856195, "learning_rate": 9.930026748283015e-06, "loss": 0.4041, "step": 18500 }, { "epoch": 0.14807881536947703, "grad_norm": 0.6623410605970118, "learning_rate": 9.929793779852017e-06, "loss": 0.4208, "step": 18510 }, { "epoch": 0.14815881472948217, "grad_norm": 0.681324728024364, "learning_rate": 9.929560426985549e-06, "loss": 0.4175, "step": 18520 }, { "epoch": 0.14823881408948728, "grad_norm": 0.7029147015798579, "learning_rate": 9.929326689701811e-06, "loss": 0.4232, "step": 18530 }, { "epoch": 0.1483188134494924, "grad_norm": 0.6585096978064959, "learning_rate": 9.929092568019028e-06, "loss": 0.4096, "step": 18540 }, { "epoch": 0.14839881280949752, "grad_norm": 0.6141299796881855, "learning_rate": 9.928858061955457e-06, "loss": 0.3987, "step": 18550 }, { "epoch": 0.14847881216950265, "grad_norm": 0.6466149067720965, "learning_rate": 9.928623171529388e-06, "loss": 0.3886, "step": 18560 }, { "epoch": 0.14855881152950776, "grad_norm": 0.7765486029081139, "learning_rate": 9.928387896759134e-06, "loss": 0.4146, "step": 18570 }, { "epoch": 0.1486388108895129, "grad_norm": 0.7622532125841667, "learning_rate": 9.928152237663045e-06, "loss": 0.42, "step": 18580 }, { "epoch": 0.148718810249518, "grad_norm": 0.793737091288824, "learning_rate": 9.927916194259497e-06, "loss": 0.4165, "step": 18590 }, { "epoch": 0.14879880960952313, "grad_norm": 0.7090957742920325, "learning_rate": 9.927679766566898e-06, "loss": 0.429, "step": 18600 }, { "epoch": 0.14887880896952824, "grad_norm": 0.609046121922208, "learning_rate": 9.927442954603686e-06, "loss": 0.4271, "step": 18610 }, { "epoch": 0.14895880832953337, "grad_norm": 1.0550106385589848, "learning_rate": 9.927205758388326e-06, "loss": 0.4119, "step": 18620 }, { "epoch": 0.14903880768953848, "grad_norm": 0.7074920045241557, "learning_rate": 9.926968177939317e-06, "loss": 0.4194, "step": 18630 }, { "epoch": 0.1491188070495436, "grad_norm": 0.6643036320620868, "learning_rate": 9.926730213275184e-06, "loss": 0.4275, "step": 18640 }, { "epoch": 0.14919880640954872, "grad_norm": 0.610677482924604, "learning_rate": 9.926491864414486e-06, "loss": 0.4157, "step": 18650 }, { "epoch": 0.14927880576955385, "grad_norm": 0.6242816109698098, "learning_rate": 9.92625313137581e-06, "loss": 0.4223, "step": 18660 }, { "epoch": 0.14935880512955896, "grad_norm": 0.6480846009688933, "learning_rate": 9.926014014177769e-06, "loss": 0.4287, "step": 18670 }, { "epoch": 0.1494388044895641, "grad_norm": 0.7115694270019974, "learning_rate": 9.925774512839015e-06, "loss": 0.4307, "step": 18680 }, { "epoch": 0.1495188038495692, "grad_norm": 0.7229984668674182, "learning_rate": 9.925534627378222e-06, "loss": 0.418, "step": 18690 }, { "epoch": 0.14959880320957433, "grad_norm": 0.6202698461183304, "learning_rate": 9.925294357814096e-06, "loss": 0.4296, "step": 18700 }, { "epoch": 0.14967880256957944, "grad_norm": 0.7737949730081566, "learning_rate": 9.925053704165376e-06, "loss": 0.4145, "step": 18710 }, { "epoch": 0.14975880192958457, "grad_norm": 0.7599669271278868, "learning_rate": 9.924812666450827e-06, "loss": 0.4353, "step": 18720 }, { "epoch": 0.14983880128958968, "grad_norm": 0.667445836967584, "learning_rate": 9.924571244689247e-06, "loss": 0.4267, "step": 18730 }, { "epoch": 0.1499188006495948, "grad_norm": 0.646325747467017, "learning_rate": 9.924329438899463e-06, "loss": 0.4186, "step": 18740 }, { "epoch": 0.14999880000959992, "grad_norm": 0.7616488383376159, "learning_rate": 9.924087249100328e-06, "loss": 0.4399, "step": 18750 }, { "epoch": 0.15007879936960505, "grad_norm": 0.6856951294640004, "learning_rate": 9.923844675310732e-06, "loss": 0.4367, "step": 18760 }, { "epoch": 0.15015879872961016, "grad_norm": 0.741113033674361, "learning_rate": 9.923601717549591e-06, "loss": 0.4273, "step": 18770 }, { "epoch": 0.1502387980896153, "grad_norm": 0.6069946815150408, "learning_rate": 9.92335837583585e-06, "loss": 0.4291, "step": 18780 }, { "epoch": 0.1503187974496204, "grad_norm": 0.6938531424181634, "learning_rate": 9.923114650188484e-06, "loss": 0.4168, "step": 18790 }, { "epoch": 0.15039879680962553, "grad_norm": 0.6206051460571814, "learning_rate": 9.922870540626504e-06, "loss": 0.4203, "step": 18800 }, { "epoch": 0.15047879616963064, "grad_norm": 0.7158848633963535, "learning_rate": 9.922626047168942e-06, "loss": 0.4241, "step": 18810 }, { "epoch": 0.15055879552963577, "grad_norm": 0.7936806504139352, "learning_rate": 9.922381169834864e-06, "loss": 0.4141, "step": 18820 }, { "epoch": 0.15063879488964088, "grad_norm": 0.6697272792432389, "learning_rate": 9.92213590864337e-06, "loss": 0.4122, "step": 18830 }, { "epoch": 0.15071879424964602, "grad_norm": 0.7681104483796211, "learning_rate": 9.92189026361358e-06, "loss": 0.424, "step": 18840 }, { "epoch": 0.15079879360965112, "grad_norm": 0.6602342210624249, "learning_rate": 9.921644234764657e-06, "loss": 0.4127, "step": 18850 }, { "epoch": 0.15087879296965626, "grad_norm": 0.6759153060125428, "learning_rate": 9.921397822115782e-06, "loss": 0.4272, "step": 18860 }, { "epoch": 0.15095879232966136, "grad_norm": 0.6153646672654235, "learning_rate": 9.921151025686171e-06, "loss": 0.4203, "step": 18870 }, { "epoch": 0.1510387916896665, "grad_norm": 0.6496778487555297, "learning_rate": 9.920903845495071e-06, "loss": 0.407, "step": 18880 }, { "epoch": 0.1511187910496716, "grad_norm": 0.738875875537896, "learning_rate": 9.920656281561755e-06, "loss": 0.4445, "step": 18890 }, { "epoch": 0.1511987904096767, "grad_norm": 0.6276685870995392, "learning_rate": 9.920408333905534e-06, "loss": 0.4168, "step": 18900 }, { "epoch": 0.15127878976968184, "grad_norm": 0.623640189505422, "learning_rate": 9.92016000254574e-06, "loss": 0.4336, "step": 18910 }, { "epoch": 0.15135878912968695, "grad_norm": 0.6974499148300083, "learning_rate": 9.919911287501737e-06, "loss": 0.4081, "step": 18920 }, { "epoch": 0.15143878848969208, "grad_norm": 0.7600333661173807, "learning_rate": 9.919662188792923e-06, "loss": 0.4117, "step": 18930 }, { "epoch": 0.1515187878496972, "grad_norm": 0.7229359241545309, "learning_rate": 9.919412706438722e-06, "loss": 0.4361, "step": 18940 }, { "epoch": 0.15159878720970232, "grad_norm": 2.51374632774997, "learning_rate": 9.919162840458588e-06, "loss": 0.4323, "step": 18950 }, { "epoch": 0.15167878656970743, "grad_norm": 0.702650376684448, "learning_rate": 9.918912590872009e-06, "loss": 0.4311, "step": 18960 }, { "epoch": 0.15175878592971256, "grad_norm": 0.697824508048587, "learning_rate": 9.918661957698497e-06, "loss": 0.4383, "step": 18970 }, { "epoch": 0.15183878528971767, "grad_norm": 0.6191201251804641, "learning_rate": 9.918410940957599e-06, "loss": 0.3928, "step": 18980 }, { "epoch": 0.1519187846497228, "grad_norm": 0.688628594544192, "learning_rate": 9.918159540668888e-06, "loss": 0.4275, "step": 18990 }, { "epoch": 0.1519987840097279, "grad_norm": 0.6095867063854213, "learning_rate": 9.917907756851973e-06, "loss": 0.4349, "step": 19000 }, { "epoch": 0.15207878336973304, "grad_norm": 0.6864628169825447, "learning_rate": 9.917655589526481e-06, "loss": 0.4326, "step": 19010 }, { "epoch": 0.15215878272973815, "grad_norm": 0.6578206560322402, "learning_rate": 9.917403038712083e-06, "loss": 0.4282, "step": 19020 }, { "epoch": 0.15223878208974329, "grad_norm": 0.6007826287889669, "learning_rate": 9.917150104428472e-06, "loss": 0.4244, "step": 19030 }, { "epoch": 0.1523187814497484, "grad_norm": 0.7916257122723118, "learning_rate": 9.91689678669537e-06, "loss": 0.414, "step": 19040 }, { "epoch": 0.15239878080975353, "grad_norm": 0.6624416252215327, "learning_rate": 9.916643085532533e-06, "loss": 0.432, "step": 19050 }, { "epoch": 0.15247878016975863, "grad_norm": 0.7156915349783781, "learning_rate": 9.916389000959746e-06, "loss": 0.4225, "step": 19060 }, { "epoch": 0.15255877952976377, "grad_norm": 0.6844939685781536, "learning_rate": 9.916134532996822e-06, "loss": 0.4119, "step": 19070 }, { "epoch": 0.15263877888976887, "grad_norm": 0.6826821546519932, "learning_rate": 9.915879681663605e-06, "loss": 0.4215, "step": 19080 }, { "epoch": 0.152718778249774, "grad_norm": 0.6695060667837924, "learning_rate": 9.915624446979968e-06, "loss": 0.4416, "step": 19090 }, { "epoch": 0.1527987776097791, "grad_norm": 1.059781334793182, "learning_rate": 9.915368828965817e-06, "loss": 0.4305, "step": 19100 }, { "epoch": 0.15287877696978425, "grad_norm": 0.6418531083575952, "learning_rate": 9.915112827641082e-06, "loss": 0.4103, "step": 19110 }, { "epoch": 0.15295877632978935, "grad_norm": 0.6196329782818689, "learning_rate": 9.914856443025731e-06, "loss": 0.4016, "step": 19120 }, { "epoch": 0.1530387756897945, "grad_norm": 0.5942223207665291, "learning_rate": 9.914599675139754e-06, "loss": 0.4312, "step": 19130 }, { "epoch": 0.1531187750497996, "grad_norm": 0.6690808905341676, "learning_rate": 9.914342524003176e-06, "loss": 0.4314, "step": 19140 }, { "epoch": 0.15319877440980473, "grad_norm": 0.65167713784443, "learning_rate": 9.914084989636048e-06, "loss": 0.4149, "step": 19150 }, { "epoch": 0.15327877376980983, "grad_norm": 0.6377924565542356, "learning_rate": 9.913827072058457e-06, "loss": 0.3861, "step": 19160 }, { "epoch": 0.15335877312981497, "grad_norm": 0.7711351843113318, "learning_rate": 9.913568771290511e-06, "loss": 0.4052, "step": 19170 }, { "epoch": 0.15343877248982007, "grad_norm": 0.7151603981022304, "learning_rate": 9.913310087352358e-06, "loss": 0.4276, "step": 19180 }, { "epoch": 0.1535187718498252, "grad_norm": 0.6076388209561798, "learning_rate": 9.913051020264166e-06, "loss": 0.3953, "step": 19190 }, { "epoch": 0.15359877120983031, "grad_norm": 0.6790042554540082, "learning_rate": 9.912791570046141e-06, "loss": 0.4121, "step": 19200 }, { "epoch": 0.15367877056983545, "grad_norm": 1.4111220401556113, "learning_rate": 9.912531736718512e-06, "loss": 0.4148, "step": 19210 }, { "epoch": 0.15375876992984056, "grad_norm": 0.6197086659054074, "learning_rate": 9.912271520301545e-06, "loss": 0.4345, "step": 19220 }, { "epoch": 0.1538387692898457, "grad_norm": 0.7296811802032571, "learning_rate": 9.912010920815531e-06, "loss": 0.4299, "step": 19230 }, { "epoch": 0.1539187686498508, "grad_norm": 0.7008581262429644, "learning_rate": 9.91174993828079e-06, "loss": 0.4411, "step": 19240 }, { "epoch": 0.15399876800985593, "grad_norm": 0.6782707426643735, "learning_rate": 9.911488572717679e-06, "loss": 0.4175, "step": 19250 }, { "epoch": 0.15407876736986104, "grad_norm": 0.613768385187734, "learning_rate": 9.911226824146573e-06, "loss": 0.4379, "step": 19260 }, { "epoch": 0.15415876672986617, "grad_norm": 0.6792812530147077, "learning_rate": 9.91096469258789e-06, "loss": 0.4163, "step": 19270 }, { "epoch": 0.15423876608987128, "grad_norm": 0.6340420390296784, "learning_rate": 9.910702178062069e-06, "loss": 0.4293, "step": 19280 }, { "epoch": 0.1543187654498764, "grad_norm": 0.6691451864118001, "learning_rate": 9.910439280589578e-06, "loss": 0.4336, "step": 19290 }, { "epoch": 0.15439876480988152, "grad_norm": 0.7092221077905225, "learning_rate": 9.910176000190924e-06, "loss": 0.4429, "step": 19300 }, { "epoch": 0.15447876416988665, "grad_norm": 0.6636126281408945, "learning_rate": 9.909912336886636e-06, "loss": 0.4223, "step": 19310 }, { "epoch": 0.15455876352989176, "grad_norm": 0.7437527205652605, "learning_rate": 9.909648290697273e-06, "loss": 0.4235, "step": 19320 }, { "epoch": 0.1546387628898969, "grad_norm": 0.6235975551538735, "learning_rate": 9.909383861643428e-06, "loss": 0.4175, "step": 19330 }, { "epoch": 0.154718762249902, "grad_norm": 0.6525941577497524, "learning_rate": 9.909119049745722e-06, "loss": 0.4055, "step": 19340 }, { "epoch": 0.15479876160990713, "grad_norm": 0.6924315230904519, "learning_rate": 9.908853855024805e-06, "loss": 0.416, "step": 19350 }, { "epoch": 0.15487876096991224, "grad_norm": 0.7436902046406285, "learning_rate": 9.908588277501357e-06, "loss": 0.4501, "step": 19360 }, { "epoch": 0.15495876032991737, "grad_norm": 0.6747585227996752, "learning_rate": 9.908322317196086e-06, "loss": 0.4303, "step": 19370 }, { "epoch": 0.15503875968992248, "grad_norm": 0.6505401952199028, "learning_rate": 9.908055974129737e-06, "loss": 0.3987, "step": 19380 }, { "epoch": 0.1551187590499276, "grad_norm": 0.7232210033377714, "learning_rate": 9.907789248323079e-06, "loss": 0.4105, "step": 19390 }, { "epoch": 0.15519875840993272, "grad_norm": 0.6065449758901142, "learning_rate": 9.907522139796908e-06, "loss": 0.4087, "step": 19400 }, { "epoch": 0.15527875776993785, "grad_norm": 0.6332443985863848, "learning_rate": 9.907254648572057e-06, "loss": 0.3907, "step": 19410 }, { "epoch": 0.15535875712994296, "grad_norm": 0.6416739913646538, "learning_rate": 9.906986774669385e-06, "loss": 0.4232, "step": 19420 }, { "epoch": 0.1554387564899481, "grad_norm": 0.7050298830362252, "learning_rate": 9.906718518109781e-06, "loss": 0.4424, "step": 19430 }, { "epoch": 0.1555187558499532, "grad_norm": 0.6945183375155903, "learning_rate": 9.906449878914164e-06, "loss": 0.4361, "step": 19440 }, { "epoch": 0.15559875520995833, "grad_norm": 0.6841655646851803, "learning_rate": 9.906180857103482e-06, "loss": 0.4212, "step": 19450 }, { "epoch": 0.15567875456996344, "grad_norm": 0.6358134161020284, "learning_rate": 9.905911452698718e-06, "loss": 0.4263, "step": 19460 }, { "epoch": 0.15575875392996855, "grad_norm": 0.6028520209460554, "learning_rate": 9.905641665720875e-06, "loss": 0.4296, "step": 19470 }, { "epoch": 0.15583875328997368, "grad_norm": 0.8251075930376949, "learning_rate": 9.905371496190997e-06, "loss": 0.4498, "step": 19480 }, { "epoch": 0.1559187526499788, "grad_norm": 0.6788543075363247, "learning_rate": 9.905100944130148e-06, "loss": 0.4189, "step": 19490 }, { "epoch": 0.15599875200998392, "grad_norm": 2.8684315630517347, "learning_rate": 9.904830009559428e-06, "loss": 0.4645, "step": 19500 }, { "epoch": 0.15607875136998903, "grad_norm": 0.5983336818178295, "learning_rate": 9.904558692499965e-06, "loss": 0.4123, "step": 19510 }, { "epoch": 0.15615875072999416, "grad_norm": 0.6735393274477663, "learning_rate": 9.904286992972919e-06, "loss": 0.4141, "step": 19520 }, { "epoch": 0.15623875008999927, "grad_norm": 0.6794216799547581, "learning_rate": 9.904014910999473e-06, "loss": 0.3999, "step": 19530 }, { "epoch": 0.1563187494500044, "grad_norm": 0.7071465076188671, "learning_rate": 9.903742446600848e-06, "loss": 0.4117, "step": 19540 }, { "epoch": 0.1563987488100095, "grad_norm": 0.8261308088635718, "learning_rate": 9.90346959979829e-06, "loss": 0.4496, "step": 19550 }, { "epoch": 0.15647874817001464, "grad_norm": 0.6008300144263209, "learning_rate": 9.903196370613077e-06, "loss": 0.4311, "step": 19560 }, { "epoch": 0.15655874753001975, "grad_norm": 0.723558702178556, "learning_rate": 9.902922759066516e-06, "loss": 0.417, "step": 19570 }, { "epoch": 0.15663874689002488, "grad_norm": 0.6051032090916167, "learning_rate": 9.902648765179942e-06, "loss": 0.4407, "step": 19580 }, { "epoch": 0.15671874625003, "grad_norm": 0.6343354970598317, "learning_rate": 9.902374388974725e-06, "loss": 0.4099, "step": 19590 }, { "epoch": 0.15679874561003512, "grad_norm": 0.7627634194753621, "learning_rate": 9.902099630472257e-06, "loss": 0.4279, "step": 19600 }, { "epoch": 0.15687874497004023, "grad_norm": 0.6540801322169564, "learning_rate": 9.901824489693968e-06, "loss": 0.4044, "step": 19610 }, { "epoch": 0.15695874433004536, "grad_norm": 0.6359347984458683, "learning_rate": 9.901548966661312e-06, "loss": 0.4206, "step": 19620 }, { "epoch": 0.15703874369005047, "grad_norm": 0.7036453969421722, "learning_rate": 9.901273061395776e-06, "loss": 0.4249, "step": 19630 }, { "epoch": 0.1571187430500556, "grad_norm": 0.6879714131878684, "learning_rate": 9.900996773918876e-06, "loss": 0.4134, "step": 19640 }, { "epoch": 0.1571987424100607, "grad_norm": 0.6221043234635352, "learning_rate": 9.900720104252156e-06, "loss": 0.4236, "step": 19650 }, { "epoch": 0.15727874177006584, "grad_norm": 0.6479579837042654, "learning_rate": 9.90044305241719e-06, "loss": 0.4062, "step": 19660 }, { "epoch": 0.15735874113007095, "grad_norm": 0.6818804034945459, "learning_rate": 9.900165618435587e-06, "loss": 0.4192, "step": 19670 }, { "epoch": 0.15743874049007608, "grad_norm": 0.6666552286978819, "learning_rate": 9.899887802328979e-06, "loss": 0.4233, "step": 19680 }, { "epoch": 0.1575187398500812, "grad_norm": 0.5720731978142004, "learning_rate": 9.899609604119032e-06, "loss": 0.4215, "step": 19690 }, { "epoch": 0.15759873921008632, "grad_norm": 0.6373352262490029, "learning_rate": 9.89933102382744e-06, "loss": 0.4131, "step": 19700 }, { "epoch": 0.15767873857009143, "grad_norm": 0.7549057169058867, "learning_rate": 9.899052061475927e-06, "loss": 0.4069, "step": 19710 }, { "epoch": 0.15775873793009657, "grad_norm": 0.7076921899428039, "learning_rate": 9.898772717086247e-06, "loss": 0.419, "step": 19720 }, { "epoch": 0.15783873729010167, "grad_norm": 0.6374567869611129, "learning_rate": 9.898492990680184e-06, "loss": 0.4037, "step": 19730 }, { "epoch": 0.1579187366501068, "grad_norm": 0.6705022951663401, "learning_rate": 9.898212882279553e-06, "loss": 0.4102, "step": 19740 }, { "epoch": 0.1579987360101119, "grad_norm": 0.6624905234267924, "learning_rate": 9.897932391906195e-06, "loss": 0.4255, "step": 19750 }, { "epoch": 0.15807873537011705, "grad_norm": 0.6963037739171467, "learning_rate": 9.897651519581986e-06, "loss": 0.4161, "step": 19760 }, { "epoch": 0.15815873473012215, "grad_norm": 0.7573234723866183, "learning_rate": 9.897370265328824e-06, "loss": 0.4129, "step": 19770 }, { "epoch": 0.1582387340901273, "grad_norm": 0.687526631594494, "learning_rate": 9.897088629168649e-06, "loss": 0.408, "step": 19780 }, { "epoch": 0.1583187334501324, "grad_norm": 0.6907017538656096, "learning_rate": 9.896806611123419e-06, "loss": 0.4091, "step": 19790 }, { "epoch": 0.15839873281013753, "grad_norm": 0.6968912270893669, "learning_rate": 9.896524211215126e-06, "loss": 0.4236, "step": 19800 }, { "epoch": 0.15847873217014263, "grad_norm": 0.8343162270870814, "learning_rate": 9.896241429465793e-06, "loss": 0.43, "step": 19810 }, { "epoch": 0.15855873153014777, "grad_norm": 0.6230944209703888, "learning_rate": 9.895958265897474e-06, "loss": 0.4113, "step": 19820 }, { "epoch": 0.15863873089015287, "grad_norm": 0.619499934851276, "learning_rate": 9.895674720532247e-06, "loss": 0.4043, "step": 19830 }, { "epoch": 0.158718730250158, "grad_norm": 0.6965984390126737, "learning_rate": 9.895390793392227e-06, "loss": 0.4172, "step": 19840 }, { "epoch": 0.15879872961016311, "grad_norm": 0.6660943015807749, "learning_rate": 9.895106484499552e-06, "loss": 0.4337, "step": 19850 }, { "epoch": 0.15887872897016825, "grad_norm": 0.6201646071197764, "learning_rate": 9.894821793876396e-06, "loss": 0.3848, "step": 19860 }, { "epoch": 0.15895872833017335, "grad_norm": 0.7074211945493103, "learning_rate": 9.894536721544957e-06, "loss": 0.4324, "step": 19870 }, { "epoch": 0.1590387276901785, "grad_norm": 0.6884715534110039, "learning_rate": 9.894251267527468e-06, "loss": 0.3996, "step": 19880 }, { "epoch": 0.1591187270501836, "grad_norm": 2.1261052030237866, "learning_rate": 9.893965431846187e-06, "loss": 0.4314, "step": 19890 }, { "epoch": 0.15919872641018873, "grad_norm": 0.7032645027805966, "learning_rate": 9.893679214523405e-06, "loss": 0.3893, "step": 19900 }, { "epoch": 0.15927872577019384, "grad_norm": 0.6616461132108233, "learning_rate": 9.893392615581442e-06, "loss": 0.4259, "step": 19910 }, { "epoch": 0.15935872513019897, "grad_norm": 0.6869940731148656, "learning_rate": 9.89310563504265e-06, "loss": 0.4377, "step": 19920 }, { "epoch": 0.15943872449020408, "grad_norm": 0.6784485815296981, "learning_rate": 9.892818272929403e-06, "loss": 0.4147, "step": 19930 }, { "epoch": 0.1595187238502092, "grad_norm": 0.6933884292063681, "learning_rate": 9.892530529264113e-06, "loss": 0.4295, "step": 19940 }, { "epoch": 0.15959872321021432, "grad_norm": 0.5777650118898544, "learning_rate": 9.89224240406922e-06, "loss": 0.4162, "step": 19950 }, { "epoch": 0.15967872257021945, "grad_norm": 1.451560361439652, "learning_rate": 9.891953897367192e-06, "loss": 0.4116, "step": 19960 }, { "epoch": 0.15975872193022456, "grad_norm": 0.7147839332767886, "learning_rate": 9.891665009180526e-06, "loss": 0.4136, "step": 19970 }, { "epoch": 0.1598387212902297, "grad_norm": 0.6974144751197976, "learning_rate": 9.89137573953175e-06, "loss": 0.4093, "step": 19980 }, { "epoch": 0.1599187206502348, "grad_norm": 0.6711069686124641, "learning_rate": 9.891086088443424e-06, "loss": 0.4188, "step": 19990 }, { "epoch": 0.15999872001023993, "grad_norm": 0.6342333531855742, "learning_rate": 9.890796055938131e-06, "loss": 0.4196, "step": 20000 }, { "epoch": 0.16007871937024504, "grad_norm": 0.780835540774204, "learning_rate": 9.890505642038495e-06, "loss": 0.4219, "step": 20010 }, { "epoch": 0.16015871873025017, "grad_norm": 0.8254057721811602, "learning_rate": 9.89021484676716e-06, "loss": 0.4271, "step": 20020 }, { "epoch": 0.16023871809025528, "grad_norm": 0.699660982733276, "learning_rate": 9.889923670146802e-06, "loss": 0.4251, "step": 20030 }, { "epoch": 0.1603187174502604, "grad_norm": 0.757023045698631, "learning_rate": 9.889632112200127e-06, "loss": 0.4259, "step": 20040 }, { "epoch": 0.16039871681026552, "grad_norm": 0.6675579750083735, "learning_rate": 9.889340172949872e-06, "loss": 0.4106, "step": 20050 }, { "epoch": 0.16047871617027062, "grad_norm": 0.6285765297189194, "learning_rate": 9.889047852418804e-06, "loss": 0.4164, "step": 20060 }, { "epoch": 0.16055871553027576, "grad_norm": 0.6786769093011259, "learning_rate": 9.888755150629718e-06, "loss": 0.4097, "step": 20070 }, { "epoch": 0.16063871489028086, "grad_norm": 0.588957147656328, "learning_rate": 9.888462067605441e-06, "loss": 0.4311, "step": 20080 }, { "epoch": 0.160718714250286, "grad_norm": 0.6950250713687153, "learning_rate": 9.888168603368826e-06, "loss": 0.4368, "step": 20090 }, { "epoch": 0.1607987136102911, "grad_norm": 0.7600871409637181, "learning_rate": 9.88787475794276e-06, "loss": 0.4359, "step": 20100 }, { "epoch": 0.16087871297029624, "grad_norm": 0.7943995964549239, "learning_rate": 9.887580531350153e-06, "loss": 0.4318, "step": 20110 }, { "epoch": 0.16095871233030135, "grad_norm": 0.6541345064158348, "learning_rate": 9.887285923613957e-06, "loss": 0.3964, "step": 20120 }, { "epoch": 0.16103871169030648, "grad_norm": 0.609186507268122, "learning_rate": 9.886990934757137e-06, "loss": 0.4292, "step": 20130 }, { "epoch": 0.16111871105031159, "grad_norm": 0.7514983688538684, "learning_rate": 9.886695564802707e-06, "loss": 0.4392, "step": 20140 }, { "epoch": 0.16119871041031672, "grad_norm": 0.6127638806823319, "learning_rate": 9.886399813773692e-06, "loss": 0.4165, "step": 20150 }, { "epoch": 0.16127870977032183, "grad_norm": 0.7147071813905149, "learning_rate": 9.88610368169316e-06, "loss": 0.4497, "step": 20160 }, { "epoch": 0.16135870913032696, "grad_norm": 0.6971525588698978, "learning_rate": 9.885807168584202e-06, "loss": 0.4088, "step": 20170 }, { "epoch": 0.16143870849033207, "grad_norm": 0.6527547710371207, "learning_rate": 9.88551027446994e-06, "loss": 0.4113, "step": 20180 }, { "epoch": 0.1615187078503372, "grad_norm": 0.8253877356257711, "learning_rate": 9.88521299937353e-06, "loss": 0.4321, "step": 20190 }, { "epoch": 0.1615987072103423, "grad_norm": 0.6485164487053928, "learning_rate": 9.884915343318151e-06, "loss": 0.4476, "step": 20200 }, { "epoch": 0.16167870657034744, "grad_norm": 0.6790829642369671, "learning_rate": 9.884617306327016e-06, "loss": 0.4264, "step": 20210 }, { "epoch": 0.16175870593035255, "grad_norm": 0.6246860321293612, "learning_rate": 9.884318888423365e-06, "loss": 0.3888, "step": 20220 }, { "epoch": 0.16183870529035768, "grad_norm": 0.7052765511324864, "learning_rate": 9.884020089630472e-06, "loss": 0.4, "step": 20230 }, { "epoch": 0.1619187046503628, "grad_norm": 0.7689990996229819, "learning_rate": 9.883720909971637e-06, "loss": 0.4293, "step": 20240 }, { "epoch": 0.16199870401036792, "grad_norm": 0.5869335267478856, "learning_rate": 9.883421349470188e-06, "loss": 0.404, "step": 20250 }, { "epoch": 0.16207870337037303, "grad_norm": 0.6761344257538274, "learning_rate": 9.883121408149488e-06, "loss": 0.4207, "step": 20260 }, { "epoch": 0.16215870273037816, "grad_norm": 0.684907510438804, "learning_rate": 9.882821086032929e-06, "loss": 0.4384, "step": 20270 }, { "epoch": 0.16223870209038327, "grad_norm": 0.5715802217609504, "learning_rate": 9.882520383143924e-06, "loss": 0.3905, "step": 20280 }, { "epoch": 0.1623187014503884, "grad_norm": 0.7006832264529822, "learning_rate": 9.88221929950593e-06, "loss": 0.4178, "step": 20290 }, { "epoch": 0.1623987008103935, "grad_norm": 0.6638707026058015, "learning_rate": 9.88191783514242e-06, "loss": 0.4092, "step": 20300 }, { "epoch": 0.16247870017039864, "grad_norm": 0.6458501556077648, "learning_rate": 9.881615990076907e-06, "loss": 0.4196, "step": 20310 }, { "epoch": 0.16255869953040375, "grad_norm": 0.6394392948865875, "learning_rate": 9.881313764332928e-06, "loss": 0.4102, "step": 20320 }, { "epoch": 0.16263869889040888, "grad_norm": 0.6681630869657679, "learning_rate": 9.88101115793405e-06, "loss": 0.4013, "step": 20330 }, { "epoch": 0.162718698250414, "grad_norm": 0.6456403911518246, "learning_rate": 9.880708170903875e-06, "loss": 0.414, "step": 20340 }, { "epoch": 0.16279869761041912, "grad_norm": 0.5612343088706239, "learning_rate": 9.880404803266027e-06, "loss": 0.4165, "step": 20350 }, { "epoch": 0.16287869697042423, "grad_norm": 0.5601409095360574, "learning_rate": 9.880101055044162e-06, "loss": 0.4118, "step": 20360 }, { "epoch": 0.16295869633042936, "grad_norm": 0.6339092054278243, "learning_rate": 9.879796926261969e-06, "loss": 0.4097, "step": 20370 }, { "epoch": 0.16303869569043447, "grad_norm": 0.7998427221813685, "learning_rate": 9.879492416943165e-06, "loss": 0.4106, "step": 20380 }, { "epoch": 0.1631186950504396, "grad_norm": 0.6740427799839903, "learning_rate": 9.879187527111496e-06, "loss": 0.4158, "step": 20390 }, { "epoch": 0.1631986944104447, "grad_norm": 0.7451770261928884, "learning_rate": 9.878882256790735e-06, "loss": 0.4207, "step": 20400 }, { "epoch": 0.16327869377044985, "grad_norm": 0.6634127098754018, "learning_rate": 9.878576606004694e-06, "loss": 0.4155, "step": 20410 }, { "epoch": 0.16335869313045495, "grad_norm": 0.631512060321296, "learning_rate": 9.878270574777203e-06, "loss": 0.3992, "step": 20420 }, { "epoch": 0.16343869249046009, "grad_norm": 0.7391960273176623, "learning_rate": 9.877964163132128e-06, "loss": 0.4504, "step": 20430 }, { "epoch": 0.1635186918504652, "grad_norm": 0.7619721365424883, "learning_rate": 9.877657371093365e-06, "loss": 0.4217, "step": 20440 }, { "epoch": 0.16359869121047033, "grad_norm": 0.6250523695420881, "learning_rate": 9.877350198684837e-06, "loss": 0.4137, "step": 20450 }, { "epoch": 0.16367869057047543, "grad_norm": 0.677053532146452, "learning_rate": 9.877042645930497e-06, "loss": 0.4279, "step": 20460 }, { "epoch": 0.16375868993048057, "grad_norm": 0.7175157875508115, "learning_rate": 9.87673471285433e-06, "loss": 0.4278, "step": 20470 }, { "epoch": 0.16383868929048567, "grad_norm": 0.8022673899838942, "learning_rate": 9.87642639948035e-06, "loss": 0.4092, "step": 20480 }, { "epoch": 0.1639186886504908, "grad_norm": 0.6974903513010864, "learning_rate": 9.876117705832599e-06, "loss": 0.4311, "step": 20490 }, { "epoch": 0.1639986880104959, "grad_norm": 0.6392725248189788, "learning_rate": 9.87580863193515e-06, "loss": 0.415, "step": 20500 }, { "epoch": 0.16407868737050105, "grad_norm": 0.6441065385219644, "learning_rate": 9.875499177812105e-06, "loss": 0.4113, "step": 20510 }, { "epoch": 0.16415868673050615, "grad_norm": 0.6466402340757798, "learning_rate": 9.875189343487594e-06, "loss": 0.4072, "step": 20520 }, { "epoch": 0.1642386860905113, "grad_norm": 0.6881311013969161, "learning_rate": 9.874879128985781e-06, "loss": 0.4193, "step": 20530 }, { "epoch": 0.1643186854505164, "grad_norm": 0.6642112666770111, "learning_rate": 9.874568534330858e-06, "loss": 0.4281, "step": 20540 }, { "epoch": 0.16439868481052153, "grad_norm": 0.766826325044091, "learning_rate": 9.874257559547041e-06, "loss": 0.4113, "step": 20550 }, { "epoch": 0.16447868417052663, "grad_norm": 0.7121511768285113, "learning_rate": 9.873946204658586e-06, "loss": 0.4017, "step": 20560 }, { "epoch": 0.16455868353053177, "grad_norm": 0.6453462721430974, "learning_rate": 9.87363446968977e-06, "loss": 0.4304, "step": 20570 }, { "epoch": 0.16463868289053687, "grad_norm": 0.6771364428273431, "learning_rate": 9.873322354664903e-06, "loss": 0.4453, "step": 20580 }, { "epoch": 0.164718682250542, "grad_norm": 0.625734362419927, "learning_rate": 9.873009859608325e-06, "loss": 0.4129, "step": 20590 }, { "epoch": 0.16479868161054712, "grad_norm": 0.6616539762961875, "learning_rate": 9.872696984544405e-06, "loss": 0.4022, "step": 20600 }, { "epoch": 0.16487868097055225, "grad_norm": 0.6637604412092278, "learning_rate": 9.872383729497542e-06, "loss": 0.4275, "step": 20610 }, { "epoch": 0.16495868033055736, "grad_norm": 0.6359333279308668, "learning_rate": 9.872070094492163e-06, "loss": 0.437, "step": 20620 }, { "epoch": 0.16503867969056246, "grad_norm": 0.6004857625618191, "learning_rate": 9.871756079552727e-06, "loss": 0.3928, "step": 20630 }, { "epoch": 0.1651186790505676, "grad_norm": 0.851439896948023, "learning_rate": 9.87144168470372e-06, "loss": 0.4379, "step": 20640 }, { "epoch": 0.1651986784105727, "grad_norm": 0.6354804499570842, "learning_rate": 9.87112690996966e-06, "loss": 0.4212, "step": 20650 }, { "epoch": 0.16527867777057784, "grad_norm": 0.6471895447390291, "learning_rate": 9.870811755375098e-06, "loss": 0.398, "step": 20660 }, { "epoch": 0.16535867713058294, "grad_norm": 0.6729764795940805, "learning_rate": 9.870496220944603e-06, "loss": 0.4218, "step": 20670 }, { "epoch": 0.16543867649058808, "grad_norm": 0.6696774124035492, "learning_rate": 9.870180306702786e-06, "loss": 0.4279, "step": 20680 }, { "epoch": 0.16551867585059318, "grad_norm": 0.6526007720451934, "learning_rate": 9.869864012674281e-06, "loss": 0.4001, "step": 20690 }, { "epoch": 0.16559867521059832, "grad_norm": 0.6071604340036563, "learning_rate": 9.869547338883753e-06, "loss": 0.421, "step": 20700 }, { "epoch": 0.16567867457060342, "grad_norm": 0.6550218062200076, "learning_rate": 9.869230285355897e-06, "loss": 0.4006, "step": 20710 }, { "epoch": 0.16575867393060856, "grad_norm": 0.6906618657556405, "learning_rate": 9.868912852115437e-06, "loss": 0.4202, "step": 20720 }, { "epoch": 0.16583867329061366, "grad_norm": 0.6958240428401097, "learning_rate": 9.86859503918713e-06, "loss": 0.4306, "step": 20730 }, { "epoch": 0.1659186726506188, "grad_norm": 0.7166017745351712, "learning_rate": 9.868276846595757e-06, "loss": 0.4003, "step": 20740 }, { "epoch": 0.1659986720106239, "grad_norm": 0.7267176672830176, "learning_rate": 9.867958274366131e-06, "loss": 0.4224, "step": 20750 }, { "epoch": 0.16607867137062904, "grad_norm": 0.7613273619737106, "learning_rate": 9.867639322523097e-06, "loss": 0.434, "step": 20760 }, { "epoch": 0.16615867073063414, "grad_norm": 0.6957624422057648, "learning_rate": 9.867319991091527e-06, "loss": 0.4228, "step": 20770 }, { "epoch": 0.16623867009063928, "grad_norm": 0.6466411512203996, "learning_rate": 9.86700028009632e-06, "loss": 0.4073, "step": 20780 }, { "epoch": 0.16631866945064439, "grad_norm": 0.6110542071627687, "learning_rate": 9.866680189562411e-06, "loss": 0.4179, "step": 20790 }, { "epoch": 0.16639866881064952, "grad_norm": 0.7167634355164806, "learning_rate": 9.866359719514762e-06, "loss": 0.4252, "step": 20800 }, { "epoch": 0.16647866817065463, "grad_norm": 0.6433445727088574, "learning_rate": 9.866038869978362e-06, "loss": 0.3981, "step": 20810 }, { "epoch": 0.16655866753065976, "grad_norm": 0.55203968629429, "learning_rate": 9.865717640978231e-06, "loss": 0.4265, "step": 20820 }, { "epoch": 0.16663866689066487, "grad_norm": 0.7093269675596925, "learning_rate": 9.86539603253942e-06, "loss": 0.3928, "step": 20830 }, { "epoch": 0.16671866625067, "grad_norm": 0.6420163096057138, "learning_rate": 9.86507404468701e-06, "loss": 0.4185, "step": 20840 }, { "epoch": 0.1667986656106751, "grad_norm": 0.7088283905917728, "learning_rate": 9.864751677446108e-06, "loss": 0.4195, "step": 20850 }, { "epoch": 0.16687866497068024, "grad_norm": 0.663289380242178, "learning_rate": 9.864428930841855e-06, "loss": 0.4238, "step": 20860 }, { "epoch": 0.16695866433068535, "grad_norm": 0.6404454211279905, "learning_rate": 9.864105804899417e-06, "loss": 0.4397, "step": 20870 }, { "epoch": 0.16703866369069048, "grad_norm": 0.6334100044866823, "learning_rate": 9.863782299643993e-06, "loss": 0.4302, "step": 20880 }, { "epoch": 0.1671186630506956, "grad_norm": 0.6827872074992797, "learning_rate": 9.863458415100813e-06, "loss": 0.4208, "step": 20890 }, { "epoch": 0.16719866241070072, "grad_norm": 0.5918952231187546, "learning_rate": 9.863134151295131e-06, "loss": 0.4231, "step": 20900 }, { "epoch": 0.16727866177070583, "grad_norm": 0.6576998248532123, "learning_rate": 9.862809508252233e-06, "loss": 0.4178, "step": 20910 }, { "epoch": 0.16735866113071096, "grad_norm": 0.6642129320316221, "learning_rate": 9.86248448599744e-06, "loss": 0.3956, "step": 20920 }, { "epoch": 0.16743866049071607, "grad_norm": 0.6203056675049313, "learning_rate": 9.862159084556094e-06, "loss": 0.4132, "step": 20930 }, { "epoch": 0.1675186598507212, "grad_norm": 0.5993274699529821, "learning_rate": 9.861833303953571e-06, "loss": 0.4027, "step": 20940 }, { "epoch": 0.1675986592107263, "grad_norm": 0.7352606683860438, "learning_rate": 9.861507144215279e-06, "loss": 0.4497, "step": 20950 }, { "epoch": 0.16767865857073144, "grad_norm": 0.6713334328622578, "learning_rate": 9.861180605366646e-06, "loss": 0.449, "step": 20960 }, { "epoch": 0.16775865793073655, "grad_norm": 0.6120524782499914, "learning_rate": 9.860853687433145e-06, "loss": 0.4058, "step": 20970 }, { "epoch": 0.16783865729074168, "grad_norm": 0.6521575522229164, "learning_rate": 9.860526390440263e-06, "loss": 0.4112, "step": 20980 }, { "epoch": 0.1679186566507468, "grad_norm": 0.6489227848611745, "learning_rate": 9.860198714413526e-06, "loss": 0.4036, "step": 20990 }, { "epoch": 0.16799865601075192, "grad_norm": 0.6222815847223343, "learning_rate": 9.859870659378485e-06, "loss": 0.4149, "step": 21000 }, { "epoch": 0.16807865537075703, "grad_norm": 0.6269812228899984, "learning_rate": 9.859542225360725e-06, "loss": 0.4168, "step": 21010 }, { "epoch": 0.16815865473076216, "grad_norm": 0.6360890760308789, "learning_rate": 9.859213412385857e-06, "loss": 0.4386, "step": 21020 }, { "epoch": 0.16823865409076727, "grad_norm": 0.6635615865986949, "learning_rate": 9.858884220479521e-06, "loss": 0.4575, "step": 21030 }, { "epoch": 0.1683186534507724, "grad_norm": 0.6091521878376732, "learning_rate": 9.858554649667391e-06, "loss": 0.4097, "step": 21040 }, { "epoch": 0.1683986528107775, "grad_norm": 0.6958811940680891, "learning_rate": 9.858224699975163e-06, "loss": 0.4232, "step": 21050 }, { "epoch": 0.16847865217078264, "grad_norm": 0.6674873650954012, "learning_rate": 9.857894371428572e-06, "loss": 0.4188, "step": 21060 }, { "epoch": 0.16855865153078775, "grad_norm": 0.6984262246341041, "learning_rate": 9.857563664053376e-06, "loss": 0.401, "step": 21070 }, { "epoch": 0.16863865089079288, "grad_norm": 0.6589178238046456, "learning_rate": 9.857232577875362e-06, "loss": 0.4148, "step": 21080 }, { "epoch": 0.168718650250798, "grad_norm": 0.6768208022662984, "learning_rate": 9.856901112920352e-06, "loss": 0.4074, "step": 21090 }, { "epoch": 0.16879864961080313, "grad_norm": 0.6743988384932385, "learning_rate": 9.856569269214193e-06, "loss": 0.4232, "step": 21100 }, { "epoch": 0.16887864897080823, "grad_norm": 0.709997656678218, "learning_rate": 9.856237046782763e-06, "loss": 0.4349, "step": 21110 }, { "epoch": 0.16895864833081337, "grad_norm": 0.5782495910257708, "learning_rate": 9.85590444565197e-06, "loss": 0.4058, "step": 21120 }, { "epoch": 0.16903864769081847, "grad_norm": 0.702098417950197, "learning_rate": 9.85557146584775e-06, "loss": 0.421, "step": 21130 }, { "epoch": 0.1691186470508236, "grad_norm": 0.5986230280088658, "learning_rate": 9.855238107396069e-06, "loss": 0.4282, "step": 21140 }, { "epoch": 0.1691986464108287, "grad_norm": 0.6245908916954509, "learning_rate": 9.854904370322924e-06, "loss": 0.4068, "step": 21150 }, { "epoch": 0.16927864577083385, "grad_norm": 0.6488672850064721, "learning_rate": 9.85457025465434e-06, "loss": 0.4359, "step": 21160 }, { "epoch": 0.16935864513083895, "grad_norm": 0.7165827335052202, "learning_rate": 9.854235760416371e-06, "loss": 0.4224, "step": 21170 }, { "epoch": 0.1694386444908441, "grad_norm": 0.7010549893358896, "learning_rate": 9.853900887635105e-06, "loss": 0.3951, "step": 21180 }, { "epoch": 0.1695186438508492, "grad_norm": 0.7336919955745709, "learning_rate": 9.853565636336654e-06, "loss": 0.4018, "step": 21190 }, { "epoch": 0.1695986432108543, "grad_norm": 0.624707017318217, "learning_rate": 9.85323000654716e-06, "loss": 0.4053, "step": 21200 }, { "epoch": 0.16967864257085943, "grad_norm": 0.6428943357119492, "learning_rate": 9.852893998292797e-06, "loss": 0.4081, "step": 21210 }, { "epoch": 0.16975864193086454, "grad_norm": 0.6797274280567792, "learning_rate": 9.852557611599767e-06, "loss": 0.4134, "step": 21220 }, { "epoch": 0.16983864129086967, "grad_norm": 0.6248927949625309, "learning_rate": 9.852220846494305e-06, "loss": 0.4553, "step": 21230 }, { "epoch": 0.16991864065087478, "grad_norm": 0.6314186026621478, "learning_rate": 9.851883703002672e-06, "loss": 0.4332, "step": 21240 }, { "epoch": 0.16999864001087991, "grad_norm": 0.6891424052770855, "learning_rate": 9.851546181151157e-06, "loss": 0.4002, "step": 21250 }, { "epoch": 0.17007863937088502, "grad_norm": 0.6697197267918983, "learning_rate": 9.85120828096608e-06, "loss": 0.4197, "step": 21260 }, { "epoch": 0.17015863873089015, "grad_norm": 0.8654402510148103, "learning_rate": 9.850870002473796e-06, "loss": 0.4316, "step": 21270 }, { "epoch": 0.17023863809089526, "grad_norm": 0.6500430234036061, "learning_rate": 9.850531345700678e-06, "loss": 0.39, "step": 21280 }, { "epoch": 0.1703186374509004, "grad_norm": 0.5842861242903146, "learning_rate": 9.850192310673141e-06, "loss": 0.4117, "step": 21290 }, { "epoch": 0.1703986368109055, "grad_norm": 0.6102854579060022, "learning_rate": 9.84985289741762e-06, "loss": 0.4225, "step": 21300 }, { "epoch": 0.17047863617091064, "grad_norm": 0.6656799137395649, "learning_rate": 9.849513105960586e-06, "loss": 0.4161, "step": 21310 }, { "epoch": 0.17055863553091574, "grad_norm": 0.5709327134017188, "learning_rate": 9.849172936328533e-06, "loss": 0.409, "step": 21320 }, { "epoch": 0.17063863489092088, "grad_norm": 0.650428425258576, "learning_rate": 9.848832388547991e-06, "loss": 0.4207, "step": 21330 }, { "epoch": 0.17071863425092598, "grad_norm": 0.6345631108275166, "learning_rate": 9.848491462645517e-06, "loss": 0.39, "step": 21340 }, { "epoch": 0.17079863361093112, "grad_norm": 0.6473191922482092, "learning_rate": 9.848150158647695e-06, "loss": 0.4055, "step": 21350 }, { "epoch": 0.17087863297093622, "grad_norm": 0.6583798621706369, "learning_rate": 9.847808476581142e-06, "loss": 0.429, "step": 21360 }, { "epoch": 0.17095863233094136, "grad_norm": 0.6639797014684946, "learning_rate": 9.847466416472502e-06, "loss": 0.4307, "step": 21370 }, { "epoch": 0.17103863169094646, "grad_norm": 0.5793763106903596, "learning_rate": 9.84712397834845e-06, "loss": 0.4051, "step": 21380 }, { "epoch": 0.1711186310509516, "grad_norm": 0.5990813221821706, "learning_rate": 9.84678116223569e-06, "loss": 0.4113, "step": 21390 }, { "epoch": 0.1711986304109567, "grad_norm": 0.5974876260588344, "learning_rate": 9.846437968160955e-06, "loss": 0.4516, "step": 21400 }, { "epoch": 0.17127862977096184, "grad_norm": 0.6787908480648698, "learning_rate": 9.84609439615101e-06, "loss": 0.4207, "step": 21410 }, { "epoch": 0.17135862913096694, "grad_norm": 0.6097815471297748, "learning_rate": 9.845750446232645e-06, "loss": 0.4267, "step": 21420 }, { "epoch": 0.17143862849097208, "grad_norm": 0.6875382315011543, "learning_rate": 9.845406118432686e-06, "loss": 0.4225, "step": 21430 }, { "epoch": 0.17151862785097718, "grad_norm": 0.6796653201379381, "learning_rate": 9.845061412777978e-06, "loss": 0.3988, "step": 21440 }, { "epoch": 0.17159862721098232, "grad_norm": 0.6930412900121274, "learning_rate": 9.844716329295406e-06, "loss": 0.4251, "step": 21450 }, { "epoch": 0.17167862657098742, "grad_norm": 0.6795393459898248, "learning_rate": 9.84437086801188e-06, "loss": 0.4643, "step": 21460 }, { "epoch": 0.17175862593099256, "grad_norm": 0.6777441396927966, "learning_rate": 9.844025028954338e-06, "loss": 0.4241, "step": 21470 }, { "epoch": 0.17183862529099767, "grad_norm": 0.739983833361851, "learning_rate": 9.843678812149752e-06, "loss": 0.4285, "step": 21480 }, { "epoch": 0.1719186246510028, "grad_norm": 0.8026813610091998, "learning_rate": 9.84333221762512e-06, "loss": 0.4205, "step": 21490 }, { "epoch": 0.1719986240110079, "grad_norm": 0.6295173925472176, "learning_rate": 9.842985245407469e-06, "loss": 0.4049, "step": 21500 }, { "epoch": 0.17207862337101304, "grad_norm": 0.6480828911640615, "learning_rate": 9.842637895523856e-06, "loss": 0.3995, "step": 21510 }, { "epoch": 0.17215862273101815, "grad_norm": 0.6831783327999864, "learning_rate": 9.84229016800137e-06, "loss": 0.4277, "step": 21520 }, { "epoch": 0.17223862209102328, "grad_norm": 0.6188530683768096, "learning_rate": 9.841942062867125e-06, "loss": 0.4012, "step": 21530 }, { "epoch": 0.1723186214510284, "grad_norm": 0.660468223504737, "learning_rate": 9.841593580148271e-06, "loss": 0.4316, "step": 21540 }, { "epoch": 0.17239862081103352, "grad_norm": 0.7663739728674932, "learning_rate": 9.841244719871979e-06, "loss": 0.4018, "step": 21550 }, { "epoch": 0.17247862017103863, "grad_norm": 0.648199329564841, "learning_rate": 9.840895482065458e-06, "loss": 0.4273, "step": 21560 }, { "epoch": 0.17255861953104376, "grad_norm": 0.6829401703239013, "learning_rate": 9.840545866755938e-06, "loss": 0.4273, "step": 21570 }, { "epoch": 0.17263861889104887, "grad_norm": 0.7966637592267048, "learning_rate": 9.840195873970686e-06, "loss": 0.4195, "step": 21580 }, { "epoch": 0.172718618251054, "grad_norm": 0.7607273394278121, "learning_rate": 9.839845503736994e-06, "loss": 0.4038, "step": 21590 }, { "epoch": 0.1727986176110591, "grad_norm": 0.6748400872580287, "learning_rate": 9.839494756082185e-06, "loss": 0.4392, "step": 21600 }, { "epoch": 0.17287861697106424, "grad_norm": 0.7018101302129129, "learning_rate": 9.83914363103361e-06, "loss": 0.4367, "step": 21610 }, { "epoch": 0.17295861633106935, "grad_norm": 0.6928062171469738, "learning_rate": 9.838792128618649e-06, "loss": 0.4201, "step": 21620 }, { "epoch": 0.17303861569107448, "grad_norm": 0.702322334872589, "learning_rate": 9.838440248864717e-06, "loss": 0.4153, "step": 21630 }, { "epoch": 0.1731186150510796, "grad_norm": 0.6793339648502152, "learning_rate": 9.838087991799253e-06, "loss": 0.4196, "step": 21640 }, { "epoch": 0.17319861441108472, "grad_norm": 0.6406462161288965, "learning_rate": 9.837735357449725e-06, "loss": 0.4053, "step": 21650 }, { "epoch": 0.17327861377108983, "grad_norm": 0.6558773895187269, "learning_rate": 9.837382345843633e-06, "loss": 0.4147, "step": 21660 }, { "epoch": 0.17335861313109496, "grad_norm": 0.6818770527981431, "learning_rate": 9.837028957008506e-06, "loss": 0.4022, "step": 21670 }, { "epoch": 0.17343861249110007, "grad_norm": 0.6650376942748226, "learning_rate": 9.836675190971901e-06, "loss": 0.4076, "step": 21680 }, { "epoch": 0.1735186118511052, "grad_norm": 0.7548751504355825, "learning_rate": 9.836321047761406e-06, "loss": 0.431, "step": 21690 }, { "epoch": 0.1735986112111103, "grad_norm": 0.5992471355750535, "learning_rate": 9.835966527404638e-06, "loss": 0.3858, "step": 21700 }, { "epoch": 0.17367861057111544, "grad_norm": 0.7244035698035862, "learning_rate": 9.835611629929244e-06, "loss": 0.4343, "step": 21710 }, { "epoch": 0.17375860993112055, "grad_norm": 0.5947393053588823, "learning_rate": 9.835256355362898e-06, "loss": 0.4035, "step": 21720 }, { "epoch": 0.17383860929112568, "grad_norm": 0.6545280839345484, "learning_rate": 9.834900703733307e-06, "loss": 0.4338, "step": 21730 }, { "epoch": 0.1739186086511308, "grad_norm": 0.8243175516585616, "learning_rate": 9.834544675068204e-06, "loss": 0.4454, "step": 21740 }, { "epoch": 0.17399860801113592, "grad_norm": 0.6935919200960454, "learning_rate": 9.834188269395353e-06, "loss": 0.4218, "step": 21750 }, { "epoch": 0.17407860737114103, "grad_norm": 0.6999871816281334, "learning_rate": 9.833831486742547e-06, "loss": 0.4078, "step": 21760 }, { "epoch": 0.17415860673114614, "grad_norm": 0.6451209545645427, "learning_rate": 9.83347432713761e-06, "loss": 0.431, "step": 21770 }, { "epoch": 0.17423860609115127, "grad_norm": 0.7671320032611695, "learning_rate": 9.83311679060839e-06, "loss": 0.4326, "step": 21780 }, { "epoch": 0.17431860545115638, "grad_norm": 0.8775087601130925, "learning_rate": 9.832758877182774e-06, "loss": 0.4216, "step": 21790 }, { "epoch": 0.1743986048111615, "grad_norm": 0.6239401369249699, "learning_rate": 9.832400586888671e-06, "loss": 0.4058, "step": 21800 }, { "epoch": 0.17447860417116662, "grad_norm": 0.6387700674412592, "learning_rate": 9.832041919754018e-06, "loss": 0.4052, "step": 21810 }, { "epoch": 0.17455860353117175, "grad_norm": 0.7481127276938104, "learning_rate": 9.831682875806789e-06, "loss": 0.4421, "step": 21820 }, { "epoch": 0.17463860289117686, "grad_norm": 0.6768978416402571, "learning_rate": 9.83132345507498e-06, "loss": 0.4067, "step": 21830 }, { "epoch": 0.174718602251182, "grad_norm": 0.59653741252301, "learning_rate": 9.83096365758662e-06, "loss": 0.425, "step": 21840 }, { "epoch": 0.1747986016111871, "grad_norm": 0.6122587825525018, "learning_rate": 9.83060348336977e-06, "loss": 0.4139, "step": 21850 }, { "epoch": 0.17487860097119223, "grad_norm": 0.6479325926428358, "learning_rate": 9.830242932452511e-06, "loss": 0.4133, "step": 21860 }, { "epoch": 0.17495860033119734, "grad_norm": 0.6863158077187407, "learning_rate": 9.829882004862965e-06, "loss": 0.4388, "step": 21870 }, { "epoch": 0.17503859969120247, "grad_norm": 0.5475445829465457, "learning_rate": 9.829520700629277e-06, "loss": 0.414, "step": 21880 }, { "epoch": 0.17511859905120758, "grad_norm": 0.659778955481443, "learning_rate": 9.829159019779619e-06, "loss": 0.4481, "step": 21890 }, { "epoch": 0.1751985984112127, "grad_norm": 0.8138138869440129, "learning_rate": 9.828796962342198e-06, "loss": 0.4506, "step": 21900 }, { "epoch": 0.17527859777121782, "grad_norm": 0.7907130275329811, "learning_rate": 9.828434528345246e-06, "loss": 0.4211, "step": 21910 }, { "epoch": 0.17535859713122295, "grad_norm": 0.6874854601910443, "learning_rate": 9.82807171781703e-06, "loss": 0.4262, "step": 21920 }, { "epoch": 0.17543859649122806, "grad_norm": 0.6243224145852562, "learning_rate": 9.82770853078584e-06, "loss": 0.4292, "step": 21930 }, { "epoch": 0.1755185958512332, "grad_norm": 0.6890455399729368, "learning_rate": 9.82734496728e-06, "loss": 0.4171, "step": 21940 }, { "epoch": 0.1755985952112383, "grad_norm": 0.7360634382963682, "learning_rate": 9.826981027327857e-06, "loss": 0.4476, "step": 21950 }, { "epoch": 0.17567859457124343, "grad_norm": 0.704668115381533, "learning_rate": 9.826616710957798e-06, "loss": 0.4461, "step": 21960 }, { "epoch": 0.17575859393124854, "grad_norm": 0.708932958048363, "learning_rate": 9.826252018198228e-06, "loss": 0.4298, "step": 21970 }, { "epoch": 0.17583859329125368, "grad_norm": 0.6961732861914091, "learning_rate": 9.82588694907759e-06, "loss": 0.4247, "step": 21980 }, { "epoch": 0.17591859265125878, "grad_norm": 0.6964619086285836, "learning_rate": 9.82552150362435e-06, "loss": 0.4326, "step": 21990 }, { "epoch": 0.17599859201126392, "grad_norm": 0.6381665047590903, "learning_rate": 9.825155681867009e-06, "loss": 0.422, "step": 22000 }, { "epoch": 0.17607859137126902, "grad_norm": 0.7022699566573609, "learning_rate": 9.824789483834092e-06, "loss": 0.444, "step": 22010 }, { "epoch": 0.17615859073127416, "grad_norm": 0.7385068250758567, "learning_rate": 9.824422909554159e-06, "loss": 0.4219, "step": 22020 }, { "epoch": 0.17623859009127926, "grad_norm": 0.6704951871005033, "learning_rate": 9.824055959055792e-06, "loss": 0.4278, "step": 22030 }, { "epoch": 0.1763185894512844, "grad_norm": 0.74129321485581, "learning_rate": 9.82368863236761e-06, "loss": 0.4506, "step": 22040 }, { "epoch": 0.1763985888112895, "grad_norm": 0.6470101614756335, "learning_rate": 9.823320929518257e-06, "loss": 0.4296, "step": 22050 }, { "epoch": 0.17647858817129464, "grad_norm": 0.6620453430365683, "learning_rate": 9.822952850536405e-06, "loss": 0.4189, "step": 22060 }, { "epoch": 0.17655858753129974, "grad_norm": 0.6796583122859854, "learning_rate": 9.822584395450761e-06, "loss": 0.4226, "step": 22070 }, { "epoch": 0.17663858689130488, "grad_norm": 0.6787277259894382, "learning_rate": 9.822215564290054e-06, "loss": 0.4154, "step": 22080 }, { "epoch": 0.17671858625130998, "grad_norm": 0.7674431238632897, "learning_rate": 9.821846357083052e-06, "loss": 0.4022, "step": 22090 }, { "epoch": 0.17679858561131512, "grad_norm": 0.88932938938137, "learning_rate": 9.821476773858542e-06, "loss": 0.4385, "step": 22100 }, { "epoch": 0.17687858497132022, "grad_norm": 0.5979251763835611, "learning_rate": 9.821106814645344e-06, "loss": 0.409, "step": 22110 }, { "epoch": 0.17695858433132536, "grad_norm": 0.6166385289532681, "learning_rate": 9.820736479472313e-06, "loss": 0.4099, "step": 22120 }, { "epoch": 0.17703858369133046, "grad_norm": 0.6316835448620166, "learning_rate": 9.820365768368324e-06, "loss": 0.4181, "step": 22130 }, { "epoch": 0.1771185830513356, "grad_norm": 0.547410661838065, "learning_rate": 9.819994681362288e-06, "loss": 0.4233, "step": 22140 }, { "epoch": 0.1771985824113407, "grad_norm": 0.8580222845501216, "learning_rate": 9.819623218483143e-06, "loss": 0.406, "step": 22150 }, { "epoch": 0.17727858177134584, "grad_norm": 0.5979768260782184, "learning_rate": 9.819251379759855e-06, "loss": 0.4207, "step": 22160 }, { "epoch": 0.17735858113135095, "grad_norm": 0.6654591218154604, "learning_rate": 9.818879165221423e-06, "loss": 0.4201, "step": 22170 }, { "epoch": 0.17743858049135608, "grad_norm": 0.8153716695702263, "learning_rate": 9.81850657489687e-06, "loss": 0.4233, "step": 22180 }, { "epoch": 0.17751857985136119, "grad_norm": 0.6970413972949652, "learning_rate": 9.818133608815253e-06, "loss": 0.4377, "step": 22190 }, { "epoch": 0.17759857921136632, "grad_norm": 0.6562200150954057, "learning_rate": 9.817760267005658e-06, "loss": 0.4172, "step": 22200 }, { "epoch": 0.17767857857137143, "grad_norm": 0.6725056334076237, "learning_rate": 9.817386549497199e-06, "loss": 0.414, "step": 22210 }, { "epoch": 0.17775857793137656, "grad_norm": 0.6157693383937598, "learning_rate": 9.817012456319016e-06, "loss": 0.4049, "step": 22220 }, { "epoch": 0.17783857729138167, "grad_norm": 0.7229268257750433, "learning_rate": 9.816637987500285e-06, "loss": 0.4185, "step": 22230 }, { "epoch": 0.1779185766513868, "grad_norm": 0.639922481446001, "learning_rate": 9.816263143070206e-06, "loss": 0.4073, "step": 22240 }, { "epoch": 0.1779985760113919, "grad_norm": 0.7874294034487778, "learning_rate": 9.81588792305801e-06, "loss": 0.4166, "step": 22250 }, { "epoch": 0.17807857537139704, "grad_norm": 0.6212533300768333, "learning_rate": 9.815512327492959e-06, "loss": 0.4212, "step": 22260 }, { "epoch": 0.17815857473140215, "grad_norm": 0.6303052903224167, "learning_rate": 9.815136356404341e-06, "loss": 0.4105, "step": 22270 }, { "epoch": 0.17823857409140728, "grad_norm": 0.6611509342717248, "learning_rate": 9.814760009821476e-06, "loss": 0.3979, "step": 22280 }, { "epoch": 0.1783185734514124, "grad_norm": 0.673264434316889, "learning_rate": 9.814383287773712e-06, "loss": 0.4179, "step": 22290 }, { "epoch": 0.17839857281141752, "grad_norm": 0.6815700994480983, "learning_rate": 9.814006190290428e-06, "loss": 0.4226, "step": 22300 }, { "epoch": 0.17847857217142263, "grad_norm": 0.7089739216772585, "learning_rate": 9.813628717401026e-06, "loss": 0.4265, "step": 22310 }, { "epoch": 0.17855857153142776, "grad_norm": 0.6548152001735226, "learning_rate": 9.813250869134949e-06, "loss": 0.4258, "step": 22320 }, { "epoch": 0.17863857089143287, "grad_norm": 0.6732925250136134, "learning_rate": 9.812872645521658e-06, "loss": 0.4319, "step": 22330 }, { "epoch": 0.17871857025143797, "grad_norm": 0.5925716391013746, "learning_rate": 9.812494046590647e-06, "loss": 0.3997, "step": 22340 }, { "epoch": 0.1787985696114431, "grad_norm": 0.5807517113186615, "learning_rate": 9.812115072371444e-06, "loss": 0.4263, "step": 22350 }, { "epoch": 0.17887856897144822, "grad_norm": 0.5775415671647396, "learning_rate": 9.811735722893598e-06, "loss": 0.4366, "step": 22360 }, { "epoch": 0.17895856833145335, "grad_norm": 0.6917112572654127, "learning_rate": 9.811355998186694e-06, "loss": 0.4229, "step": 22370 }, { "epoch": 0.17903856769145846, "grad_norm": 0.5563816395703086, "learning_rate": 9.810975898280344e-06, "loss": 0.4029, "step": 22380 }, { "epoch": 0.1791185670514636, "grad_norm": 0.6947037018870933, "learning_rate": 9.810595423204186e-06, "loss": 0.419, "step": 22390 }, { "epoch": 0.1791985664114687, "grad_norm": 0.5947091443253888, "learning_rate": 9.810214572987893e-06, "loss": 0.4054, "step": 22400 }, { "epoch": 0.17927856577147383, "grad_norm": 0.7589316421331842, "learning_rate": 9.809833347661162e-06, "loss": 0.4297, "step": 22410 }, { "epoch": 0.17935856513147894, "grad_norm": 0.6409481565196362, "learning_rate": 9.809451747253724e-06, "loss": 0.4356, "step": 22420 }, { "epoch": 0.17943856449148407, "grad_norm": 0.7253626291535487, "learning_rate": 9.809069771795338e-06, "loss": 0.4283, "step": 22430 }, { "epoch": 0.17951856385148918, "grad_norm": 0.6418094833177213, "learning_rate": 9.808687421315788e-06, "loss": 0.4198, "step": 22440 }, { "epoch": 0.1795985632114943, "grad_norm": 0.6320739850072938, "learning_rate": 9.808304695844893e-06, "loss": 0.4144, "step": 22450 }, { "epoch": 0.17967856257149942, "grad_norm": 0.7005775882618699, "learning_rate": 9.807921595412495e-06, "loss": 0.4454, "step": 22460 }, { "epoch": 0.17975856193150455, "grad_norm": 0.6407637226732068, "learning_rate": 9.807538120048475e-06, "loss": 0.4228, "step": 22470 }, { "epoch": 0.17983856129150966, "grad_norm": 0.6261658054778512, "learning_rate": 9.807154269782732e-06, "loss": 0.4118, "step": 22480 }, { "epoch": 0.1799185606515148, "grad_norm": 0.8330905748633054, "learning_rate": 9.806770044645201e-06, "loss": 0.452, "step": 22490 }, { "epoch": 0.1799985600115199, "grad_norm": 0.6908022044797482, "learning_rate": 9.806385444665847e-06, "loss": 0.4176, "step": 22500 }, { "epoch": 0.18007855937152503, "grad_norm": 0.6501611326921196, "learning_rate": 9.806000469874659e-06, "loss": 0.4462, "step": 22510 }, { "epoch": 0.18015855873153014, "grad_norm": 0.64053233671516, "learning_rate": 9.805615120301658e-06, "loss": 0.3934, "step": 22520 }, { "epoch": 0.18023855809153527, "grad_norm": 0.6483016289003748, "learning_rate": 9.805229395976896e-06, "loss": 0.4012, "step": 22530 }, { "epoch": 0.18031855745154038, "grad_norm": 0.6953071973201105, "learning_rate": 9.804843296930451e-06, "loss": 0.4254, "step": 22540 }, { "epoch": 0.1803985568115455, "grad_norm": 0.6134274757125947, "learning_rate": 9.804456823192435e-06, "loss": 0.4279, "step": 22550 }, { "epoch": 0.18047855617155062, "grad_norm": 0.6124909681901709, "learning_rate": 9.804069974792982e-06, "loss": 0.4194, "step": 22560 }, { "epoch": 0.18055855553155575, "grad_norm": 0.7625941884781346, "learning_rate": 9.80368275176226e-06, "loss": 0.4372, "step": 22570 }, { "epoch": 0.18063855489156086, "grad_norm": 0.6584970107132536, "learning_rate": 9.803295154130468e-06, "loss": 0.4238, "step": 22580 }, { "epoch": 0.180718554251566, "grad_norm": 0.6333689123606011, "learning_rate": 9.802907181927832e-06, "loss": 0.4432, "step": 22590 }, { "epoch": 0.1807985536115711, "grad_norm": 0.6645890179068339, "learning_rate": 9.802518835184602e-06, "loss": 0.4345, "step": 22600 }, { "epoch": 0.18087855297157623, "grad_norm": 0.6841460917619889, "learning_rate": 9.802130113931065e-06, "loss": 0.4028, "step": 22610 }, { "epoch": 0.18095855233158134, "grad_norm": 0.6688973080196331, "learning_rate": 9.801741018197536e-06, "loss": 0.4323, "step": 22620 }, { "epoch": 0.18103855169158647, "grad_norm": 0.5789282007195338, "learning_rate": 9.801351548014355e-06, "loss": 0.4165, "step": 22630 }, { "epoch": 0.18111855105159158, "grad_norm": 0.7076937516287767, "learning_rate": 9.800961703411896e-06, "loss": 0.4357, "step": 22640 }, { "epoch": 0.18119855041159671, "grad_norm": 0.6233193958822589, "learning_rate": 9.800571484420556e-06, "loss": 0.4275, "step": 22650 }, { "epoch": 0.18127854977160182, "grad_norm": 0.6233849757776556, "learning_rate": 9.800180891070768e-06, "loss": 0.39, "step": 22660 }, { "epoch": 0.18135854913160696, "grad_norm": 0.6215524901516042, "learning_rate": 9.79978992339299e-06, "loss": 0.4025, "step": 22670 }, { "epoch": 0.18143854849161206, "grad_norm": 0.7424830744895599, "learning_rate": 9.799398581417714e-06, "loss": 0.4343, "step": 22680 }, { "epoch": 0.1815185478516172, "grad_norm": 0.6760382234352256, "learning_rate": 9.799006865175453e-06, "loss": 0.4202, "step": 22690 }, { "epoch": 0.1815985472116223, "grad_norm": 0.7318122185356655, "learning_rate": 9.798614774696755e-06, "loss": 0.4463, "step": 22700 }, { "epoch": 0.18167854657162744, "grad_norm": 0.6878267933268647, "learning_rate": 9.7982223100122e-06, "loss": 0.4312, "step": 22710 }, { "epoch": 0.18175854593163254, "grad_norm": 0.7565589888778603, "learning_rate": 9.797829471152386e-06, "loss": 0.4145, "step": 22720 }, { "epoch": 0.18183854529163768, "grad_norm": 0.7321110856794595, "learning_rate": 9.797436258147953e-06, "loss": 0.4313, "step": 22730 }, { "epoch": 0.18191854465164278, "grad_norm": 0.7194047372536025, "learning_rate": 9.797042671029563e-06, "loss": 0.415, "step": 22740 }, { "epoch": 0.18199854401164792, "grad_norm": 0.6620258124403724, "learning_rate": 9.796648709827908e-06, "loss": 0.4113, "step": 22750 }, { "epoch": 0.18207854337165302, "grad_norm": 0.5622803248223572, "learning_rate": 9.796254374573711e-06, "loss": 0.4182, "step": 22760 }, { "epoch": 0.18215854273165816, "grad_norm": 0.7040431650291215, "learning_rate": 9.795859665297722e-06, "loss": 0.4462, "step": 22770 }, { "epoch": 0.18223854209166326, "grad_norm": 0.6046300762206999, "learning_rate": 9.795464582030722e-06, "loss": 0.3991, "step": 22780 }, { "epoch": 0.1823185414516684, "grad_norm": 0.7299453457082254, "learning_rate": 9.79506912480352e-06, "loss": 0.3964, "step": 22790 }, { "epoch": 0.1823985408116735, "grad_norm": 0.6542477112105434, "learning_rate": 9.794673293646957e-06, "loss": 0.4046, "step": 22800 }, { "epoch": 0.18247854017167864, "grad_norm": 0.6595068457370842, "learning_rate": 9.794277088591897e-06, "loss": 0.4034, "step": 22810 }, { "epoch": 0.18255853953168374, "grad_norm": 0.7098589827210651, "learning_rate": 9.793880509669238e-06, "loss": 0.4304, "step": 22820 }, { "epoch": 0.18263853889168888, "grad_norm": 0.6436132183040066, "learning_rate": 9.793483556909906e-06, "loss": 0.4273, "step": 22830 }, { "epoch": 0.18271853825169398, "grad_norm": 0.6928767006582077, "learning_rate": 9.793086230344856e-06, "loss": 0.405, "step": 22840 }, { "epoch": 0.18279853761169912, "grad_norm": 0.7278628245318264, "learning_rate": 9.792688530005077e-06, "loss": 0.4365, "step": 22850 }, { "epoch": 0.18287853697170423, "grad_norm": 0.7776862194011042, "learning_rate": 9.792290455921574e-06, "loss": 0.4003, "step": 22860 }, { "epoch": 0.18295853633170936, "grad_norm": 0.6849127981164824, "learning_rate": 9.791892008125397e-06, "loss": 0.4431, "step": 22870 }, { "epoch": 0.18303853569171447, "grad_norm": 0.5710923938674742, "learning_rate": 9.791493186647613e-06, "loss": 0.413, "step": 22880 }, { "epoch": 0.1831185350517196, "grad_norm": 0.6296960117871836, "learning_rate": 9.791093991519325e-06, "loss": 0.4045, "step": 22890 }, { "epoch": 0.1831985344117247, "grad_norm": 0.6404502230364351, "learning_rate": 9.790694422771665e-06, "loss": 0.4359, "step": 22900 }, { "epoch": 0.1832785337717298, "grad_norm": 0.7031749238029242, "learning_rate": 9.790294480435789e-06, "loss": 0.4131, "step": 22910 }, { "epoch": 0.18335853313173495, "grad_norm": 0.6898341533846726, "learning_rate": 9.789894164542885e-06, "loss": 0.4093, "step": 22920 }, { "epoch": 0.18343853249174005, "grad_norm": 0.7325258783654517, "learning_rate": 9.789493475124173e-06, "loss": 0.4369, "step": 22930 }, { "epoch": 0.1835185318517452, "grad_norm": 0.5511885917220254, "learning_rate": 9.789092412210899e-06, "loss": 0.4104, "step": 22940 }, { "epoch": 0.1835985312117503, "grad_norm": 0.6941561448270874, "learning_rate": 9.788690975834337e-06, "loss": 0.4081, "step": 22950 }, { "epoch": 0.18367853057175543, "grad_norm": 0.6642248264146592, "learning_rate": 9.788289166025792e-06, "loss": 0.4172, "step": 22960 }, { "epoch": 0.18375852993176053, "grad_norm": 0.761856315806979, "learning_rate": 9.787886982816601e-06, "loss": 0.4183, "step": 22970 }, { "epoch": 0.18383852929176567, "grad_norm": 0.7155985955631408, "learning_rate": 9.787484426238125e-06, "loss": 0.4395, "step": 22980 }, { "epoch": 0.18391852865177077, "grad_norm": 0.6409264015933958, "learning_rate": 9.787081496321755e-06, "loss": 0.3978, "step": 22990 }, { "epoch": 0.1839985280117759, "grad_norm": 0.6103677955063597, "learning_rate": 9.786678193098915e-06, "loss": 0.4114, "step": 23000 }, { "epoch": 0.18407852737178101, "grad_norm": 0.664796202230127, "learning_rate": 9.786274516601053e-06, "loss": 0.4228, "step": 23010 }, { "epoch": 0.18415852673178615, "grad_norm": 0.6796478434432554, "learning_rate": 9.78587046685965e-06, "loss": 0.4191, "step": 23020 }, { "epoch": 0.18423852609179125, "grad_norm": 0.7187027117825648, "learning_rate": 9.785466043906214e-06, "loss": 0.4163, "step": 23030 }, { "epoch": 0.1843185254517964, "grad_norm": 0.6911468423053378, "learning_rate": 9.785061247772281e-06, "loss": 0.4203, "step": 23040 }, { "epoch": 0.1843985248118015, "grad_norm": 0.6404986904009609, "learning_rate": 9.784656078489421e-06, "loss": 0.4011, "step": 23050 }, { "epoch": 0.18447852417180663, "grad_norm": 0.723770533913787, "learning_rate": 9.78425053608923e-06, "loss": 0.4183, "step": 23060 }, { "epoch": 0.18455852353181174, "grad_norm": 0.6557594395484189, "learning_rate": 9.783844620603329e-06, "loss": 0.4104, "step": 23070 }, { "epoch": 0.18463852289181687, "grad_norm": 0.6455993187851263, "learning_rate": 9.783438332063376e-06, "loss": 0.3994, "step": 23080 }, { "epoch": 0.18471852225182198, "grad_norm": 0.6380821967522415, "learning_rate": 9.783031670501053e-06, "loss": 0.3999, "step": 23090 }, { "epoch": 0.1847985216118271, "grad_norm": 0.6084370846514118, "learning_rate": 9.782624635948072e-06, "loss": 0.3947, "step": 23100 }, { "epoch": 0.18487852097183222, "grad_norm": 0.7009862159161125, "learning_rate": 9.782217228436176e-06, "loss": 0.4178, "step": 23110 }, { "epoch": 0.18495852033183735, "grad_norm": 0.7330198551913317, "learning_rate": 9.781809447997133e-06, "loss": 0.427, "step": 23120 }, { "epoch": 0.18503851969184246, "grad_norm": 0.5956033874455163, "learning_rate": 9.781401294662744e-06, "loss": 0.4104, "step": 23130 }, { "epoch": 0.1851185190518476, "grad_norm": 0.6200131566898018, "learning_rate": 9.780992768464836e-06, "loss": 0.4249, "step": 23140 }, { "epoch": 0.1851985184118527, "grad_norm": 0.6246919274935985, "learning_rate": 9.780583869435269e-06, "loss": 0.4193, "step": 23150 }, { "epoch": 0.18527851777185783, "grad_norm": 0.6387820628417522, "learning_rate": 9.78017459760593e-06, "loss": 0.441, "step": 23160 }, { "epoch": 0.18535851713186294, "grad_norm": 0.6495111255836188, "learning_rate": 9.779764953008733e-06, "loss": 0.4632, "step": 23170 }, { "epoch": 0.18543851649186807, "grad_norm": 0.6948665907438786, "learning_rate": 9.779354935675622e-06, "loss": 0.4551, "step": 23180 }, { "epoch": 0.18551851585187318, "grad_norm": 0.684828791931822, "learning_rate": 9.778944545638575e-06, "loss": 0.4199, "step": 23190 }, { "epoch": 0.1855985152118783, "grad_norm": 0.5995018700434361, "learning_rate": 9.778533782929592e-06, "loss": 0.4017, "step": 23200 }, { "epoch": 0.18567851457188342, "grad_norm": 0.6394747951487022, "learning_rate": 9.778122647580704e-06, "loss": 0.3979, "step": 23210 }, { "epoch": 0.18575851393188855, "grad_norm": 0.6835735257915984, "learning_rate": 9.777711139623976e-06, "loss": 0.4178, "step": 23220 }, { "epoch": 0.18583851329189366, "grad_norm": 0.6288277777440676, "learning_rate": 9.777299259091494e-06, "loss": 0.4313, "step": 23230 }, { "epoch": 0.1859185126518988, "grad_norm": 0.741158681171851, "learning_rate": 9.776887006015383e-06, "loss": 0.4391, "step": 23240 }, { "epoch": 0.1859985120119039, "grad_norm": 0.6867261635616976, "learning_rate": 9.776474380427785e-06, "loss": 0.3943, "step": 23250 }, { "epoch": 0.18607851137190903, "grad_norm": 0.6191598064507902, "learning_rate": 9.776061382360882e-06, "loss": 0.4208, "step": 23260 }, { "epoch": 0.18615851073191414, "grad_norm": 0.6082245741886207, "learning_rate": 9.775648011846877e-06, "loss": 0.4137, "step": 23270 }, { "epoch": 0.18623851009191927, "grad_norm": 0.6268870105389427, "learning_rate": 9.77523426891801e-06, "loss": 0.3983, "step": 23280 }, { "epoch": 0.18631850945192438, "grad_norm": 0.6320128155077398, "learning_rate": 9.77482015360654e-06, "loss": 0.4294, "step": 23290 }, { "epoch": 0.18639850881192951, "grad_norm": 0.6243219157049249, "learning_rate": 9.774405665944763e-06, "loss": 0.4185, "step": 23300 }, { "epoch": 0.18647850817193462, "grad_norm": 0.6692278455924682, "learning_rate": 9.773990805965003e-06, "loss": 0.4181, "step": 23310 }, { "epoch": 0.18655850753193975, "grad_norm": 0.6203174659738125, "learning_rate": 9.773575573699612e-06, "loss": 0.4079, "step": 23320 }, { "epoch": 0.18663850689194486, "grad_norm": 0.6730391410981093, "learning_rate": 9.773159969180968e-06, "loss": 0.435, "step": 23330 }, { "epoch": 0.18671850625195, "grad_norm": 0.6597855375174398, "learning_rate": 9.77274399244148e-06, "loss": 0.4153, "step": 23340 }, { "epoch": 0.1867985056119551, "grad_norm": 0.7304174398393379, "learning_rate": 9.77232764351359e-06, "loss": 0.4569, "step": 23350 }, { "epoch": 0.18687850497196024, "grad_norm": 0.6651901884678096, "learning_rate": 9.771910922429765e-06, "loss": 0.4303, "step": 23360 }, { "epoch": 0.18695850433196534, "grad_norm": 0.6891534002118381, "learning_rate": 9.771493829222501e-06, "loss": 0.416, "step": 23370 }, { "epoch": 0.18703850369197048, "grad_norm": 0.6873361433358967, "learning_rate": 9.771076363924322e-06, "loss": 0.4361, "step": 23380 }, { "epoch": 0.18711850305197558, "grad_norm": 0.9126118927369602, "learning_rate": 9.770658526567787e-06, "loss": 0.4055, "step": 23390 }, { "epoch": 0.18719850241198072, "grad_norm": 0.8171064334722744, "learning_rate": 9.770240317185476e-06, "loss": 0.4591, "step": 23400 }, { "epoch": 0.18727850177198582, "grad_norm": 0.6772283970457216, "learning_rate": 9.769821735810003e-06, "loss": 0.4033, "step": 23410 }, { "epoch": 0.18735850113199096, "grad_norm": 0.8115055178200209, "learning_rate": 9.769402782474012e-06, "loss": 0.4065, "step": 23420 }, { "epoch": 0.18743850049199606, "grad_norm": 0.6764649656519383, "learning_rate": 9.768983457210171e-06, "loss": 0.4399, "step": 23430 }, { "epoch": 0.1875184998520012, "grad_norm": 0.7120739271021983, "learning_rate": 9.768563760051182e-06, "loss": 0.3936, "step": 23440 }, { "epoch": 0.1875984992120063, "grad_norm": 0.7104985564262041, "learning_rate": 9.768143691029771e-06, "loss": 0.4297, "step": 23450 }, { "epoch": 0.18767849857201144, "grad_norm": 0.7513875475639388, "learning_rate": 9.767723250178698e-06, "loss": 0.3969, "step": 23460 }, { "epoch": 0.18775849793201654, "grad_norm": 0.6618177060930741, "learning_rate": 9.76730243753075e-06, "loss": 0.4135, "step": 23470 }, { "epoch": 0.18783849729202165, "grad_norm": 0.715513302621575, "learning_rate": 9.766881253118741e-06, "loss": 0.4048, "step": 23480 }, { "epoch": 0.18791849665202678, "grad_norm": 0.7179264536635402, "learning_rate": 9.76645969697552e-06, "loss": 0.4325, "step": 23490 }, { "epoch": 0.1879984960120319, "grad_norm": 0.6543080279387782, "learning_rate": 9.766037769133955e-06, "loss": 0.4424, "step": 23500 }, { "epoch": 0.18807849537203702, "grad_norm": 0.6042883752974674, "learning_rate": 9.765615469626953e-06, "loss": 0.4053, "step": 23510 }, { "epoch": 0.18815849473204213, "grad_norm": 0.7427211313215107, "learning_rate": 9.765192798487443e-06, "loss": 0.4038, "step": 23520 }, { "epoch": 0.18823849409204726, "grad_norm": 0.5743677315249099, "learning_rate": 9.764769755748388e-06, "loss": 0.4107, "step": 23530 }, { "epoch": 0.18831849345205237, "grad_norm": 0.641299404986638, "learning_rate": 9.764346341442777e-06, "loss": 0.4234, "step": 23540 }, { "epoch": 0.1883984928120575, "grad_norm": 0.6736829049078522, "learning_rate": 9.76392255560363e-06, "loss": 0.3728, "step": 23550 }, { "epoch": 0.1884784921720626, "grad_norm": 0.7287849929522744, "learning_rate": 9.763498398263992e-06, "loss": 0.4094, "step": 23560 }, { "epoch": 0.18855849153206775, "grad_norm": 0.5957925473853981, "learning_rate": 9.763073869456941e-06, "loss": 0.3928, "step": 23570 }, { "epoch": 0.18863849089207285, "grad_norm": 0.6860630406087256, "learning_rate": 9.762648969215583e-06, "loss": 0.4413, "step": 23580 }, { "epoch": 0.18871849025207799, "grad_norm": 0.6972122087226942, "learning_rate": 9.762223697573053e-06, "loss": 0.3891, "step": 23590 }, { "epoch": 0.1887984896120831, "grad_norm": 0.6472334999115853, "learning_rate": 9.761798054562513e-06, "loss": 0.4122, "step": 23600 }, { "epoch": 0.18887848897208823, "grad_norm": 0.699613811707028, "learning_rate": 9.761372040217156e-06, "loss": 0.439, "step": 23610 }, { "epoch": 0.18895848833209333, "grad_norm": 0.6438048868968133, "learning_rate": 9.760945654570205e-06, "loss": 0.4212, "step": 23620 }, { "epoch": 0.18903848769209847, "grad_norm": 0.6780467012109197, "learning_rate": 9.76051889765491e-06, "loss": 0.3861, "step": 23630 }, { "epoch": 0.18911848705210357, "grad_norm": 0.6983837236706077, "learning_rate": 9.760091769504547e-06, "loss": 0.424, "step": 23640 }, { "epoch": 0.1891984864121087, "grad_norm": 0.6557663461403099, "learning_rate": 9.75966427015243e-06, "loss": 0.4114, "step": 23650 }, { "epoch": 0.1892784857721138, "grad_norm": 0.6425282267988605, "learning_rate": 9.759236399631891e-06, "loss": 0.427, "step": 23660 }, { "epoch": 0.18935848513211895, "grad_norm": 0.6672980042777499, "learning_rate": 9.758808157976299e-06, "loss": 0.4312, "step": 23670 }, { "epoch": 0.18943848449212405, "grad_norm": 0.682625279776428, "learning_rate": 9.758379545219047e-06, "loss": 0.4282, "step": 23680 }, { "epoch": 0.1895184838521292, "grad_norm": 0.7322752260078838, "learning_rate": 9.757950561393563e-06, "loss": 0.4004, "step": 23690 }, { "epoch": 0.1895984832121343, "grad_norm": 0.6575014493571472, "learning_rate": 9.757521206533297e-06, "loss": 0.3974, "step": 23700 }, { "epoch": 0.18967848257213943, "grad_norm": 0.7117310687518655, "learning_rate": 9.757091480671732e-06, "loss": 0.4389, "step": 23710 }, { "epoch": 0.18975848193214453, "grad_norm": 0.6480475156284033, "learning_rate": 9.756661383842378e-06, "loss": 0.4288, "step": 23720 }, { "epoch": 0.18983848129214967, "grad_norm": 0.6618981283255325, "learning_rate": 9.756230916078777e-06, "loss": 0.4344, "step": 23730 }, { "epoch": 0.18991848065215478, "grad_norm": 0.6253729396780634, "learning_rate": 9.755800077414497e-06, "loss": 0.4132, "step": 23740 }, { "epoch": 0.1899984800121599, "grad_norm": 0.7895544881973147, "learning_rate": 9.755368867883133e-06, "loss": 0.4323, "step": 23750 }, { "epoch": 0.19007847937216502, "grad_norm": 0.6803315067892887, "learning_rate": 9.754937287518314e-06, "loss": 0.4305, "step": 23760 }, { "epoch": 0.19015847873217015, "grad_norm": 0.701524655722687, "learning_rate": 9.754505336353695e-06, "loss": 0.4335, "step": 23770 }, { "epoch": 0.19023847809217526, "grad_norm": 0.5971190852195598, "learning_rate": 9.75407301442296e-06, "loss": 0.4497, "step": 23780 }, { "epoch": 0.1903184774521804, "grad_norm": 0.6839558340728604, "learning_rate": 9.753640321759824e-06, "loss": 0.3997, "step": 23790 }, { "epoch": 0.1903984768121855, "grad_norm": 0.5595322585773694, "learning_rate": 9.753207258398028e-06, "loss": 0.4096, "step": 23800 }, { "epoch": 0.19047847617219063, "grad_norm": 0.6047897676224536, "learning_rate": 9.752773824371343e-06, "loss": 0.4179, "step": 23810 }, { "epoch": 0.19055847553219574, "grad_norm": 0.6192454175888245, "learning_rate": 9.752340019713568e-06, "loss": 0.4337, "step": 23820 }, { "epoch": 0.19063847489220087, "grad_norm": 0.6875259827443391, "learning_rate": 9.751905844458536e-06, "loss": 0.4216, "step": 23830 }, { "epoch": 0.19071847425220598, "grad_norm": 0.7126368945155328, "learning_rate": 9.751471298640102e-06, "loss": 0.4216, "step": 23840 }, { "epoch": 0.1907984736122111, "grad_norm": 0.6524334115786673, "learning_rate": 9.751036382292151e-06, "loss": 0.4228, "step": 23850 }, { "epoch": 0.19087847297221622, "grad_norm": 0.6239885965393115, "learning_rate": 9.750601095448603e-06, "loss": 0.4156, "step": 23860 }, { "epoch": 0.19095847233222135, "grad_norm": 0.625339262638674, "learning_rate": 9.750165438143399e-06, "loss": 0.4524, "step": 23870 }, { "epoch": 0.19103847169222646, "grad_norm": 0.5754335285633934, "learning_rate": 9.749729410410513e-06, "loss": 0.4092, "step": 23880 }, { "epoch": 0.1911184710522316, "grad_norm": 0.6083567195046587, "learning_rate": 9.74929301228395e-06, "loss": 0.4123, "step": 23890 }, { "epoch": 0.1911984704122367, "grad_norm": 0.7529011060265992, "learning_rate": 9.748856243797738e-06, "loss": 0.4443, "step": 23900 }, { "epoch": 0.19127846977224183, "grad_norm": 0.6810351414608107, "learning_rate": 9.748419104985939e-06, "loss": 0.3807, "step": 23910 }, { "epoch": 0.19135846913224694, "grad_norm": 0.6322898979483265, "learning_rate": 9.74798159588264e-06, "loss": 0.4215, "step": 23920 }, { "epoch": 0.19143846849225207, "grad_norm": 0.7218760700619105, "learning_rate": 9.747543716521964e-06, "loss": 0.4303, "step": 23930 }, { "epoch": 0.19151846785225718, "grad_norm": 0.8033802720607761, "learning_rate": 9.74710546693805e-06, "loss": 0.4048, "step": 23940 }, { "epoch": 0.1915984672122623, "grad_norm": 0.6951990613881304, "learning_rate": 9.746666847165079e-06, "loss": 0.3995, "step": 23950 }, { "epoch": 0.19167846657226742, "grad_norm": 0.6170380107843149, "learning_rate": 9.746227857237254e-06, "loss": 0.4007, "step": 23960 }, { "epoch": 0.19175846593227255, "grad_norm": 0.6479963187983167, "learning_rate": 9.745788497188809e-06, "loss": 0.3847, "step": 23970 }, { "epoch": 0.19183846529227766, "grad_norm": 0.7324201105639484, "learning_rate": 9.745348767054005e-06, "loss": 0.3964, "step": 23980 }, { "epoch": 0.1919184646522828, "grad_norm": 0.7030649637677768, "learning_rate": 9.744908666867133e-06, "loss": 0.4296, "step": 23990 }, { "epoch": 0.1919984640122879, "grad_norm": 0.6834437110344929, "learning_rate": 9.744468196662516e-06, "loss": 0.4234, "step": 24000 }, { "epoch": 0.19207846337229303, "grad_norm": 0.6670924116700259, "learning_rate": 9.744027356474498e-06, "loss": 0.41, "step": 24010 }, { "epoch": 0.19215846273229814, "grad_norm": 0.637454839069396, "learning_rate": 9.743586146337458e-06, "loss": 0.4223, "step": 24020 }, { "epoch": 0.19223846209230327, "grad_norm": 0.6348930438764381, "learning_rate": 9.743144566285807e-06, "loss": 0.4369, "step": 24030 }, { "epoch": 0.19231846145230838, "grad_norm": 0.5925767504015235, "learning_rate": 9.742702616353974e-06, "loss": 0.4302, "step": 24040 }, { "epoch": 0.1923984608123135, "grad_norm": 0.6186073621280193, "learning_rate": 9.742260296576427e-06, "loss": 0.4157, "step": 24050 }, { "epoch": 0.19247846017231862, "grad_norm": 0.598015192493807, "learning_rate": 9.741817606987658e-06, "loss": 0.412, "step": 24060 }, { "epoch": 0.19255845953232373, "grad_norm": 0.6834189966299719, "learning_rate": 9.74137454762219e-06, "loss": 0.4247, "step": 24070 }, { "epoch": 0.19263845889232886, "grad_norm": 0.5786371784176386, "learning_rate": 9.74093111851457e-06, "loss": 0.3971, "step": 24080 }, { "epoch": 0.19271845825233397, "grad_norm": 0.5856366238798119, "learning_rate": 9.740487319699381e-06, "loss": 0.4392, "step": 24090 }, { "epoch": 0.1927984576123391, "grad_norm": 0.7023451732463148, "learning_rate": 9.74004315121123e-06, "loss": 0.4228, "step": 24100 }, { "epoch": 0.1928784569723442, "grad_norm": 0.7493297171999604, "learning_rate": 9.739598613084755e-06, "loss": 0.4226, "step": 24110 }, { "epoch": 0.19295845633234934, "grad_norm": 0.778548523196326, "learning_rate": 9.739153705354621e-06, "loss": 0.4306, "step": 24120 }, { "epoch": 0.19303845569235445, "grad_norm": 0.6013134286631945, "learning_rate": 9.738708428055524e-06, "loss": 0.4223, "step": 24130 }, { "epoch": 0.19311845505235958, "grad_norm": 0.7114308082444633, "learning_rate": 9.738262781222188e-06, "loss": 0.4277, "step": 24140 }, { "epoch": 0.1931984544123647, "grad_norm": 0.6253607335828972, "learning_rate": 9.737816764889363e-06, "loss": 0.406, "step": 24150 }, { "epoch": 0.19327845377236982, "grad_norm": 0.6800250832929505, "learning_rate": 9.73737037909183e-06, "loss": 0.4357, "step": 24160 }, { "epoch": 0.19335845313237493, "grad_norm": 0.7417105368855442, "learning_rate": 9.736923623864405e-06, "loss": 0.4205, "step": 24170 }, { "epoch": 0.19343845249238006, "grad_norm": 0.5969936735943242, "learning_rate": 9.73647649924192e-06, "loss": 0.416, "step": 24180 }, { "epoch": 0.19351845185238517, "grad_norm": 0.7917059714641248, "learning_rate": 9.736029005259245e-06, "loss": 0.4378, "step": 24190 }, { "epoch": 0.1935984512123903, "grad_norm": 0.6722981023984748, "learning_rate": 9.735581141951279e-06, "loss": 0.4193, "step": 24200 }, { "epoch": 0.1936784505723954, "grad_norm": 0.6415535179633877, "learning_rate": 9.735132909352943e-06, "loss": 0.417, "step": 24210 }, { "epoch": 0.19375844993240054, "grad_norm": 0.7273376739987748, "learning_rate": 9.734684307499194e-06, "loss": 0.4363, "step": 24220 }, { "epoch": 0.19383844929240565, "grad_norm": 0.6574916232122794, "learning_rate": 9.734235336425013e-06, "loss": 0.4235, "step": 24230 }, { "epoch": 0.19391844865241079, "grad_norm": 0.6407610365887395, "learning_rate": 9.733785996165414e-06, "loss": 0.426, "step": 24240 }, { "epoch": 0.1939984480124159, "grad_norm": 0.607818073744983, "learning_rate": 9.733336286755438e-06, "loss": 0.4174, "step": 24250 }, { "epoch": 0.19407844737242103, "grad_norm": 0.6751283532087085, "learning_rate": 9.732886208230151e-06, "loss": 0.394, "step": 24260 }, { "epoch": 0.19415844673242613, "grad_norm": 0.6058373903077847, "learning_rate": 9.732435760624652e-06, "loss": 0.4163, "step": 24270 }, { "epoch": 0.19423844609243127, "grad_norm": 0.6046916891991383, "learning_rate": 9.731984943974071e-06, "loss": 0.4269, "step": 24280 }, { "epoch": 0.19431844545243637, "grad_norm": 0.704930099746068, "learning_rate": 9.731533758313558e-06, "loss": 0.4124, "step": 24290 }, { "epoch": 0.1943984448124415, "grad_norm": 0.644121326617495, "learning_rate": 9.731082203678303e-06, "loss": 0.4384, "step": 24300 }, { "epoch": 0.1944784441724466, "grad_norm": 0.645939341885001, "learning_rate": 9.730630280103514e-06, "loss": 0.4267, "step": 24310 }, { "epoch": 0.19455844353245175, "grad_norm": 0.6912898954123319, "learning_rate": 9.730177987624439e-06, "loss": 0.4118, "step": 24320 }, { "epoch": 0.19463844289245685, "grad_norm": 0.7211679495227433, "learning_rate": 9.729725326276342e-06, "loss": 0.4326, "step": 24330 }, { "epoch": 0.194718442252462, "grad_norm": 0.601869543066784, "learning_rate": 9.729272296094528e-06, "loss": 0.4166, "step": 24340 }, { "epoch": 0.1947984416124671, "grad_norm": 0.6886813819969987, "learning_rate": 9.728818897114323e-06, "loss": 0.4322, "step": 24350 }, { "epoch": 0.19487844097247223, "grad_norm": 0.5806767880377387, "learning_rate": 9.728365129371082e-06, "loss": 0.4113, "step": 24360 }, { "epoch": 0.19495844033247733, "grad_norm": 0.6740623197310501, "learning_rate": 9.727910992900194e-06, "loss": 0.4438, "step": 24370 }, { "epoch": 0.19503843969248247, "grad_norm": 0.6056757387160903, "learning_rate": 9.727456487737073e-06, "loss": 0.4078, "step": 24380 }, { "epoch": 0.19511843905248757, "grad_norm": 0.6339896415731686, "learning_rate": 9.72700161391716e-06, "loss": 0.4243, "step": 24390 }, { "epoch": 0.1951984384124927, "grad_norm": 0.6793628715402941, "learning_rate": 9.72654637147593e-06, "loss": 0.4227, "step": 24400 }, { "epoch": 0.19527843777249781, "grad_norm": 0.6707672678058187, "learning_rate": 9.72609076044888e-06, "loss": 0.4002, "step": 24410 }, { "epoch": 0.19535843713250295, "grad_norm": 0.6085266898513201, "learning_rate": 9.725634780871543e-06, "loss": 0.4074, "step": 24420 }, { "epoch": 0.19543843649250806, "grad_norm": 0.6115409377950413, "learning_rate": 9.725178432779475e-06, "loss": 0.436, "step": 24430 }, { "epoch": 0.1955184358525132, "grad_norm": 0.651767984644892, "learning_rate": 9.724721716208266e-06, "loss": 0.4293, "step": 24440 }, { "epoch": 0.1955984352125183, "grad_norm": 0.6979729370613138, "learning_rate": 9.724264631193527e-06, "loss": 0.4156, "step": 24450 }, { "epoch": 0.19567843457252343, "grad_norm": 0.5503065214074888, "learning_rate": 9.723807177770905e-06, "loss": 0.4219, "step": 24460 }, { "epoch": 0.19575843393252854, "grad_norm": 0.6903694377795363, "learning_rate": 9.723349355976075e-06, "loss": 0.424, "step": 24470 }, { "epoch": 0.19583843329253367, "grad_norm": 0.6328713241916966, "learning_rate": 9.722891165844736e-06, "loss": 0.4253, "step": 24480 }, { "epoch": 0.19591843265253878, "grad_norm": 0.663080512469401, "learning_rate": 9.72243260741262e-06, "loss": 0.4286, "step": 24490 }, { "epoch": 0.1959984320125439, "grad_norm": 0.6251735200080217, "learning_rate": 9.721973680715486e-06, "loss": 0.4205, "step": 24500 }, { "epoch": 0.19607843137254902, "grad_norm": 0.699683094991078, "learning_rate": 9.721514385789121e-06, "loss": 0.4286, "step": 24510 }, { "epoch": 0.19615843073255415, "grad_norm": 0.6299522698164168, "learning_rate": 9.721054722669346e-06, "loss": 0.4366, "step": 24520 }, { "epoch": 0.19623843009255926, "grad_norm": 0.6313333294982585, "learning_rate": 9.720594691392e-06, "loss": 0.4023, "step": 24530 }, { "epoch": 0.1963184294525644, "grad_norm": 0.7036917573186507, "learning_rate": 9.720134291992962e-06, "loss": 0.4252, "step": 24540 }, { "epoch": 0.1963984288125695, "grad_norm": 0.6182756560444083, "learning_rate": 9.719673524508133e-06, "loss": 0.426, "step": 24550 }, { "epoch": 0.19647842817257463, "grad_norm": 0.6538582521252009, "learning_rate": 9.719212388973445e-06, "loss": 0.4177, "step": 24560 }, { "epoch": 0.19655842753257974, "grad_norm": 0.7413626390872093, "learning_rate": 9.718750885424857e-06, "loss": 0.3916, "step": 24570 }, { "epoch": 0.19663842689258487, "grad_norm": 0.7592732385136617, "learning_rate": 9.71828901389836e-06, "loss": 0.4311, "step": 24580 }, { "epoch": 0.19671842625258998, "grad_norm": 0.6365179945575712, "learning_rate": 9.717826774429972e-06, "loss": 0.4353, "step": 24590 }, { "epoch": 0.1967984256125951, "grad_norm": 0.7594017247320181, "learning_rate": 9.717364167055737e-06, "loss": 0.4241, "step": 24600 }, { "epoch": 0.19687842497260022, "grad_norm": 0.6154807361109508, "learning_rate": 9.716901191811732e-06, "loss": 0.4087, "step": 24610 }, { "epoch": 0.19695842433260535, "grad_norm": 0.6263355869259893, "learning_rate": 9.716437848734059e-06, "loss": 0.4109, "step": 24620 }, { "epoch": 0.19703842369261046, "grad_norm": 0.6444827424178753, "learning_rate": 9.715974137858853e-06, "loss": 0.4063, "step": 24630 }, { "epoch": 0.19711842305261557, "grad_norm": 0.8130741094021344, "learning_rate": 9.715510059222275e-06, "loss": 0.4274, "step": 24640 }, { "epoch": 0.1971984224126207, "grad_norm": 0.6229825375871947, "learning_rate": 9.71504561286051e-06, "loss": 0.4206, "step": 24650 }, { "epoch": 0.1972784217726258, "grad_norm": 0.5794036048585276, "learning_rate": 9.714580798809783e-06, "loss": 0.409, "step": 24660 }, { "epoch": 0.19735842113263094, "grad_norm": 1.0670632240772226, "learning_rate": 9.714115617106337e-06, "loss": 0.4023, "step": 24670 }, { "epoch": 0.19743842049263605, "grad_norm": 0.8292739800997653, "learning_rate": 9.713650067786449e-06, "loss": 0.4257, "step": 24680 }, { "epoch": 0.19751841985264118, "grad_norm": 0.6335715087580069, "learning_rate": 9.713184150886424e-06, "loss": 0.4412, "step": 24690 }, { "epoch": 0.1975984192126463, "grad_norm": 0.7356748758325906, "learning_rate": 9.712717866442593e-06, "loss": 0.4038, "step": 24700 }, { "epoch": 0.19767841857265142, "grad_norm": 0.64982696169169, "learning_rate": 9.712251214491322e-06, "loss": 0.4182, "step": 24710 }, { "epoch": 0.19775841793265653, "grad_norm": 0.7195763141996282, "learning_rate": 9.711784195068998e-06, "loss": 0.4064, "step": 24720 }, { "epoch": 0.19783841729266166, "grad_norm": 0.6913336431886736, "learning_rate": 9.711316808212042e-06, "loss": 0.4177, "step": 24730 }, { "epoch": 0.19791841665266677, "grad_norm": 0.7101492382927275, "learning_rate": 9.7108490539569e-06, "loss": 0.4152, "step": 24740 }, { "epoch": 0.1979984160126719, "grad_norm": 0.671792737958965, "learning_rate": 9.710380932340047e-06, "loss": 0.3992, "step": 24750 }, { "epoch": 0.198078415372677, "grad_norm": 0.6807651050718, "learning_rate": 9.709912443397993e-06, "loss": 0.3981, "step": 24760 }, { "epoch": 0.19815841473268214, "grad_norm": 0.739702045506844, "learning_rate": 9.709443587167269e-06, "loss": 0.418, "step": 24770 }, { "epoch": 0.19823841409268725, "grad_norm": 0.6300815274589572, "learning_rate": 9.708974363684436e-06, "loss": 0.4114, "step": 24780 }, { "epoch": 0.19831841345269238, "grad_norm": 0.6981162896862142, "learning_rate": 9.708504772986089e-06, "loss": 0.3944, "step": 24790 }, { "epoch": 0.1983984128126975, "grad_norm": 0.6371814815117279, "learning_rate": 9.708034815108844e-06, "loss": 0.4506, "step": 24800 }, { "epoch": 0.19847841217270262, "grad_norm": 0.6741152201278648, "learning_rate": 9.70756449008935e-06, "loss": 0.4477, "step": 24810 }, { "epoch": 0.19855841153270773, "grad_norm": 0.6432518511905773, "learning_rate": 9.707093797964284e-06, "loss": 0.4343, "step": 24820 }, { "epoch": 0.19863841089271286, "grad_norm": 0.7435757739695158, "learning_rate": 9.706622738770354e-06, "loss": 0.422, "step": 24830 }, { "epoch": 0.19871841025271797, "grad_norm": 0.6620238258210652, "learning_rate": 9.70615131254429e-06, "loss": 0.4143, "step": 24840 }, { "epoch": 0.1987984096127231, "grad_norm": 0.6665616006636125, "learning_rate": 9.705679519322858e-06, "loss": 0.4172, "step": 24850 }, { "epoch": 0.1988784089727282, "grad_norm": 0.8998536257151302, "learning_rate": 9.705207359142848e-06, "loss": 0.4257, "step": 24860 }, { "epoch": 0.19895840833273334, "grad_norm": 0.654551981772404, "learning_rate": 9.70473483204108e-06, "loss": 0.4075, "step": 24870 }, { "epoch": 0.19903840769273845, "grad_norm": 0.6319103726736927, "learning_rate": 9.704261938054403e-06, "loss": 0.4317, "step": 24880 }, { "epoch": 0.19911840705274358, "grad_norm": 0.6777024599293059, "learning_rate": 9.703788677219695e-06, "loss": 0.4284, "step": 24890 }, { "epoch": 0.1991984064127487, "grad_norm": 0.7894041524752506, "learning_rate": 9.70331504957386e-06, "loss": 0.4171, "step": 24900 }, { "epoch": 0.19927840577275382, "grad_norm": 0.6620076712454602, "learning_rate": 9.702841055153834e-06, "loss": 0.3921, "step": 24910 }, { "epoch": 0.19935840513275893, "grad_norm": 0.580376667423271, "learning_rate": 9.702366693996578e-06, "loss": 0.3868, "step": 24920 }, { "epoch": 0.19943840449276407, "grad_norm": 0.6042236081467772, "learning_rate": 9.701891966139088e-06, "loss": 0.4042, "step": 24930 }, { "epoch": 0.19951840385276917, "grad_norm": 1.51825040352895, "learning_rate": 9.70141687161838e-06, "loss": 0.4035, "step": 24940 }, { "epoch": 0.1995984032127743, "grad_norm": 0.6055805717627498, "learning_rate": 9.700941410471505e-06, "loss": 0.4103, "step": 24950 }, { "epoch": 0.1996784025727794, "grad_norm": 0.6251199202569909, "learning_rate": 9.70046558273554e-06, "loss": 0.4256, "step": 24960 }, { "epoch": 0.19975840193278455, "grad_norm": 0.6377613882627591, "learning_rate": 9.699989388447589e-06, "loss": 0.4245, "step": 24970 }, { "epoch": 0.19983840129278965, "grad_norm": 0.6379566377705614, "learning_rate": 9.69951282764479e-06, "loss": 0.4315, "step": 24980 }, { "epoch": 0.1999184006527948, "grad_norm": 0.9381078013299984, "learning_rate": 9.699035900364304e-06, "loss": 0.4071, "step": 24990 }, { "epoch": 0.1999984000127999, "grad_norm": 0.7111229187670621, "learning_rate": 9.698558606643324e-06, "loss": 0.4082, "step": 25000 }, { "epoch": 0.20007839937280503, "grad_norm": 0.6305140819822208, "learning_rate": 9.698080946519069e-06, "loss": 0.3983, "step": 25010 }, { "epoch": 0.20015839873281013, "grad_norm": 0.763463341868667, "learning_rate": 9.697602920028789e-06, "loss": 0.427, "step": 25020 }, { "epoch": 0.20023839809281527, "grad_norm": 0.6587327038038345, "learning_rate": 9.69712452720976e-06, "loss": 0.4139, "step": 25030 }, { "epoch": 0.20031839745282037, "grad_norm": 0.7064612644129061, "learning_rate": 9.696645768099291e-06, "loss": 0.4311, "step": 25040 }, { "epoch": 0.2003983968128255, "grad_norm": 0.6534864379982582, "learning_rate": 9.696166642734713e-06, "loss": 0.3986, "step": 25050 }, { "epoch": 0.20047839617283061, "grad_norm": 0.6891395360801902, "learning_rate": 9.695687151153392e-06, "loss": 0.4307, "step": 25060 }, { "epoch": 0.20055839553283575, "grad_norm": 0.6827013819691701, "learning_rate": 9.695207293392718e-06, "loss": 0.4498, "step": 25070 }, { "epoch": 0.20063839489284085, "grad_norm": 0.7762025337597214, "learning_rate": 9.694727069490113e-06, "loss": 0.4415, "step": 25080 }, { "epoch": 0.200718394252846, "grad_norm": 0.6549365726257796, "learning_rate": 9.694246479483026e-06, "loss": 0.4326, "step": 25090 }, { "epoch": 0.2007983936128511, "grad_norm": 0.714601434408728, "learning_rate": 9.69376552340893e-06, "loss": 0.4352, "step": 25100 }, { "epoch": 0.20087839297285623, "grad_norm": 0.6128003512609382, "learning_rate": 9.693284201305337e-06, "loss": 0.4156, "step": 25110 }, { "epoch": 0.20095839233286134, "grad_norm": 0.6623836769821344, "learning_rate": 9.692802513209779e-06, "loss": 0.4065, "step": 25120 }, { "epoch": 0.20103839169286647, "grad_norm": 0.649563930553542, "learning_rate": 9.692320459159817e-06, "loss": 0.4361, "step": 25130 }, { "epoch": 0.20111839105287158, "grad_norm": 0.698301502784952, "learning_rate": 9.691838039193044e-06, "loss": 0.4208, "step": 25140 }, { "epoch": 0.2011983904128767, "grad_norm": 0.6699531796916289, "learning_rate": 9.69135525334708e-06, "loss": 0.4286, "step": 25150 }, { "epoch": 0.20127838977288182, "grad_norm": 0.6142924837340475, "learning_rate": 9.690872101659577e-06, "loss": 0.4331, "step": 25160 }, { "epoch": 0.20135838913288695, "grad_norm": 0.6237859834199788, "learning_rate": 9.690388584168207e-06, "loss": 0.3944, "step": 25170 }, { "epoch": 0.20143838849289206, "grad_norm": 0.6386731730781353, "learning_rate": 9.689904700910678e-06, "loss": 0.4374, "step": 25180 }, { "epoch": 0.2015183878528972, "grad_norm": 0.6125277657827306, "learning_rate": 9.689420451924724e-06, "loss": 0.407, "step": 25190 }, { "epoch": 0.2015983872129023, "grad_norm": 0.601779457014253, "learning_rate": 9.688935837248108e-06, "loss": 0.4166, "step": 25200 }, { "epoch": 0.2016783865729074, "grad_norm": 0.6075468850546083, "learning_rate": 9.68845085691862e-06, "loss": 0.3965, "step": 25210 }, { "epoch": 0.20175838593291254, "grad_norm": 0.8623427849308739, "learning_rate": 9.687965510974084e-06, "loss": 0.4182, "step": 25220 }, { "epoch": 0.20183838529291764, "grad_norm": 0.6673438820193229, "learning_rate": 9.687479799452343e-06, "loss": 0.415, "step": 25230 }, { "epoch": 0.20191838465292278, "grad_norm": 0.6328262055012936, "learning_rate": 9.686993722391276e-06, "loss": 0.4177, "step": 25240 }, { "epoch": 0.20199838401292788, "grad_norm": 3.300000228367903, "learning_rate": 9.686507279828788e-06, "loss": 0.427, "step": 25250 }, { "epoch": 0.20207838337293302, "grad_norm": 0.6722836305760621, "learning_rate": 9.686020471802813e-06, "loss": 0.418, "step": 25260 }, { "epoch": 0.20215838273293812, "grad_norm": 0.6649171938113846, "learning_rate": 9.685533298351316e-06, "loss": 0.433, "step": 25270 }, { "epoch": 0.20223838209294326, "grad_norm": 0.6984104783862518, "learning_rate": 9.685045759512281e-06, "loss": 0.438, "step": 25280 }, { "epoch": 0.20231838145294836, "grad_norm": 0.665593691881875, "learning_rate": 9.684557855323732e-06, "loss": 0.4179, "step": 25290 }, { "epoch": 0.2023983808129535, "grad_norm": 0.6015792980706866, "learning_rate": 9.684069585823719e-06, "loss": 0.4296, "step": 25300 }, { "epoch": 0.2024783801729586, "grad_norm": 0.6705350049049668, "learning_rate": 9.683580951050313e-06, "loss": 0.4193, "step": 25310 }, { "epoch": 0.20255837953296374, "grad_norm": 0.7440029947278692, "learning_rate": 9.683091951041621e-06, "loss": 0.403, "step": 25320 }, { "epoch": 0.20263837889296885, "grad_norm": 0.7434955531734009, "learning_rate": 9.682602585835779e-06, "loss": 0.4479, "step": 25330 }, { "epoch": 0.20271837825297398, "grad_norm": 0.7070503200276549, "learning_rate": 9.682112855470945e-06, "loss": 0.3928, "step": 25340 }, { "epoch": 0.20279837761297909, "grad_norm": 0.6621517425767705, "learning_rate": 9.681622759985307e-06, "loss": 0.4094, "step": 25350 }, { "epoch": 0.20287837697298422, "grad_norm": 0.6213498078895016, "learning_rate": 9.68113229941709e-06, "loss": 0.43, "step": 25360 }, { "epoch": 0.20295837633298933, "grad_norm": 0.7552929204390433, "learning_rate": 9.680641473804537e-06, "loss": 0.3871, "step": 25370 }, { "epoch": 0.20303837569299446, "grad_norm": 0.6871622678287057, "learning_rate": 9.680150283185926e-06, "loss": 0.4031, "step": 25380 }, { "epoch": 0.20311837505299957, "grad_norm": 0.7670288423252903, "learning_rate": 9.679658727599558e-06, "loss": 0.4167, "step": 25390 }, { "epoch": 0.2031983744130047, "grad_norm": 0.6267504495725282, "learning_rate": 9.679166807083769e-06, "loss": 0.4272, "step": 25400 }, { "epoch": 0.2032783737730098, "grad_norm": 0.5870101578123149, "learning_rate": 9.678674521676916e-06, "loss": 0.3962, "step": 25410 }, { "epoch": 0.20335837313301494, "grad_norm": 0.6225919010719906, "learning_rate": 9.678181871417392e-06, "loss": 0.3927, "step": 25420 }, { "epoch": 0.20343837249302005, "grad_norm": 0.6275468502063714, "learning_rate": 9.677688856343614e-06, "loss": 0.4192, "step": 25430 }, { "epoch": 0.20351837185302518, "grad_norm": 0.6181877107774313, "learning_rate": 9.677195476494026e-06, "loss": 0.4139, "step": 25440 }, { "epoch": 0.2035983712130303, "grad_norm": 0.5781839341790055, "learning_rate": 9.676701731907105e-06, "loss": 0.4056, "step": 25450 }, { "epoch": 0.20367837057303542, "grad_norm": 0.7063683269980677, "learning_rate": 9.676207622621356e-06, "loss": 0.4216, "step": 25460 }, { "epoch": 0.20375836993304053, "grad_norm": 0.6208883323446223, "learning_rate": 9.675713148675306e-06, "loss": 0.4142, "step": 25470 }, { "epoch": 0.20383836929304566, "grad_norm": 0.6480111786970353, "learning_rate": 9.67521831010752e-06, "loss": 0.4638, "step": 25480 }, { "epoch": 0.20391836865305077, "grad_norm": 0.5812761157729379, "learning_rate": 9.67472310695658e-06, "loss": 0.4213, "step": 25490 }, { "epoch": 0.2039983680130559, "grad_norm": 0.6718604515779194, "learning_rate": 9.67422753926111e-06, "loss": 0.4327, "step": 25500 }, { "epoch": 0.204078367373061, "grad_norm": 0.606450307105251, "learning_rate": 9.673731607059753e-06, "loss": 0.3643, "step": 25510 }, { "epoch": 0.20415836673306614, "grad_norm": 0.7275879657214983, "learning_rate": 9.673235310391181e-06, "loss": 0.416, "step": 25520 }, { "epoch": 0.20423836609307125, "grad_norm": 0.6570071206739083, "learning_rate": 9.672738649294097e-06, "loss": 0.4206, "step": 25530 }, { "epoch": 0.20431836545307638, "grad_norm": 0.5906165501102874, "learning_rate": 9.672241623807235e-06, "loss": 0.4184, "step": 25540 }, { "epoch": 0.2043983648130815, "grad_norm": 0.6764123846489025, "learning_rate": 9.671744233969348e-06, "loss": 0.428, "step": 25550 }, { "epoch": 0.20447836417308662, "grad_norm": 0.6930407644945245, "learning_rate": 9.671246479819227e-06, "loss": 0.4104, "step": 25560 }, { "epoch": 0.20455836353309173, "grad_norm": 0.5907015226814545, "learning_rate": 9.67074836139569e-06, "loss": 0.4214, "step": 25570 }, { "epoch": 0.20463836289309686, "grad_norm": 0.5431345805115071, "learning_rate": 9.670249878737577e-06, "loss": 0.41, "step": 25580 }, { "epoch": 0.20471836225310197, "grad_norm": 0.7363298130924799, "learning_rate": 9.669751031883763e-06, "loss": 0.4121, "step": 25590 }, { "epoch": 0.2047983616131071, "grad_norm": 0.6126140166117313, "learning_rate": 9.66925182087315e-06, "loss": 0.4059, "step": 25600 }, { "epoch": 0.2048783609731122, "grad_norm": 0.6877595485978726, "learning_rate": 9.668752245744663e-06, "loss": 0.4101, "step": 25610 }, { "epoch": 0.20495836033311735, "grad_norm": 0.6647184626576979, "learning_rate": 9.668252306537265e-06, "loss": 0.4255, "step": 25620 }, { "epoch": 0.20503835969312245, "grad_norm": 0.6395230377657424, "learning_rate": 9.667752003289942e-06, "loss": 0.3986, "step": 25630 }, { "epoch": 0.20511835905312759, "grad_norm": 0.6233392406621595, "learning_rate": 9.667251336041705e-06, "loss": 0.4443, "step": 25640 }, { "epoch": 0.2051983584131327, "grad_norm": 0.7400259356926424, "learning_rate": 9.666750304831598e-06, "loss": 0.4066, "step": 25650 }, { "epoch": 0.20527835777313783, "grad_norm": 0.5812309212173733, "learning_rate": 9.666248909698696e-06, "loss": 0.4224, "step": 25660 }, { "epoch": 0.20535835713314293, "grad_norm": 0.5943178961082685, "learning_rate": 9.665747150682096e-06, "loss": 0.4246, "step": 25670 }, { "epoch": 0.20543835649314807, "grad_norm": 0.7352988371726545, "learning_rate": 9.665245027820926e-06, "loss": 0.4396, "step": 25680 }, { "epoch": 0.20551835585315317, "grad_norm": 0.6837848805371955, "learning_rate": 9.664742541154345e-06, "loss": 0.4363, "step": 25690 }, { "epoch": 0.2055983552131583, "grad_norm": 0.6563290907421928, "learning_rate": 9.664239690721533e-06, "loss": 0.4384, "step": 25700 }, { "epoch": 0.2056783545731634, "grad_norm": 0.6383060240790711, "learning_rate": 9.663736476561707e-06, "loss": 0.406, "step": 25710 }, { "epoch": 0.20575835393316855, "grad_norm": 0.6296300203735304, "learning_rate": 9.66323289871411e-06, "loss": 0.437, "step": 25720 }, { "epoch": 0.20583835329317365, "grad_norm": 0.6938264457299264, "learning_rate": 9.662728957218008e-06, "loss": 0.4306, "step": 25730 }, { "epoch": 0.2059183526531788, "grad_norm": 0.603255645750617, "learning_rate": 9.662224652112703e-06, "loss": 0.4406, "step": 25740 }, { "epoch": 0.2059983520131839, "grad_norm": 0.6231985172341633, "learning_rate": 9.661719983437522e-06, "loss": 0.4094, "step": 25750 }, { "epoch": 0.20607835137318903, "grad_norm": 0.6779602236489993, "learning_rate": 9.661214951231815e-06, "loss": 0.4107, "step": 25760 }, { "epoch": 0.20615835073319413, "grad_norm": 0.8550254467386939, "learning_rate": 9.660709555534971e-06, "loss": 0.407, "step": 25770 }, { "epoch": 0.20623835009319924, "grad_norm": 0.5880575248946542, "learning_rate": 9.660203796386399e-06, "loss": 0.4278, "step": 25780 }, { "epoch": 0.20631834945320437, "grad_norm": 0.6626003955892934, "learning_rate": 9.659697673825538e-06, "loss": 0.4215, "step": 25790 }, { "epoch": 0.20639834881320948, "grad_norm": 0.6744334757014303, "learning_rate": 9.659191187891861e-06, "loss": 0.4067, "step": 25800 }, { "epoch": 0.20647834817321462, "grad_norm": 0.6596575467807398, "learning_rate": 9.658684338624862e-06, "loss": 0.3902, "step": 25810 }, { "epoch": 0.20655834753321972, "grad_norm": 0.6243270667412644, "learning_rate": 9.658177126064065e-06, "loss": 0.4047, "step": 25820 }, { "epoch": 0.20663834689322486, "grad_norm": 0.7486859230872163, "learning_rate": 9.657669550249024e-06, "loss": 0.4339, "step": 25830 }, { "epoch": 0.20671834625322996, "grad_norm": 0.6064459205599133, "learning_rate": 9.657161611219323e-06, "loss": 0.4158, "step": 25840 }, { "epoch": 0.2067983456132351, "grad_norm": 0.7153943385453182, "learning_rate": 9.656653309014572e-06, "loss": 0.4321, "step": 25850 }, { "epoch": 0.2068783449732402, "grad_norm": 0.7249220756469105, "learning_rate": 9.656144643674405e-06, "loss": 0.4077, "step": 25860 }, { "epoch": 0.20695834433324534, "grad_norm": 0.7028141304156097, "learning_rate": 9.655635615238494e-06, "loss": 0.4279, "step": 25870 }, { "epoch": 0.20703834369325044, "grad_norm": 1.3788002651953202, "learning_rate": 9.65512622374653e-06, "loss": 0.4338, "step": 25880 }, { "epoch": 0.20711834305325558, "grad_norm": 0.639479061561768, "learning_rate": 9.654616469238241e-06, "loss": 0.4126, "step": 25890 }, { "epoch": 0.20719834241326068, "grad_norm": 0.609807082437189, "learning_rate": 9.654106351753373e-06, "loss": 0.431, "step": 25900 }, { "epoch": 0.20727834177326582, "grad_norm": 0.5201524993179082, "learning_rate": 9.653595871331712e-06, "loss": 0.4005, "step": 25910 }, { "epoch": 0.20735834113327092, "grad_norm": 0.643849745976512, "learning_rate": 9.653085028013063e-06, "loss": 0.4093, "step": 25920 }, { "epoch": 0.20743834049327606, "grad_norm": 0.7133161807382631, "learning_rate": 9.652573821837262e-06, "loss": 0.4229, "step": 25930 }, { "epoch": 0.20751833985328116, "grad_norm": 0.5936675930972536, "learning_rate": 9.652062252844174e-06, "loss": 0.4194, "step": 25940 }, { "epoch": 0.2075983392132863, "grad_norm": 0.7618304557371944, "learning_rate": 9.651550321073694e-06, "loss": 0.4266, "step": 25950 }, { "epoch": 0.2076783385732914, "grad_norm": 0.7217181850538551, "learning_rate": 9.651038026565744e-06, "loss": 0.3873, "step": 25960 }, { "epoch": 0.20775833793329654, "grad_norm": 0.7635321989136754, "learning_rate": 9.65052536936027e-06, "loss": 0.406, "step": 25970 }, { "epoch": 0.20783833729330164, "grad_norm": 0.7632772185036838, "learning_rate": 9.650012349497255e-06, "loss": 0.4177, "step": 25980 }, { "epoch": 0.20791833665330678, "grad_norm": 0.6428791222236218, "learning_rate": 9.6494989670167e-06, "loss": 0.4264, "step": 25990 }, { "epoch": 0.20799833601331189, "grad_norm": 0.6761107443674549, "learning_rate": 9.648985221958644e-06, "loss": 0.4168, "step": 26000 } ], "logging_steps": 10, "max_steps": 125001, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1503332426153984.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }