| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 30.0, |
| "eval_steps": 500, |
| "global_step": 66150, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.22675736961451248, |
| "grad_norm": 4.094242572784424, |
| "learning_rate": 1.984913076341648e-05, |
| "loss": 0.9959, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.45351473922902497, |
| "grad_norm": 4.962518215179443, |
| "learning_rate": 1.969795918367347e-05, |
| "loss": 0.8762, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6802721088435374, |
| "grad_norm": 4.5195441246032715, |
| "learning_rate": 1.954678760393046e-05, |
| "loss": 0.8285, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.9070294784580499, |
| "grad_norm": 3.6667134761810303, |
| "learning_rate": 1.9395616024187454e-05, |
| "loss": 0.8241, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.1337868480725624, |
| "grad_norm": 3.35432505607605, |
| "learning_rate": 1.9244444444444444e-05, |
| "loss": 0.7984, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.3605442176870748, |
| "grad_norm": 5.972733974456787, |
| "learning_rate": 1.9093272864701437e-05, |
| "loss": 0.757, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.5873015873015874, |
| "grad_norm": 5.633714199066162, |
| "learning_rate": 1.894210128495843e-05, |
| "loss": 0.7392, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.8140589569160999, |
| "grad_norm": 7.9989333152771, |
| "learning_rate": 1.8790929705215423e-05, |
| "loss": 0.7412, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.0408163265306123, |
| "grad_norm": 4.803481101989746, |
| "learning_rate": 1.8639758125472413e-05, |
| "loss": 0.7267, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.2675736961451247, |
| "grad_norm": 7.628437519073486, |
| "learning_rate": 1.8488586545729402e-05, |
| "loss": 0.6747, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.494331065759637, |
| "grad_norm": 7.200098991394043, |
| "learning_rate": 1.8337414965986395e-05, |
| "loss": 0.6842, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.7210884353741496, |
| "grad_norm": 7.570149898529053, |
| "learning_rate": 1.8186243386243388e-05, |
| "loss": 0.6995, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.947845804988662, |
| "grad_norm": 8.058834075927734, |
| "learning_rate": 1.8035071806500378e-05, |
| "loss": 0.6947, |
| "step": 6500 |
| }, |
| { |
| "epoch": 3.1746031746031744, |
| "grad_norm": 5.649857997894287, |
| "learning_rate": 1.788390022675737e-05, |
| "loss": 0.6568, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.4013605442176873, |
| "grad_norm": 5.62798547744751, |
| "learning_rate": 1.7732728647014364e-05, |
| "loss": 0.6328, |
| "step": 7500 |
| }, |
| { |
| "epoch": 3.6281179138321997, |
| "grad_norm": 7.817151069641113, |
| "learning_rate": 1.7581557067271357e-05, |
| "loss": 0.6355, |
| "step": 8000 |
| }, |
| { |
| "epoch": 3.854875283446712, |
| "grad_norm": 7.009090423583984, |
| "learning_rate": 1.7430385487528347e-05, |
| "loss": 0.6347, |
| "step": 8500 |
| }, |
| { |
| "epoch": 4.081632653061225, |
| "grad_norm": 5.835013389587402, |
| "learning_rate": 1.7279213907785336e-05, |
| "loss": 0.6233, |
| "step": 9000 |
| }, |
| { |
| "epoch": 4.308390022675737, |
| "grad_norm": 8.56863021850586, |
| "learning_rate": 1.712804232804233e-05, |
| "loss": 0.578, |
| "step": 9500 |
| }, |
| { |
| "epoch": 4.535147392290249, |
| "grad_norm": 6.346089839935303, |
| "learning_rate": 1.6976870748299322e-05, |
| "loss": 0.5871, |
| "step": 10000 |
| }, |
| { |
| "epoch": 4.761904761904762, |
| "grad_norm": 6.353672504425049, |
| "learning_rate": 1.6825699168556312e-05, |
| "loss": 0.5998, |
| "step": 10500 |
| }, |
| { |
| "epoch": 4.988662131519274, |
| "grad_norm": 13.262858390808105, |
| "learning_rate": 1.6674527588813305e-05, |
| "loss": 0.5868, |
| "step": 11000 |
| }, |
| { |
| "epoch": 5.215419501133787, |
| "grad_norm": 8.374523162841797, |
| "learning_rate": 1.6523356009070298e-05, |
| "loss": 0.5356, |
| "step": 11500 |
| }, |
| { |
| "epoch": 5.442176870748299, |
| "grad_norm": 6.546370506286621, |
| "learning_rate": 1.6372184429327287e-05, |
| "loss": 0.5511, |
| "step": 12000 |
| }, |
| { |
| "epoch": 5.668934240362812, |
| "grad_norm": 14.608261108398438, |
| "learning_rate": 1.622101284958428e-05, |
| "loss": 0.5387, |
| "step": 12500 |
| }, |
| { |
| "epoch": 5.895691609977324, |
| "grad_norm": 9.447562217712402, |
| "learning_rate": 1.606984126984127e-05, |
| "loss": 0.561, |
| "step": 13000 |
| }, |
| { |
| "epoch": 6.122448979591836, |
| "grad_norm": 8.588167190551758, |
| "learning_rate": 1.5918669690098263e-05, |
| "loss": 0.5072, |
| "step": 13500 |
| }, |
| { |
| "epoch": 6.349206349206349, |
| "grad_norm": 10.386740684509277, |
| "learning_rate": 1.5767498110355256e-05, |
| "loss": 0.5107, |
| "step": 14000 |
| }, |
| { |
| "epoch": 6.575963718820862, |
| "grad_norm": 19.57710838317871, |
| "learning_rate": 1.5616326530612246e-05, |
| "loss": 0.4949, |
| "step": 14500 |
| }, |
| { |
| "epoch": 6.802721088435375, |
| "grad_norm": 5.1340508460998535, |
| "learning_rate": 1.546515495086924e-05, |
| "loss": 0.4991, |
| "step": 15000 |
| }, |
| { |
| "epoch": 7.029478458049887, |
| "grad_norm": 8.451451301574707, |
| "learning_rate": 1.531398337112623e-05, |
| "loss": 0.513, |
| "step": 15500 |
| }, |
| { |
| "epoch": 7.2562358276643995, |
| "grad_norm": 13.097034454345703, |
| "learning_rate": 1.5162811791383221e-05, |
| "loss": 0.457, |
| "step": 16000 |
| }, |
| { |
| "epoch": 7.482993197278912, |
| "grad_norm": 14.697782516479492, |
| "learning_rate": 1.5011640211640213e-05, |
| "loss": 0.4611, |
| "step": 16500 |
| }, |
| { |
| "epoch": 7.709750566893424, |
| "grad_norm": 8.722723960876465, |
| "learning_rate": 1.4860468631897204e-05, |
| "loss": 0.461, |
| "step": 17000 |
| }, |
| { |
| "epoch": 7.936507936507937, |
| "grad_norm": 15.735103607177734, |
| "learning_rate": 1.4709297052154197e-05, |
| "loss": 0.4655, |
| "step": 17500 |
| }, |
| { |
| "epoch": 8.16326530612245, |
| "grad_norm": 14.541454315185547, |
| "learning_rate": 1.4558125472411188e-05, |
| "loss": 0.43, |
| "step": 18000 |
| }, |
| { |
| "epoch": 8.390022675736962, |
| "grad_norm": 11.13815975189209, |
| "learning_rate": 1.4406953892668178e-05, |
| "loss": 0.435, |
| "step": 18500 |
| }, |
| { |
| "epoch": 8.616780045351474, |
| "grad_norm": 20.309188842773438, |
| "learning_rate": 1.4255782312925171e-05, |
| "loss": 0.4303, |
| "step": 19000 |
| }, |
| { |
| "epoch": 8.843537414965986, |
| "grad_norm": 13.516115188598633, |
| "learning_rate": 1.4104610733182162e-05, |
| "loss": 0.4234, |
| "step": 19500 |
| }, |
| { |
| "epoch": 9.070294784580499, |
| "grad_norm": 10.709738731384277, |
| "learning_rate": 1.3953439153439154e-05, |
| "loss": 0.4287, |
| "step": 20000 |
| }, |
| { |
| "epoch": 9.297052154195011, |
| "grad_norm": 10.075858116149902, |
| "learning_rate": 1.3802267573696147e-05, |
| "loss": 0.3874, |
| "step": 20500 |
| }, |
| { |
| "epoch": 9.523809523809524, |
| "grad_norm": 13.06119441986084, |
| "learning_rate": 1.3651095993953138e-05, |
| "loss": 0.4131, |
| "step": 21000 |
| }, |
| { |
| "epoch": 9.750566893424036, |
| "grad_norm": 20.51653289794922, |
| "learning_rate": 1.3499924414210131e-05, |
| "loss": 0.4012, |
| "step": 21500 |
| }, |
| { |
| "epoch": 9.977324263038549, |
| "grad_norm": 10.614886283874512, |
| "learning_rate": 1.334875283446712e-05, |
| "loss": 0.4019, |
| "step": 22000 |
| }, |
| { |
| "epoch": 10.204081632653061, |
| "grad_norm": 11.375739097595215, |
| "learning_rate": 1.3197581254724112e-05, |
| "loss": 0.3722, |
| "step": 22500 |
| }, |
| { |
| "epoch": 10.430839002267573, |
| "grad_norm": 11.405980110168457, |
| "learning_rate": 1.3046409674981105e-05, |
| "loss": 0.3649, |
| "step": 23000 |
| }, |
| { |
| "epoch": 10.657596371882086, |
| "grad_norm": 9.21558952331543, |
| "learning_rate": 1.2895238095238096e-05, |
| "loss": 0.3639, |
| "step": 23500 |
| }, |
| { |
| "epoch": 10.884353741496598, |
| "grad_norm": 6.192341327667236, |
| "learning_rate": 1.2744066515495088e-05, |
| "loss": 0.3828, |
| "step": 24000 |
| }, |
| { |
| "epoch": 11.11111111111111, |
| "grad_norm": 9.45790958404541, |
| "learning_rate": 1.259289493575208e-05, |
| "loss": 0.3741, |
| "step": 24500 |
| }, |
| { |
| "epoch": 11.337868480725623, |
| "grad_norm": 7.998142242431641, |
| "learning_rate": 1.2441723356009072e-05, |
| "loss": 0.3433, |
| "step": 25000 |
| }, |
| { |
| "epoch": 11.564625850340136, |
| "grad_norm": 27.37310791015625, |
| "learning_rate": 1.2290551776266062e-05, |
| "loss": 0.3331, |
| "step": 25500 |
| }, |
| { |
| "epoch": 11.791383219954648, |
| "grad_norm": 16.175437927246094, |
| "learning_rate": 1.2139380196523055e-05, |
| "loss": 0.3465, |
| "step": 26000 |
| }, |
| { |
| "epoch": 12.01814058956916, |
| "grad_norm": 9.284594535827637, |
| "learning_rate": 1.1988208616780046e-05, |
| "loss": 0.3534, |
| "step": 26500 |
| }, |
| { |
| "epoch": 12.244897959183673, |
| "grad_norm": 11.269328117370605, |
| "learning_rate": 1.1837037037037037e-05, |
| "loss": 0.3101, |
| "step": 27000 |
| }, |
| { |
| "epoch": 12.471655328798185, |
| "grad_norm": 17.147966384887695, |
| "learning_rate": 1.168586545729403e-05, |
| "loss": 0.3088, |
| "step": 27500 |
| }, |
| { |
| "epoch": 12.698412698412698, |
| "grad_norm": 11.016709327697754, |
| "learning_rate": 1.1534693877551022e-05, |
| "loss": 0.3286, |
| "step": 28000 |
| }, |
| { |
| "epoch": 12.92517006802721, |
| "grad_norm": 18.88196563720703, |
| "learning_rate": 1.1383522297808015e-05, |
| "loss": 0.3238, |
| "step": 28500 |
| }, |
| { |
| "epoch": 13.151927437641723, |
| "grad_norm": 13.807648658752441, |
| "learning_rate": 1.1232350718065004e-05, |
| "loss": 0.3108, |
| "step": 29000 |
| }, |
| { |
| "epoch": 13.378684807256235, |
| "grad_norm": 12.66650676727295, |
| "learning_rate": 1.1081179138321996e-05, |
| "loss": 0.2994, |
| "step": 29500 |
| }, |
| { |
| "epoch": 13.60544217687075, |
| "grad_norm": 14.468998908996582, |
| "learning_rate": 1.0930007558578989e-05, |
| "loss": 0.293, |
| "step": 30000 |
| }, |
| { |
| "epoch": 13.83219954648526, |
| "grad_norm": 9.742269515991211, |
| "learning_rate": 1.077883597883598e-05, |
| "loss": 0.3078, |
| "step": 30500 |
| }, |
| { |
| "epoch": 14.058956916099774, |
| "grad_norm": 2.813500165939331, |
| "learning_rate": 1.0627664399092971e-05, |
| "loss": 0.2985, |
| "step": 31000 |
| }, |
| { |
| "epoch": 14.285714285714286, |
| "grad_norm": 6.373344421386719, |
| "learning_rate": 1.0476492819349964e-05, |
| "loss": 0.2772, |
| "step": 31500 |
| }, |
| { |
| "epoch": 14.512471655328799, |
| "grad_norm": 21.58708953857422, |
| "learning_rate": 1.0325321239606956e-05, |
| "loss": 0.2833, |
| "step": 32000 |
| }, |
| { |
| "epoch": 14.739229024943311, |
| "grad_norm": 15.204211235046387, |
| "learning_rate": 1.0174149659863945e-05, |
| "loss": 0.2888, |
| "step": 32500 |
| }, |
| { |
| "epoch": 14.965986394557824, |
| "grad_norm": 26.945823669433594, |
| "learning_rate": 1.0022978080120938e-05, |
| "loss": 0.2924, |
| "step": 33000 |
| }, |
| { |
| "epoch": 15.192743764172336, |
| "grad_norm": 22.14579963684082, |
| "learning_rate": 9.87180650037793e-06, |
| "loss": 0.2666, |
| "step": 33500 |
| }, |
| { |
| "epoch": 15.419501133786849, |
| "grad_norm": 11.540060997009277, |
| "learning_rate": 9.720634920634921e-06, |
| "loss": 0.2731, |
| "step": 34000 |
| }, |
| { |
| "epoch": 15.646258503401361, |
| "grad_norm": 18.65288734436035, |
| "learning_rate": 9.569463340891914e-06, |
| "loss": 0.259, |
| "step": 34500 |
| }, |
| { |
| "epoch": 15.873015873015873, |
| "grad_norm": 29.307865142822266, |
| "learning_rate": 9.418291761148905e-06, |
| "loss": 0.2727, |
| "step": 35000 |
| }, |
| { |
| "epoch": 16.099773242630384, |
| "grad_norm": 20.7984561920166, |
| "learning_rate": 9.267120181405897e-06, |
| "loss": 0.2565, |
| "step": 35500 |
| }, |
| { |
| "epoch": 16.3265306122449, |
| "grad_norm": 1.1995394229888916, |
| "learning_rate": 9.115948601662888e-06, |
| "loss": 0.2474, |
| "step": 36000 |
| }, |
| { |
| "epoch": 16.55328798185941, |
| "grad_norm": 14.748051643371582, |
| "learning_rate": 8.964777021919879e-06, |
| "loss": 0.2548, |
| "step": 36500 |
| }, |
| { |
| "epoch": 16.780045351473923, |
| "grad_norm": 15.29910945892334, |
| "learning_rate": 8.81360544217687e-06, |
| "loss": 0.2666, |
| "step": 37000 |
| }, |
| { |
| "epoch": 17.006802721088434, |
| "grad_norm": 14.932755470275879, |
| "learning_rate": 8.662433862433863e-06, |
| "loss": 0.2523, |
| "step": 37500 |
| }, |
| { |
| "epoch": 17.233560090702948, |
| "grad_norm": 16.09978485107422, |
| "learning_rate": 8.511262282690855e-06, |
| "loss": 0.2383, |
| "step": 38000 |
| }, |
| { |
| "epoch": 17.46031746031746, |
| "grad_norm": 21.41301918029785, |
| "learning_rate": 8.360090702947846e-06, |
| "loss": 0.2449, |
| "step": 38500 |
| }, |
| { |
| "epoch": 17.687074829931973, |
| "grad_norm": 46.11956024169922, |
| "learning_rate": 8.208919123204837e-06, |
| "loss": 0.2449, |
| "step": 39000 |
| }, |
| { |
| "epoch": 17.913832199546484, |
| "grad_norm": 21.357784271240234, |
| "learning_rate": 8.05774754346183e-06, |
| "loss": 0.243, |
| "step": 39500 |
| }, |
| { |
| "epoch": 18.140589569160998, |
| "grad_norm": 24.270694732666016, |
| "learning_rate": 7.906575963718822e-06, |
| "loss": 0.2392, |
| "step": 40000 |
| }, |
| { |
| "epoch": 18.367346938775512, |
| "grad_norm": 17.150836944580078, |
| "learning_rate": 7.755404383975813e-06, |
| "loss": 0.2166, |
| "step": 40500 |
| }, |
| { |
| "epoch": 18.594104308390023, |
| "grad_norm": 2.3719019889831543, |
| "learning_rate": 7.604232804232805e-06, |
| "loss": 0.2441, |
| "step": 41000 |
| }, |
| { |
| "epoch": 18.820861678004537, |
| "grad_norm": 23.74626922607422, |
| "learning_rate": 7.4530612244897974e-06, |
| "loss": 0.2499, |
| "step": 41500 |
| }, |
| { |
| "epoch": 19.047619047619047, |
| "grad_norm": 23.06348419189453, |
| "learning_rate": 7.301889644746788e-06, |
| "loss": 0.2335, |
| "step": 42000 |
| }, |
| { |
| "epoch": 19.27437641723356, |
| "grad_norm": 36.47420120239258, |
| "learning_rate": 7.15071806500378e-06, |
| "loss": 0.2246, |
| "step": 42500 |
| }, |
| { |
| "epoch": 19.501133786848072, |
| "grad_norm": 7.2201714515686035, |
| "learning_rate": 6.999546485260772e-06, |
| "loss": 0.2251, |
| "step": 43000 |
| }, |
| { |
| "epoch": 19.727891156462587, |
| "grad_norm": 21.442813873291016, |
| "learning_rate": 6.848374905517763e-06, |
| "loss": 0.222, |
| "step": 43500 |
| }, |
| { |
| "epoch": 19.954648526077097, |
| "grad_norm": 18.4246826171875, |
| "learning_rate": 6.697203325774755e-06, |
| "loss": 0.2281, |
| "step": 44000 |
| }, |
| { |
| "epoch": 20.18140589569161, |
| "grad_norm": 11.556923866271973, |
| "learning_rate": 6.546031746031747e-06, |
| "loss": 0.2158, |
| "step": 44500 |
| }, |
| { |
| "epoch": 20.408163265306122, |
| "grad_norm": 2.7828309535980225, |
| "learning_rate": 6.3948601662887375e-06, |
| "loss": 0.2146, |
| "step": 45000 |
| }, |
| { |
| "epoch": 20.634920634920636, |
| "grad_norm": 14.234125137329102, |
| "learning_rate": 6.24368858654573e-06, |
| "loss": 0.2131, |
| "step": 45500 |
| }, |
| { |
| "epoch": 20.861678004535147, |
| "grad_norm": 11.016937255859375, |
| "learning_rate": 6.092517006802722e-06, |
| "loss": 0.221, |
| "step": 46000 |
| }, |
| { |
| "epoch": 21.08843537414966, |
| "grad_norm": 24.453733444213867, |
| "learning_rate": 5.941345427059714e-06, |
| "loss": 0.2075, |
| "step": 46500 |
| }, |
| { |
| "epoch": 21.31519274376417, |
| "grad_norm": 7.180193901062012, |
| "learning_rate": 5.7901738473167045e-06, |
| "loss": 0.2038, |
| "step": 47000 |
| }, |
| { |
| "epoch": 21.541950113378686, |
| "grad_norm": 8.576226234436035, |
| "learning_rate": 5.639002267573697e-06, |
| "loss": 0.1919, |
| "step": 47500 |
| }, |
| { |
| "epoch": 21.768707482993197, |
| "grad_norm": 14.872846603393555, |
| "learning_rate": 5.487830687830689e-06, |
| "loss": 0.2169, |
| "step": 48000 |
| }, |
| { |
| "epoch": 21.99546485260771, |
| "grad_norm": 27.958250045776367, |
| "learning_rate": 5.336659108087679e-06, |
| "loss": 0.2178, |
| "step": 48500 |
| }, |
| { |
| "epoch": 22.22222222222222, |
| "grad_norm": 24.300565719604492, |
| "learning_rate": 5.1854875283446715e-06, |
| "loss": 0.1931, |
| "step": 49000 |
| }, |
| { |
| "epoch": 22.448979591836736, |
| "grad_norm": 21.32301902770996, |
| "learning_rate": 5.034315948601664e-06, |
| "loss": 0.1916, |
| "step": 49500 |
| }, |
| { |
| "epoch": 22.675736961451246, |
| "grad_norm": 1.0707628726959229, |
| "learning_rate": 4.883144368858655e-06, |
| "loss": 0.2076, |
| "step": 50000 |
| }, |
| { |
| "epoch": 22.90249433106576, |
| "grad_norm": 19.261188507080078, |
| "learning_rate": 4.731972789115646e-06, |
| "loss": 0.2121, |
| "step": 50500 |
| }, |
| { |
| "epoch": 23.12925170068027, |
| "grad_norm": 20.699966430664062, |
| "learning_rate": 4.5808012093726385e-06, |
| "loss": 0.1849, |
| "step": 51000 |
| }, |
| { |
| "epoch": 23.356009070294785, |
| "grad_norm": 21.104005813598633, |
| "learning_rate": 4.42962962962963e-06, |
| "loss": 0.1884, |
| "step": 51500 |
| }, |
| { |
| "epoch": 23.582766439909296, |
| "grad_norm": 28.356447219848633, |
| "learning_rate": 4.278458049886622e-06, |
| "loss": 0.1878, |
| "step": 52000 |
| }, |
| { |
| "epoch": 23.80952380952381, |
| "grad_norm": 24.488407135009766, |
| "learning_rate": 4.127286470143613e-06, |
| "loss": 0.2097, |
| "step": 52500 |
| }, |
| { |
| "epoch": 24.03628117913832, |
| "grad_norm": 25.003814697265625, |
| "learning_rate": 3.9761148904006054e-06, |
| "loss": 0.2065, |
| "step": 53000 |
| }, |
| { |
| "epoch": 24.263038548752835, |
| "grad_norm": 14.80716323852539, |
| "learning_rate": 3.824943310657597e-06, |
| "loss": 0.1935, |
| "step": 53500 |
| }, |
| { |
| "epoch": 24.489795918367346, |
| "grad_norm": 6.110260486602783, |
| "learning_rate": 3.673771730914588e-06, |
| "loss": 0.1807, |
| "step": 54000 |
| }, |
| { |
| "epoch": 24.71655328798186, |
| "grad_norm": 0.7426683902740479, |
| "learning_rate": 3.5226001511715803e-06, |
| "loss": 0.1946, |
| "step": 54500 |
| }, |
| { |
| "epoch": 24.94331065759637, |
| "grad_norm": 16.315649032592773, |
| "learning_rate": 3.3714285714285716e-06, |
| "loss": 0.1762, |
| "step": 55000 |
| }, |
| { |
| "epoch": 25.170068027210885, |
| "grad_norm": 1.4113119840621948, |
| "learning_rate": 3.2202569916855637e-06, |
| "loss": 0.1828, |
| "step": 55500 |
| }, |
| { |
| "epoch": 25.396825396825395, |
| "grad_norm": 11.793295860290527, |
| "learning_rate": 3.069085411942555e-06, |
| "loss": 0.1724, |
| "step": 56000 |
| }, |
| { |
| "epoch": 25.62358276643991, |
| "grad_norm": 35.62842559814453, |
| "learning_rate": 2.9179138321995464e-06, |
| "loss": 0.1979, |
| "step": 56500 |
| }, |
| { |
| "epoch": 25.85034013605442, |
| "grad_norm": 7.971707820892334, |
| "learning_rate": 2.7667422524565386e-06, |
| "loss": 0.1948, |
| "step": 57000 |
| }, |
| { |
| "epoch": 26.077097505668934, |
| "grad_norm": 42.05970001220703, |
| "learning_rate": 2.61557067271353e-06, |
| "loss": 0.19, |
| "step": 57500 |
| }, |
| { |
| "epoch": 26.303854875283445, |
| "grad_norm": 4.295804500579834, |
| "learning_rate": 2.4643990929705216e-06, |
| "loss": 0.183, |
| "step": 58000 |
| }, |
| { |
| "epoch": 26.53061224489796, |
| "grad_norm": 55.50460433959961, |
| "learning_rate": 2.3132275132275134e-06, |
| "loss": 0.1723, |
| "step": 58500 |
| }, |
| { |
| "epoch": 26.75736961451247, |
| "grad_norm": 6.339694023132324, |
| "learning_rate": 2.162055933484505e-06, |
| "loss": 0.1795, |
| "step": 59000 |
| }, |
| { |
| "epoch": 26.984126984126984, |
| "grad_norm": 12.868098258972168, |
| "learning_rate": 2.010884353741497e-06, |
| "loss": 0.1708, |
| "step": 59500 |
| }, |
| { |
| "epoch": 27.2108843537415, |
| "grad_norm": 11.474719047546387, |
| "learning_rate": 1.8597127739984886e-06, |
| "loss": 0.1813, |
| "step": 60000 |
| }, |
| { |
| "epoch": 27.43764172335601, |
| "grad_norm": 18.244434356689453, |
| "learning_rate": 1.70854119425548e-06, |
| "loss": 0.1729, |
| "step": 60500 |
| }, |
| { |
| "epoch": 27.664399092970523, |
| "grad_norm": 1.7608823776245117, |
| "learning_rate": 1.5573696145124717e-06, |
| "loss": 0.1703, |
| "step": 61000 |
| }, |
| { |
| "epoch": 27.891156462585034, |
| "grad_norm": 33.47771453857422, |
| "learning_rate": 1.4061980347694634e-06, |
| "loss": 0.1583, |
| "step": 61500 |
| }, |
| { |
| "epoch": 28.117913832199548, |
| "grad_norm": 2.692805528640747, |
| "learning_rate": 1.2550264550264552e-06, |
| "loss": 0.1768, |
| "step": 62000 |
| }, |
| { |
| "epoch": 28.34467120181406, |
| "grad_norm": 6.285675048828125, |
| "learning_rate": 1.103854875283447e-06, |
| "loss": 0.1735, |
| "step": 62500 |
| }, |
| { |
| "epoch": 28.571428571428573, |
| "grad_norm": 8.573649406433105, |
| "learning_rate": 9.526832955404384e-07, |
| "loss": 0.1807, |
| "step": 63000 |
| }, |
| { |
| "epoch": 28.798185941043084, |
| "grad_norm": 6.0096049308776855, |
| "learning_rate": 8.015117157974302e-07, |
| "loss": 0.1614, |
| "step": 63500 |
| }, |
| { |
| "epoch": 29.024943310657598, |
| "grad_norm": 15.904170989990234, |
| "learning_rate": 6.503401360544217e-07, |
| "loss": 0.1673, |
| "step": 64000 |
| }, |
| { |
| "epoch": 29.25170068027211, |
| "grad_norm": 44.96242141723633, |
| "learning_rate": 4.991685563114135e-07, |
| "loss": 0.1695, |
| "step": 64500 |
| }, |
| { |
| "epoch": 29.478458049886623, |
| "grad_norm": 22.98110008239746, |
| "learning_rate": 3.479969765684052e-07, |
| "loss": 0.1583, |
| "step": 65000 |
| }, |
| { |
| "epoch": 29.705215419501133, |
| "grad_norm": 10.874411582946777, |
| "learning_rate": 1.9682539682539684e-07, |
| "loss": 0.1829, |
| "step": 65500 |
| }, |
| { |
| "epoch": 29.931972789115648, |
| "grad_norm": 25.05258560180664, |
| "learning_rate": 4.565381708238851e-08, |
| "loss": 0.1582, |
| "step": 66000 |
| }, |
| { |
| "epoch": 30.0, |
| "step": 66150, |
| "total_flos": 7781556721720320.0, |
| "train_loss": 0.35355195213427437, |
| "train_runtime": 4324.2133, |
| "train_samples_per_second": 244.664, |
| "train_steps_per_second": 15.298 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 66150, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 30, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7781556721720320.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |