{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 30.0,
"eval_steps": 500,
"global_step": 66150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.22675736961451248,
"grad_norm": 4.094242572784424,
"learning_rate": 1.984913076341648e-05,
"loss": 0.9959,
"step": 500
},
{
"epoch": 0.45351473922902497,
"grad_norm": 4.962518215179443,
"learning_rate": 1.969795918367347e-05,
"loss": 0.8762,
"step": 1000
},
{
"epoch": 0.6802721088435374,
"grad_norm": 4.5195441246032715,
"learning_rate": 1.954678760393046e-05,
"loss": 0.8285,
"step": 1500
},
{
"epoch": 0.9070294784580499,
"grad_norm": 3.6667134761810303,
"learning_rate": 1.9395616024187454e-05,
"loss": 0.8241,
"step": 2000
},
{
"epoch": 1.1337868480725624,
"grad_norm": 3.35432505607605,
"learning_rate": 1.9244444444444444e-05,
"loss": 0.7984,
"step": 2500
},
{
"epoch": 1.3605442176870748,
"grad_norm": 5.972733974456787,
"learning_rate": 1.9093272864701437e-05,
"loss": 0.757,
"step": 3000
},
{
"epoch": 1.5873015873015874,
"grad_norm": 5.633714199066162,
"learning_rate": 1.894210128495843e-05,
"loss": 0.7392,
"step": 3500
},
{
"epoch": 1.8140589569160999,
"grad_norm": 7.9989333152771,
"learning_rate": 1.8790929705215423e-05,
"loss": 0.7412,
"step": 4000
},
{
"epoch": 2.0408163265306123,
"grad_norm": 4.803481101989746,
"learning_rate": 1.8639758125472413e-05,
"loss": 0.7267,
"step": 4500
},
{
"epoch": 2.2675736961451247,
"grad_norm": 7.628437519073486,
"learning_rate": 1.8488586545729402e-05,
"loss": 0.6747,
"step": 5000
},
{
"epoch": 2.494331065759637,
"grad_norm": 7.200098991394043,
"learning_rate": 1.8337414965986395e-05,
"loss": 0.6842,
"step": 5500
},
{
"epoch": 2.7210884353741496,
"grad_norm": 7.570149898529053,
"learning_rate": 1.8186243386243388e-05,
"loss": 0.6995,
"step": 6000
},
{
"epoch": 2.947845804988662,
"grad_norm": 8.058834075927734,
"learning_rate": 1.8035071806500378e-05,
"loss": 0.6947,
"step": 6500
},
{
"epoch": 3.1746031746031744,
"grad_norm": 5.649857997894287,
"learning_rate": 1.788390022675737e-05,
"loss": 0.6568,
"step": 7000
},
{
"epoch": 3.4013605442176873,
"grad_norm": 5.62798547744751,
"learning_rate": 1.7732728647014364e-05,
"loss": 0.6328,
"step": 7500
},
{
"epoch": 3.6281179138321997,
"grad_norm": 7.817151069641113,
"learning_rate": 1.7581557067271357e-05,
"loss": 0.6355,
"step": 8000
},
{
"epoch": 3.854875283446712,
"grad_norm": 7.009090423583984,
"learning_rate": 1.7430385487528347e-05,
"loss": 0.6347,
"step": 8500
},
{
"epoch": 4.081632653061225,
"grad_norm": 5.835013389587402,
"learning_rate": 1.7279213907785336e-05,
"loss": 0.6233,
"step": 9000
},
{
"epoch": 4.308390022675737,
"grad_norm": 8.56863021850586,
"learning_rate": 1.712804232804233e-05,
"loss": 0.578,
"step": 9500
},
{
"epoch": 4.535147392290249,
"grad_norm": 6.346089839935303,
"learning_rate": 1.6976870748299322e-05,
"loss": 0.5871,
"step": 10000
},
{
"epoch": 4.761904761904762,
"grad_norm": 6.353672504425049,
"learning_rate": 1.6825699168556312e-05,
"loss": 0.5998,
"step": 10500
},
{
"epoch": 4.988662131519274,
"grad_norm": 13.262858390808105,
"learning_rate": 1.6674527588813305e-05,
"loss": 0.5868,
"step": 11000
},
{
"epoch": 5.215419501133787,
"grad_norm": 8.374523162841797,
"learning_rate": 1.6523356009070298e-05,
"loss": 0.5356,
"step": 11500
},
{
"epoch": 5.442176870748299,
"grad_norm": 6.546370506286621,
"learning_rate": 1.6372184429327287e-05,
"loss": 0.5511,
"step": 12000
},
{
"epoch": 5.668934240362812,
"grad_norm": 14.608261108398438,
"learning_rate": 1.622101284958428e-05,
"loss": 0.5387,
"step": 12500
},
{
"epoch": 5.895691609977324,
"grad_norm": 9.447562217712402,
"learning_rate": 1.606984126984127e-05,
"loss": 0.561,
"step": 13000
},
{
"epoch": 6.122448979591836,
"grad_norm": 8.588167190551758,
"learning_rate": 1.5918669690098263e-05,
"loss": 0.5072,
"step": 13500
},
{
"epoch": 6.349206349206349,
"grad_norm": 10.386740684509277,
"learning_rate": 1.5767498110355256e-05,
"loss": 0.5107,
"step": 14000
},
{
"epoch": 6.575963718820862,
"grad_norm": 19.57710838317871,
"learning_rate": 1.5616326530612246e-05,
"loss": 0.4949,
"step": 14500
},
{
"epoch": 6.802721088435375,
"grad_norm": 5.1340508460998535,
"learning_rate": 1.546515495086924e-05,
"loss": 0.4991,
"step": 15000
},
{
"epoch": 7.029478458049887,
"grad_norm": 8.451451301574707,
"learning_rate": 1.531398337112623e-05,
"loss": 0.513,
"step": 15500
},
{
"epoch": 7.2562358276643995,
"grad_norm": 13.097034454345703,
"learning_rate": 1.5162811791383221e-05,
"loss": 0.457,
"step": 16000
},
{
"epoch": 7.482993197278912,
"grad_norm": 14.697782516479492,
"learning_rate": 1.5011640211640213e-05,
"loss": 0.4611,
"step": 16500
},
{
"epoch": 7.709750566893424,
"grad_norm": 8.722723960876465,
"learning_rate": 1.4860468631897204e-05,
"loss": 0.461,
"step": 17000
},
{
"epoch": 7.936507936507937,
"grad_norm": 15.735103607177734,
"learning_rate": 1.4709297052154197e-05,
"loss": 0.4655,
"step": 17500
},
{
"epoch": 8.16326530612245,
"grad_norm": 14.541454315185547,
"learning_rate": 1.4558125472411188e-05,
"loss": 0.43,
"step": 18000
},
{
"epoch": 8.390022675736962,
"grad_norm": 11.13815975189209,
"learning_rate": 1.4406953892668178e-05,
"loss": 0.435,
"step": 18500
},
{
"epoch": 8.616780045351474,
"grad_norm": 20.309188842773438,
"learning_rate": 1.4255782312925171e-05,
"loss": 0.4303,
"step": 19000
},
{
"epoch": 8.843537414965986,
"grad_norm": 13.516115188598633,
"learning_rate": 1.4104610733182162e-05,
"loss": 0.4234,
"step": 19500
},
{
"epoch": 9.070294784580499,
"grad_norm": 10.709738731384277,
"learning_rate": 1.3953439153439154e-05,
"loss": 0.4287,
"step": 20000
},
{
"epoch": 9.297052154195011,
"grad_norm": 10.075858116149902,
"learning_rate": 1.3802267573696147e-05,
"loss": 0.3874,
"step": 20500
},
{
"epoch": 9.523809523809524,
"grad_norm": 13.06119441986084,
"learning_rate": 1.3651095993953138e-05,
"loss": 0.4131,
"step": 21000
},
{
"epoch": 9.750566893424036,
"grad_norm": 20.51653289794922,
"learning_rate": 1.3499924414210131e-05,
"loss": 0.4012,
"step": 21500
},
{
"epoch": 9.977324263038549,
"grad_norm": 10.614886283874512,
"learning_rate": 1.334875283446712e-05,
"loss": 0.4019,
"step": 22000
},
{
"epoch": 10.204081632653061,
"grad_norm": 11.375739097595215,
"learning_rate": 1.3197581254724112e-05,
"loss": 0.3722,
"step": 22500
},
{
"epoch": 10.430839002267573,
"grad_norm": 11.405980110168457,
"learning_rate": 1.3046409674981105e-05,
"loss": 0.3649,
"step": 23000
},
{
"epoch": 10.657596371882086,
"grad_norm": 9.21558952331543,
"learning_rate": 1.2895238095238096e-05,
"loss": 0.3639,
"step": 23500
},
{
"epoch": 10.884353741496598,
"grad_norm": 6.192341327667236,
"learning_rate": 1.2744066515495088e-05,
"loss": 0.3828,
"step": 24000
},
{
"epoch": 11.11111111111111,
"grad_norm": 9.45790958404541,
"learning_rate": 1.259289493575208e-05,
"loss": 0.3741,
"step": 24500
},
{
"epoch": 11.337868480725623,
"grad_norm": 7.998142242431641,
"learning_rate": 1.2441723356009072e-05,
"loss": 0.3433,
"step": 25000
},
{
"epoch": 11.564625850340136,
"grad_norm": 27.37310791015625,
"learning_rate": 1.2290551776266062e-05,
"loss": 0.3331,
"step": 25500
},
{
"epoch": 11.791383219954648,
"grad_norm": 16.175437927246094,
"learning_rate": 1.2139380196523055e-05,
"loss": 0.3465,
"step": 26000
},
{
"epoch": 12.01814058956916,
"grad_norm": 9.284594535827637,
"learning_rate": 1.1988208616780046e-05,
"loss": 0.3534,
"step": 26500
},
{
"epoch": 12.244897959183673,
"grad_norm": 11.269328117370605,
"learning_rate": 1.1837037037037037e-05,
"loss": 0.3101,
"step": 27000
},
{
"epoch": 12.471655328798185,
"grad_norm": 17.147966384887695,
"learning_rate": 1.168586545729403e-05,
"loss": 0.3088,
"step": 27500
},
{
"epoch": 12.698412698412698,
"grad_norm": 11.016709327697754,
"learning_rate": 1.1534693877551022e-05,
"loss": 0.3286,
"step": 28000
},
{
"epoch": 12.92517006802721,
"grad_norm": 18.88196563720703,
"learning_rate": 1.1383522297808015e-05,
"loss": 0.3238,
"step": 28500
},
{
"epoch": 13.151927437641723,
"grad_norm": 13.807648658752441,
"learning_rate": 1.1232350718065004e-05,
"loss": 0.3108,
"step": 29000
},
{
"epoch": 13.378684807256235,
"grad_norm": 12.66650676727295,
"learning_rate": 1.1081179138321996e-05,
"loss": 0.2994,
"step": 29500
},
{
"epoch": 13.60544217687075,
"grad_norm": 14.468998908996582,
"learning_rate": 1.0930007558578989e-05,
"loss": 0.293,
"step": 30000
},
{
"epoch": 13.83219954648526,
"grad_norm": 9.742269515991211,
"learning_rate": 1.077883597883598e-05,
"loss": 0.3078,
"step": 30500
},
{
"epoch": 14.058956916099774,
"grad_norm": 2.813500165939331,
"learning_rate": 1.0627664399092971e-05,
"loss": 0.2985,
"step": 31000
},
{
"epoch": 14.285714285714286,
"grad_norm": 6.373344421386719,
"learning_rate": 1.0476492819349964e-05,
"loss": 0.2772,
"step": 31500
},
{
"epoch": 14.512471655328799,
"grad_norm": 21.58708953857422,
"learning_rate": 1.0325321239606956e-05,
"loss": 0.2833,
"step": 32000
},
{
"epoch": 14.739229024943311,
"grad_norm": 15.204211235046387,
"learning_rate": 1.0174149659863945e-05,
"loss": 0.2888,
"step": 32500
},
{
"epoch": 14.965986394557824,
"grad_norm": 26.945823669433594,
"learning_rate": 1.0022978080120938e-05,
"loss": 0.2924,
"step": 33000
},
{
"epoch": 15.192743764172336,
"grad_norm": 22.14579963684082,
"learning_rate": 9.87180650037793e-06,
"loss": 0.2666,
"step": 33500
},
{
"epoch": 15.419501133786849,
"grad_norm": 11.540060997009277,
"learning_rate": 9.720634920634921e-06,
"loss": 0.2731,
"step": 34000
},
{
"epoch": 15.646258503401361,
"grad_norm": 18.65288734436035,
"learning_rate": 9.569463340891914e-06,
"loss": 0.259,
"step": 34500
},
{
"epoch": 15.873015873015873,
"grad_norm": 29.307865142822266,
"learning_rate": 9.418291761148905e-06,
"loss": 0.2727,
"step": 35000
},
{
"epoch": 16.099773242630384,
"grad_norm": 20.7984561920166,
"learning_rate": 9.267120181405897e-06,
"loss": 0.2565,
"step": 35500
},
{
"epoch": 16.3265306122449,
"grad_norm": 1.1995394229888916,
"learning_rate": 9.115948601662888e-06,
"loss": 0.2474,
"step": 36000
},
{
"epoch": 16.55328798185941,
"grad_norm": 14.748051643371582,
"learning_rate": 8.964777021919879e-06,
"loss": 0.2548,
"step": 36500
},
{
"epoch": 16.780045351473923,
"grad_norm": 15.29910945892334,
"learning_rate": 8.81360544217687e-06,
"loss": 0.2666,
"step": 37000
},
{
"epoch": 17.006802721088434,
"grad_norm": 14.932755470275879,
"learning_rate": 8.662433862433863e-06,
"loss": 0.2523,
"step": 37500
},
{
"epoch": 17.233560090702948,
"grad_norm": 16.09978485107422,
"learning_rate": 8.511262282690855e-06,
"loss": 0.2383,
"step": 38000
},
{
"epoch": 17.46031746031746,
"grad_norm": 21.41301918029785,
"learning_rate": 8.360090702947846e-06,
"loss": 0.2449,
"step": 38500
},
{
"epoch": 17.687074829931973,
"grad_norm": 46.11956024169922,
"learning_rate": 8.208919123204837e-06,
"loss": 0.2449,
"step": 39000
},
{
"epoch": 17.913832199546484,
"grad_norm": 21.357784271240234,
"learning_rate": 8.05774754346183e-06,
"loss": 0.243,
"step": 39500
},
{
"epoch": 18.140589569160998,
"grad_norm": 24.270694732666016,
"learning_rate": 7.906575963718822e-06,
"loss": 0.2392,
"step": 40000
},
{
"epoch": 18.367346938775512,
"grad_norm": 17.150836944580078,
"learning_rate": 7.755404383975813e-06,
"loss": 0.2166,
"step": 40500
},
{
"epoch": 18.594104308390023,
"grad_norm": 2.3719019889831543,
"learning_rate": 7.604232804232805e-06,
"loss": 0.2441,
"step": 41000
},
{
"epoch": 18.820861678004537,
"grad_norm": 23.74626922607422,
"learning_rate": 7.4530612244897974e-06,
"loss": 0.2499,
"step": 41500
},
{
"epoch": 19.047619047619047,
"grad_norm": 23.06348419189453,
"learning_rate": 7.301889644746788e-06,
"loss": 0.2335,
"step": 42000
},
{
"epoch": 19.27437641723356,
"grad_norm": 36.47420120239258,
"learning_rate": 7.15071806500378e-06,
"loss": 0.2246,
"step": 42500
},
{
"epoch": 19.501133786848072,
"grad_norm": 7.2201714515686035,
"learning_rate": 6.999546485260772e-06,
"loss": 0.2251,
"step": 43000
},
{
"epoch": 19.727891156462587,
"grad_norm": 21.442813873291016,
"learning_rate": 6.848374905517763e-06,
"loss": 0.222,
"step": 43500
},
{
"epoch": 19.954648526077097,
"grad_norm": 18.4246826171875,
"learning_rate": 6.697203325774755e-06,
"loss": 0.2281,
"step": 44000
},
{
"epoch": 20.18140589569161,
"grad_norm": 11.556923866271973,
"learning_rate": 6.546031746031747e-06,
"loss": 0.2158,
"step": 44500
},
{
"epoch": 20.408163265306122,
"grad_norm": 2.7828309535980225,
"learning_rate": 6.3948601662887375e-06,
"loss": 0.2146,
"step": 45000
},
{
"epoch": 20.634920634920636,
"grad_norm": 14.234125137329102,
"learning_rate": 6.24368858654573e-06,
"loss": 0.2131,
"step": 45500
},
{
"epoch": 20.861678004535147,
"grad_norm": 11.016937255859375,
"learning_rate": 6.092517006802722e-06,
"loss": 0.221,
"step": 46000
},
{
"epoch": 21.08843537414966,
"grad_norm": 24.453733444213867,
"learning_rate": 5.941345427059714e-06,
"loss": 0.2075,
"step": 46500
},
{
"epoch": 21.31519274376417,
"grad_norm": 7.180193901062012,
"learning_rate": 5.7901738473167045e-06,
"loss": 0.2038,
"step": 47000
},
{
"epoch": 21.541950113378686,
"grad_norm": 8.576226234436035,
"learning_rate": 5.639002267573697e-06,
"loss": 0.1919,
"step": 47500
},
{
"epoch": 21.768707482993197,
"grad_norm": 14.872846603393555,
"learning_rate": 5.487830687830689e-06,
"loss": 0.2169,
"step": 48000
},
{
"epoch": 21.99546485260771,
"grad_norm": 27.958250045776367,
"learning_rate": 5.336659108087679e-06,
"loss": 0.2178,
"step": 48500
},
{
"epoch": 22.22222222222222,
"grad_norm": 24.300565719604492,
"learning_rate": 5.1854875283446715e-06,
"loss": 0.1931,
"step": 49000
},
{
"epoch": 22.448979591836736,
"grad_norm": 21.32301902770996,
"learning_rate": 5.034315948601664e-06,
"loss": 0.1916,
"step": 49500
},
{
"epoch": 22.675736961451246,
"grad_norm": 1.0707628726959229,
"learning_rate": 4.883144368858655e-06,
"loss": 0.2076,
"step": 50000
},
{
"epoch": 22.90249433106576,
"grad_norm": 19.261188507080078,
"learning_rate": 4.731972789115646e-06,
"loss": 0.2121,
"step": 50500
},
{
"epoch": 23.12925170068027,
"grad_norm": 20.699966430664062,
"learning_rate": 4.5808012093726385e-06,
"loss": 0.1849,
"step": 51000
},
{
"epoch": 23.356009070294785,
"grad_norm": 21.104005813598633,
"learning_rate": 4.42962962962963e-06,
"loss": 0.1884,
"step": 51500
},
{
"epoch": 23.582766439909296,
"grad_norm": 28.356447219848633,
"learning_rate": 4.278458049886622e-06,
"loss": 0.1878,
"step": 52000
},
{
"epoch": 23.80952380952381,
"grad_norm": 24.488407135009766,
"learning_rate": 4.127286470143613e-06,
"loss": 0.2097,
"step": 52500
},
{
"epoch": 24.03628117913832,
"grad_norm": 25.003814697265625,
"learning_rate": 3.9761148904006054e-06,
"loss": 0.2065,
"step": 53000
},
{
"epoch": 24.263038548752835,
"grad_norm": 14.80716323852539,
"learning_rate": 3.824943310657597e-06,
"loss": 0.1935,
"step": 53500
},
{
"epoch": 24.489795918367346,
"grad_norm": 6.110260486602783,
"learning_rate": 3.673771730914588e-06,
"loss": 0.1807,
"step": 54000
},
{
"epoch": 24.71655328798186,
"grad_norm": 0.7426683902740479,
"learning_rate": 3.5226001511715803e-06,
"loss": 0.1946,
"step": 54500
},
{
"epoch": 24.94331065759637,
"grad_norm": 16.315649032592773,
"learning_rate": 3.3714285714285716e-06,
"loss": 0.1762,
"step": 55000
},
{
"epoch": 25.170068027210885,
"grad_norm": 1.4113119840621948,
"learning_rate": 3.2202569916855637e-06,
"loss": 0.1828,
"step": 55500
},
{
"epoch": 25.396825396825395,
"grad_norm": 11.793295860290527,
"learning_rate": 3.069085411942555e-06,
"loss": 0.1724,
"step": 56000
},
{
"epoch": 25.62358276643991,
"grad_norm": 35.62842559814453,
"learning_rate": 2.9179138321995464e-06,
"loss": 0.1979,
"step": 56500
},
{
"epoch": 25.85034013605442,
"grad_norm": 7.971707820892334,
"learning_rate": 2.7667422524565386e-06,
"loss": 0.1948,
"step": 57000
},
{
"epoch": 26.077097505668934,
"grad_norm": 42.05970001220703,
"learning_rate": 2.61557067271353e-06,
"loss": 0.19,
"step": 57500
},
{
"epoch": 26.303854875283445,
"grad_norm": 4.295804500579834,
"learning_rate": 2.4643990929705216e-06,
"loss": 0.183,
"step": 58000
},
{
"epoch": 26.53061224489796,
"grad_norm": 55.50460433959961,
"learning_rate": 2.3132275132275134e-06,
"loss": 0.1723,
"step": 58500
},
{
"epoch": 26.75736961451247,
"grad_norm": 6.339694023132324,
"learning_rate": 2.162055933484505e-06,
"loss": 0.1795,
"step": 59000
},
{
"epoch": 26.984126984126984,
"grad_norm": 12.868098258972168,
"learning_rate": 2.010884353741497e-06,
"loss": 0.1708,
"step": 59500
},
{
"epoch": 27.2108843537415,
"grad_norm": 11.474719047546387,
"learning_rate": 1.8597127739984886e-06,
"loss": 0.1813,
"step": 60000
},
{
"epoch": 27.43764172335601,
"grad_norm": 18.244434356689453,
"learning_rate": 1.70854119425548e-06,
"loss": 0.1729,
"step": 60500
},
{
"epoch": 27.664399092970523,
"grad_norm": 1.7608823776245117,
"learning_rate": 1.5573696145124717e-06,
"loss": 0.1703,
"step": 61000
},
{
"epoch": 27.891156462585034,
"grad_norm": 33.47771453857422,
"learning_rate": 1.4061980347694634e-06,
"loss": 0.1583,
"step": 61500
},
{
"epoch": 28.117913832199548,
"grad_norm": 2.692805528640747,
"learning_rate": 1.2550264550264552e-06,
"loss": 0.1768,
"step": 62000
},
{
"epoch": 28.34467120181406,
"grad_norm": 6.285675048828125,
"learning_rate": 1.103854875283447e-06,
"loss": 0.1735,
"step": 62500
},
{
"epoch": 28.571428571428573,
"grad_norm": 8.573649406433105,
"learning_rate": 9.526832955404384e-07,
"loss": 0.1807,
"step": 63000
},
{
"epoch": 28.798185941043084,
"grad_norm": 6.0096049308776855,
"learning_rate": 8.015117157974302e-07,
"loss": 0.1614,
"step": 63500
},
{
"epoch": 29.024943310657598,
"grad_norm": 15.904170989990234,
"learning_rate": 6.503401360544217e-07,
"loss": 0.1673,
"step": 64000
},
{
"epoch": 29.25170068027211,
"grad_norm": 44.96242141723633,
"learning_rate": 4.991685563114135e-07,
"loss": 0.1695,
"step": 64500
},
{
"epoch": 29.478458049886623,
"grad_norm": 22.98110008239746,
"learning_rate": 3.479969765684052e-07,
"loss": 0.1583,
"step": 65000
},
{
"epoch": 29.705215419501133,
"grad_norm": 10.874411582946777,
"learning_rate": 1.9682539682539684e-07,
"loss": 0.1829,
"step": 65500
},
{
"epoch": 29.931972789115648,
"grad_norm": 25.05258560180664,
"learning_rate": 4.565381708238851e-08,
"loss": 0.1582,
"step": 66000
},
{
"epoch": 30.0,
"step": 66150,
"total_flos": 7781556721720320.0,
"train_loss": 0.35355195213427437,
"train_runtime": 4324.2133,
"train_samples_per_second": 244.664,
"train_steps_per_second": 15.298
}
],
"logging_steps": 500,
"max_steps": 66150,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 7781556721720320.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}