Kaylee0501's picture
Upload folder using huggingface_hub
8873149 verified
{
"best_global_step": 210,
"best_metric": 0.09402994,
"best_model_checkpoint": "/fsx-neo/dedicated-fsx-data-repo-neo-us-east-1/kayleexl/tree_reasoning/logical-reasoning/ms-swift/output_dpo/v7-20260217-182416/checkpoint-210",
"epoch": 1.985781990521327,
"eval_steps": 50,
"global_step": 210,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.009478672985781991,
"grad_norm": 19.55022430419922,
"learning_rate": 9.090909090909091e-06,
"logits/chosen": -0.8645371198654175,
"logits/rejected": -0.8560649752616882,
"logps/chosen": -212.84078979492188,
"logps/rejected": -181.89553833007812,
"loss": 1.9682148694992065,
"memory(GiB)": 153.35,
"nll_loss": 1.275067687034607,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1,
"train_speed(iter/s)": 0.001829
},
{
"epoch": 0.04739336492890995,
"grad_norm": 10.874584197998047,
"learning_rate": 4.545454545454546e-05,
"logits/chosen": -0.8818354606628418,
"logits/rejected": -0.8391438722610474,
"logps/chosen": -206.83299255371094,
"logps/rejected": -210.26193237304688,
"loss": 2.225569725036621,
"memory(GiB)": 171.5,
"nll_loss": 1.5596290826797485,
"rewards/accuracies": 0.390625,
"rewards/chosen": 0.42497575283050537,
"rewards/margins": 0.14753574132919312,
"rewards/rejected": 0.27743998169898987,
"step": 5,
"train_speed(iter/s)": 0.001962
},
{
"epoch": 0.0947867298578199,
"grad_norm": 3.7358806133270264,
"learning_rate": 9.090909090909092e-05,
"logits/chosen": -0.42944854497909546,
"logits/rejected": -0.4029228687286377,
"logps/chosen": -153.91217041015625,
"logps/rejected": -171.6427764892578,
"loss": 1.2244630813598634,
"memory(GiB)": 178.87,
"nll_loss": 0.9248598217964172,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": 8.403377532958984,
"rewards/margins": 3.649811267852783,
"rewards/rejected": 4.753565788269043,
"step": 10,
"train_speed(iter/s)": 0.002002
},
{
"epoch": 0.14218009478672985,
"grad_norm": 2.5456342697143555,
"learning_rate": 9.990034266657467e-05,
"logits/chosen": 0.14313745498657227,
"logits/rejected": 0.15165017545223236,
"logps/chosen": -64.2056655883789,
"logps/rejected": -108.23348236083984,
"loss": 0.9479263305664063,
"memory(GiB)": 178.87,
"nll_loss": 0.5506663918495178,
"rewards/accuracies": 0.8125,
"rewards/chosen": 13.307535171508789,
"rewards/margins": 4.5142388343811035,
"rewards/rejected": 8.793294906616211,
"step": 15,
"train_speed(iter/s)": 0.001977
},
{
"epoch": 0.1895734597156398,
"grad_norm": 9.239604949951172,
"learning_rate": 9.949616551002787e-05,
"logits/chosen": -0.7042765617370605,
"logits/rejected": -0.6620756983757019,
"logps/chosen": -57.892356872558594,
"logps/rejected": -133.2845916748047,
"loss": 0.6575197696685791,
"memory(GiB)": 178.87,
"nll_loss": 0.44857126474380493,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 15.717184066772461,
"rewards/margins": 7.5960211753845215,
"rewards/rejected": 8.121164321899414,
"step": 20,
"train_speed(iter/s)": 0.001969
},
{
"epoch": 0.23696682464454977,
"grad_norm": 3.75114369392395,
"learning_rate": 9.87837549867887e-05,
"logits/chosen": -0.5987659096717834,
"logits/rejected": -0.5469285249710083,
"logps/chosen": -39.35096740722656,
"logps/rejected": -128.31814575195312,
"loss": 0.4350598335266113,
"memory(GiB)": 178.87,
"nll_loss": 0.30207258462905884,
"rewards/accuracies": 0.9375,
"rewards/chosen": 19.038375854492188,
"rewards/margins": 10.453929901123047,
"rewards/rejected": 8.584444999694824,
"step": 25,
"train_speed(iter/s)": 0.001984
},
{
"epoch": 0.2843601895734597,
"grad_norm": 7.996090412139893,
"learning_rate": 9.776754757575975e-05,
"logits/chosen": -0.7030301094055176,
"logits/rejected": -0.6584943532943726,
"logps/chosen": -42.931304931640625,
"logps/rejected": -132.83807373046875,
"loss": 0.625270938873291,
"memory(GiB)": 178.87,
"nll_loss": 0.3963403105735779,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 16.53431510925293,
"rewards/margins": 9.225370407104492,
"rewards/rejected": 7.308945655822754,
"step": 30,
"train_speed(iter/s)": 0.001981
},
{
"epoch": 0.33175355450236965,
"grad_norm": 0.928902804851532,
"learning_rate": 9.645387162638652e-05,
"logits/chosen": -0.7767706513404846,
"logits/rejected": -0.7252510190010071,
"logps/chosen": -30.822132110595703,
"logps/rejected": -122.3298110961914,
"loss": 0.47859888076782225,
"memory(GiB)": 178.87,
"nll_loss": 0.25827503204345703,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 18.511274337768555,
"rewards/margins": 9.673491477966309,
"rewards/rejected": 8.837784767150879,
"step": 35,
"train_speed(iter/s)": 0.001982
},
{
"epoch": 0.3791469194312796,
"grad_norm": 4.358126163482666,
"learning_rate": 9.485090794937319e-05,
"logits/chosen": -0.8528544306755066,
"logits/rejected": -0.8117485046386719,
"logps/chosen": -28.26708984375,
"logps/rejected": -153.4051055908203,
"loss": 0.38582923412323,
"memory(GiB)": 178.87,
"nll_loss": 0.20131754875183105,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 19.518356323242188,
"rewards/margins": 13.451945304870605,
"rewards/rejected": 6.066409587860107,
"step": 40,
"train_speed(iter/s)": 0.001983
},
{
"epoch": 0.4265402843601896,
"grad_norm": 9.361577987670898,
"learning_rate": 9.29686388713456e-05,
"logits/chosen": -1.4655885696411133,
"logits/rejected": -1.4526116847991943,
"logps/chosen": -41.395565032958984,
"logps/rejected": -169.83302307128906,
"loss": 0.4372711658477783,
"memory(GiB)": 178.87,
"nll_loss": 0.3007845878601074,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 16.87301254272461,
"rewards/margins": 13.00146198272705,
"rewards/rejected": 3.871551513671875,
"step": 45,
"train_speed(iter/s)": 0.00198
},
{
"epoch": 0.47393364928909953,
"grad_norm": 1.3884881734848022,
"learning_rate": 9.081878607071996e-05,
"logits/chosen": -1.4089267253875732,
"logits/rejected": -1.3798249959945679,
"logps/chosen": -29.07427978515625,
"logps/rejected": -174.85606384277344,
"loss": 0.28701162338256836,
"memory(GiB)": 178.87,
"nll_loss": 0.17918026447296143,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 20.193706512451172,
"rewards/margins": 15.493553161621094,
"rewards/rejected": 4.700153350830078,
"step": 50,
"train_speed(iter/s)": 0.001981
},
{
"epoch": 0.47393364928909953,
"eval_logits/chosen": -1.2822269201278687,
"eval_logits/rejected": -1.255699634552002,
"eval_logps/chosen": -34.7473258972168,
"eval_logps/rejected": -143.84861755371094,
"eval_loss": 0.4424428939819336,
"eval_nll_loss": 0.2968122363090515,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 19.00676155090332,
"eval_rewards/margins": 12.395697593688965,
"eval_rewards/rejected": 6.611064910888672,
"eval_runtime": 230.2546,
"eval_samples_per_second": 0.074,
"eval_steps_per_second": 0.074,
"step": 50
},
{
"epoch": 0.5213270142180095,
"grad_norm": 1.1772722005844116,
"learning_rate": 8.841473758189854e-05,
"logits/chosen": -1.1759651899337769,
"logits/rejected": -1.1313683986663818,
"logps/chosen": -26.088220596313477,
"logps/rejected": -150.84393310546875,
"loss": 0.22870185375213622,
"memory(GiB)": 178.87,
"nll_loss": 0.16189467906951904,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 19.440641403198242,
"rewards/margins": 13.270869255065918,
"rewards/rejected": 6.169772148132324,
"step": 55,
"train_speed(iter/s)": 0.001958
},
{
"epoch": 0.5687203791469194,
"grad_norm": 2.0721075534820557,
"learning_rate": 8.577146442236857e-05,
"logits/chosen": -1.0945132970809937,
"logits/rejected": -1.060734748840332,
"logps/chosen": -22.90542984008789,
"logps/rejected": -128.8797607421875,
"loss": 0.24675798416137695,
"memory(GiB)": 178.87,
"nll_loss": 0.17432959377765656,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 17.570837020874023,
"rewards/margins": 11.27192497253418,
"rewards/rejected": 6.298913955688477,
"step": 60,
"train_speed(iter/s)": 0.00195
},
{
"epoch": 0.6161137440758294,
"grad_norm": 2.0549778938293457,
"learning_rate": 8.290542736190188e-05,
"logits/chosen": -1.080885648727417,
"logits/rejected": -1.057293176651001,
"logps/chosen": -17.660358428955078,
"logps/rejected": -124.87294006347656,
"loss": 0.36183264255523684,
"memory(GiB)": 178.87,
"nll_loss": 0.1854233592748642,
"rewards/accuracies": 0.9375,
"rewards/chosen": 18.208276748657227,
"rewards/margins": 11.001307487487793,
"rewards/rejected": 7.206968784332275,
"step": 65,
"train_speed(iter/s)": 0.001945
},
{
"epoch": 0.6635071090047393,
"grad_norm": 1.9049893617630005,
"learning_rate": 7.983447441444281e-05,
"logits/chosen": -1.4264296293258667,
"logits/rejected": -1.4030673503875732,
"logps/chosen": -22.567996978759766,
"logps/rejected": -163.80955505371094,
"loss": 0.2892845392227173,
"memory(GiB)": 178.87,
"nll_loss": 0.15383335947990417,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 18.27324676513672,
"rewards/margins": 15.088933944702148,
"rewards/rejected": 3.1843135356903076,
"step": 70,
"train_speed(iter/s)": 0.001941
},
{
"epoch": 0.7109004739336493,
"grad_norm": 1.0275962352752686,
"learning_rate": 7.657772969104508e-05,
"logits/chosen": -1.3237228393554688,
"logits/rejected": -1.3014802932739258,
"logps/chosen": -27.62123680114746,
"logps/rejected": -179.39981079101562,
"loss": 0.22349367141723633,
"memory(GiB)": 178.87,
"nll_loss": 0.16326689720153809,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 19.444076538085938,
"rewards/margins": 16.357398986816406,
"rewards/rejected": 3.0866756439208984,
"step": 75,
"train_speed(iter/s)": 0.001941
},
{
"epoch": 0.7582938388625592,
"grad_norm": 2.1720211505889893,
"learning_rate": 7.31554743060174e-05,
"logits/chosen": -0.9713658094406128,
"logits/rejected": -0.9438395500183105,
"logps/chosen": -21.5306396484375,
"logps/rejected": -158.5912628173828,
"loss": 0.2255859136581421,
"memory(GiB)": 178.87,
"nll_loss": 0.15225784480571747,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 20.218997955322266,
"rewards/margins": 14.551397323608398,
"rewards/rejected": 5.6676025390625,
"step": 80,
"train_speed(iter/s)": 0.001941
},
{
"epoch": 0.8056872037914692,
"grad_norm": 2.247673988342285,
"learning_rate": 6.958902007792466e-05,
"logits/chosen": -0.7944511771202087,
"logits/rejected": -0.7699103355407715,
"logps/chosen": -11.273658752441406,
"logps/rejected": -142.4789581298828,
"loss": 0.1783364772796631,
"memory(GiB)": 178.87,
"nll_loss": 0.10094492137432098,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 19.9717960357666,
"rewards/margins": 13.428759574890137,
"rewards/rejected": 6.543033599853516,
"step": 85,
"train_speed(iter/s)": 0.001943
},
{
"epoch": 0.8530805687203792,
"grad_norm": 5.502572059631348,
"learning_rate": 6.590057681196191e-05,
"logits/chosen": -0.7691094875335693,
"logits/rejected": -0.7428280711174011,
"logps/chosen": -16.701950073242188,
"logps/rejected": -178.28001403808594,
"loss": 0.21286754608154296,
"memory(GiB)": 178.87,
"nll_loss": 0.1458219736814499,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 18.113544464111328,
"rewards/margins": 14.766156196594238,
"rewards/rejected": 3.3473877906799316,
"step": 90,
"train_speed(iter/s)": 0.001941
},
{
"epoch": 0.9004739336492891,
"grad_norm": 6.771712779998779,
"learning_rate": 6.211311399018916e-05,
"logits/chosen": -1.2176296710968018,
"logits/rejected": -1.2004112005233765,
"logps/chosen": -10.625171661376953,
"logps/rejected": -197.5986785888672,
"loss": 0.14389824867248535,
"memory(GiB)": 178.87,
"nll_loss": 0.08421098440885544,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 21.306598663330078,
"rewards/margins": 20.043148040771484,
"rewards/rejected": 1.2634522914886475,
"step": 95,
"train_speed(iter/s)": 0.001943
},
{
"epoch": 0.9478672985781991,
"grad_norm": 0.7936939001083374,
"learning_rate": 5.8250217730939973e-05,
"logits/chosen": -1.2689450979232788,
"logits/rejected": -1.2652291059494019,
"logps/chosen": -24.885387420654297,
"logps/rejected": -166.68470764160156,
"loss": 0.22324090003967284,
"memory(GiB)": 178.87,
"nll_loss": 0.16993048787117004,
"rewards/accuracies": 1.0,
"rewards/chosen": 18.267993927001953,
"rewards/margins": 15.335573196411133,
"rewards/rejected": 2.932422637939453,
"step": 100,
"train_speed(iter/s)": 0.001939
},
{
"epoch": 0.9478672985781991,
"eval_logits/chosen": -1.276165246963501,
"eval_logits/rejected": -1.2667920589447021,
"eval_logps/chosen": -17.997478485107422,
"eval_logps/rejected": -172.5413818359375,
"eval_loss": 0.18138757348060608,
"eval_nll_loss": 0.15586450695991516,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 20.681747436523438,
"eval_rewards/margins": 16.939956665039062,
"eval_rewards/rejected": 3.741788625717163,
"eval_runtime": 230.8449,
"eval_samples_per_second": 0.074,
"eval_steps_per_second": 0.074,
"step": 100
},
{
"epoch": 0.995260663507109,
"grad_norm": 0.7884749174118042,
"learning_rate": 5.433594390817756e-05,
"logits/chosen": -1.217622995376587,
"logits/rejected": -1.2057679891586304,
"logps/chosen": -13.138340950012207,
"logps/rejected": -187.07447814941406,
"loss": 0.12456157207489013,
"memory(GiB)": 178.87,
"nll_loss": 0.09173186123371124,
"rewards/accuracies": 1.0,
"rewards/chosen": 19.977113723754883,
"rewards/margins": 18.163719177246094,
"rewards/rejected": 1.8133970499038696,
"step": 105,
"train_speed(iter/s)": 0.001931
},
{
"epoch": 1.037914691943128,
"grad_norm": 0.41725555062294006,
"learning_rate": 5.039466834548568e-05,
"logits/chosen": -1.0610564947128296,
"logits/rejected": -1.0492563247680664,
"logps/chosen": -17.22906494140625,
"logps/rejected": -178.35047912597656,
"loss": 0.10526471138000489,
"memory(GiB)": 178.87,
"nll_loss": 0.0985727533698082,
"rewards/accuracies": 1.0,
"rewards/chosen": 19.968387603759766,
"rewards/margins": 18.300251007080078,
"rewards/rejected": 1.6681346893310547,
"step": 110,
"train_speed(iter/s)": 0.001939
},
{
"epoch": 1.085308056872038,
"grad_norm": 0.5403364300727844,
"learning_rate": 4.64509350175992e-05,
"logits/chosen": -0.9350749254226685,
"logits/rejected": -0.9276103973388672,
"logps/chosen": -15.192606925964355,
"logps/rejected": -196.2861785888672,
"loss": 0.10088248252868652,
"memory(GiB)": 178.87,
"nll_loss": 0.09568502753973007,
"rewards/accuracies": 1.0,
"rewards/chosen": 20.20822525024414,
"rewards/margins": 17.932273864746094,
"rewards/rejected": 2.2759501934051514,
"step": 115,
"train_speed(iter/s)": 0.001939
},
{
"epoch": 1.132701421800948,
"grad_norm": 0.33638796210289,
"learning_rate": 4.2529303204786953e-05,
"logits/chosen": -0.8360708355903625,
"logits/rejected": -0.8255692720413208,
"logps/chosen": -12.213701248168945,
"logps/rejected": -178.4746856689453,
"loss": 0.08850648403167724,
"memory(GiB)": 178.87,
"nll_loss": 0.07898052781820297,
"rewards/accuracies": 1.0,
"rewards/chosen": 20.77065086364746,
"rewards/margins": 17.60364532470703,
"rewards/rejected": 3.167004346847534,
"step": 120,
"train_speed(iter/s)": 0.00194
},
{
"epoch": 1.180094786729858,
"grad_norm": 0.42006343603134155,
"learning_rate": 3.8654194551920485e-05,
"logits/chosen": -0.8648909330368042,
"logits/rejected": -0.8533682823181152,
"logps/chosen": -13.303857803344727,
"logps/rejected": -196.53375244140625,
"loss": 0.11154735088348389,
"memory(GiB)": 178.87,
"nll_loss": 0.09461511671543121,
"rewards/accuracies": 1.0,
"rewards/chosen": 19.808521270751953,
"rewards/margins": 18.673742294311523,
"rewards/rejected": 1.1347795724868774,
"step": 125,
"train_speed(iter/s)": 0.00194
},
{
"epoch": 1.2274881516587677,
"grad_norm": 0.4501703679561615,
"learning_rate": 3.484974098465636e-05,
"logits/chosen": -1.0564872026443481,
"logits/rejected": -1.0499274730682373,
"logps/chosen": -9.447141647338867,
"logps/rejected": -210.6065216064453,
"loss": 0.07890591621398926,
"memory(GiB)": 178.87,
"nll_loss": 0.0762052983045578,
"rewards/accuracies": 1.0,
"rewards/chosen": 20.350528717041016,
"rewards/margins": 20.427692413330078,
"rewards/rejected": -0.07716653496026993,
"step": 130,
"train_speed(iter/s)": 0.00194
},
{
"epoch": 1.2748815165876777,
"grad_norm": 0.48629000782966614,
"learning_rate": 3.11396344298212e-05,
"logits/chosen": -1.122717022895813,
"logits/rejected": -1.1127209663391113,
"logps/chosen": -8.969633102416992,
"logps/rejected": -178.16427612304688,
"loss": 0.07699697613716125,
"memory(GiB)": 178.87,
"nll_loss": 0.07132184505462646,
"rewards/accuracies": 1.0,
"rewards/chosen": 20.212081909179688,
"rewards/margins": 18.33711814880371,
"rewards/rejected": 1.8749620914459229,
"step": 135,
"train_speed(iter/s)": 0.001941
},
{
"epoch": 1.3222748815165877,
"grad_norm": 0.4852510094642639,
"learning_rate": 2.754697927585399e-05,
"logits/chosen": -1.0894103050231934,
"logits/rejected": -1.0850476026535034,
"logps/chosen": -13.88347053527832,
"logps/rejected": -190.9267120361328,
"loss": 0.11755204200744629,
"memory(GiB)": 178.87,
"nll_loss": 0.09814213216304779,
"rewards/accuracies": 1.0,
"rewards/chosen": 20.125789642333984,
"rewards/margins": 17.44692611694336,
"rewards/rejected": 2.678863048553467,
"step": 140,
"train_speed(iter/s)": 0.001938
},
{
"epoch": 1.3696682464454977,
"grad_norm": 1.0523611307144165,
"learning_rate": 2.4094148492096125e-05,
"logits/chosen": -1.18220055103302,
"logits/rejected": -1.183691382408142,
"logps/chosen": -11.095239639282227,
"logps/rejected": -180.09048461914062,
"loss": 0.07882866859436036,
"memory(GiB)": 178.87,
"nll_loss": 0.06894151866436005,
"rewards/accuracies": 1.0,
"rewards/chosen": 19.55763816833496,
"rewards/margins": 17.858949661254883,
"rewards/rejected": 1.6986896991729736,
"step": 145,
"train_speed(iter/s)": 0.001938
},
{
"epoch": 1.4170616113744074,
"grad_norm": 0.9743487238883972,
"learning_rate": 2.0802644302934683e-05,
"logits/chosen": -1.2402594089508057,
"logits/rejected": -1.229536771774292,
"logps/chosen": -10.528487205505371,
"logps/rejected": -173.75906372070312,
"loss": 0.05581583380699158,
"memory(GiB)": 178.87,
"nll_loss": 0.04851926118135452,
"rewards/accuracies": 1.0,
"rewards/chosen": 21.25326156616211,
"rewards/margins": 17.409027099609375,
"rewards/rejected": 3.84423565864563,
"step": 150,
"train_speed(iter/s)": 0.001939
},
{
"epoch": 1.4170616113744074,
"eval_logits/chosen": -1.2280174493789673,
"eval_logits/rejected": -1.2243937253952026,
"eval_logps/chosen": -11.513480186462402,
"eval_logps/rejected": -170.34632873535156,
"eval_loss": 0.11434541642665863,
"eval_nll_loss": 0.10074843466281891,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 21.33014678955078,
"eval_rewards/margins": 17.368852615356445,
"eval_rewards/rejected": 3.9612925052642822,
"eval_runtime": 228.4343,
"eval_samples_per_second": 0.074,
"eval_steps_per_second": 0.074,
"step": 150
},
{
"epoch": 1.4644549763033177,
"grad_norm": 1.9418814182281494,
"learning_rate": 1.7692964284439505e-05,
"logits/chosen": -1.2662538290023804,
"logits/rejected": -1.2612764835357666,
"logps/chosen": -8.698439598083496,
"logps/rejected": -195.09063720703125,
"loss": 0.07216010689735412,
"memory(GiB)": 178.87,
"nll_loss": 0.05235465615987778,
"rewards/accuracies": 1.0,
"rewards/chosen": 20.530902862548828,
"rewards/margins": 19.212444305419922,
"rewards/rejected": 1.3184587955474854,
"step": 155,
"train_speed(iter/s)": 0.001934
},
{
"epoch": 1.5118483412322274,
"grad_norm": 2.3436055183410645,
"learning_rate": 1.4784473717366387e-05,
"logits/chosen": -1.2591499090194702,
"logits/rejected": -1.2514972686767578,
"logps/chosen": -9.602866172790527,
"logps/rejected": -204.9786376953125,
"loss": 0.06997905969619751,
"memory(GiB)": 178.87,
"nll_loss": 0.0632362961769104,
"rewards/accuracies": 1.0,
"rewards/chosen": 20.80245590209961,
"rewards/margins": 19.767711639404297,
"rewards/rejected": 1.0347453355789185,
"step": 160,
"train_speed(iter/s)": 0.001935
},
{
"epoch": 1.5592417061611374,
"grad_norm": 0.4779791235923767,
"learning_rate": 1.2095284991437733e-05,
"logits/chosen": -1.2291038036346436,
"logits/rejected": -1.2240577936172485,
"logps/chosen": -12.217391014099121,
"logps/rejected": -200.1668243408203,
"loss": 0.0741503119468689,
"memory(GiB)": 178.87,
"nll_loss": 0.0690564215183258,
"rewards/accuracies": 1.0,
"rewards/chosen": 19.801494598388672,
"rewards/margins": 18.78908920288086,
"rewards/rejected": 1.0124043226242065,
"step": 165,
"train_speed(iter/s)": 0.001934
},
{
"epoch": 1.6066350710900474,
"grad_norm": 0.6247928142547607,
"learning_rate": 9.642144811900739e-06,
"logits/chosen": -1.225555181503296,
"logits/rejected": -1.2201206684112549,
"logps/chosen": -9.304153442382812,
"logps/rejected": -219.2526092529297,
"loss": 0.056455212831497195,
"memory(GiB)": 178.87,
"nll_loss": 0.04982581362128258,
"rewards/accuracies": 1.0,
"rewards/chosen": 21.17973518371582,
"rewards/margins": 21.15430450439453,
"rewards/rejected": 0.025428902357816696,
"step": 170,
"train_speed(iter/s)": 0.001935
},
{
"epoch": 1.6540284360189572,
"grad_norm": 0.7543458342552185,
"learning_rate": 7.440329910775273e-06,
"logits/chosen": -1.2147386074066162,
"logits/rejected": -1.2046931982040405,
"logps/chosen": -11.757909774780273,
"logps/rejected": -177.62388610839844,
"loss": 0.11545271873474121,
"memory(GiB)": 178.87,
"nll_loss": 0.0637175664305687,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 19.637483596801758,
"rewards/margins": 17.161773681640625,
"rewards/rejected": 2.4757096767425537,
"step": 175,
"train_speed(iter/s)": 0.001935
},
{
"epoch": 1.7014218009478674,
"grad_norm": 0.9932089447975159,
"learning_rate": 5.5035519122409895e-06,
"logits/chosen": -1.3096096515655518,
"logits/rejected": -1.3018414974212646,
"logps/chosen": -10.068361282348633,
"logps/rejected": -190.3312225341797,
"loss": 0.07646466493606567,
"memory(GiB)": 178.87,
"nll_loss": 0.07355433702468872,
"rewards/accuracies": 1.0,
"rewards/chosen": 19.979000091552734,
"rewards/margins": 18.556865692138672,
"rewards/rejected": 1.4221333265304565,
"step": 180,
"train_speed(iter/s)": 0.001934
},
{
"epoch": 1.7488151658767772,
"grad_norm": 1.022765040397644,
"learning_rate": 3.843871944606969e-06,
"logits/chosen": -1.3755590915679932,
"logits/rejected": -1.3671270608901978,
"logps/chosen": -8.571699142456055,
"logps/rejected": -195.33099365234375,
"loss": 0.06720139980316162,
"memory(GiB)": 178.87,
"nll_loss": 0.06385985761880875,
"rewards/accuracies": 1.0,
"rewards/chosen": 20.94257164001465,
"rewards/margins": 19.358797073364258,
"rewards/rejected": 1.5837746858596802,
"step": 185,
"train_speed(iter/s)": 0.001935
},
{
"epoch": 1.7962085308056872,
"grad_norm": 0.2912954092025757,
"learning_rate": 2.4716255306108605e-06,
"logits/chosen": -1.4053622484207153,
"logits/rejected": -1.397859811782837,
"logps/chosen": -7.519402503967285,
"logps/rejected": -204.46128845214844,
"loss": 0.05536782741546631,
"memory(GiB)": 178.87,
"nll_loss": 0.05153592675924301,
"rewards/accuracies": 1.0,
"rewards/chosen": 21.256345748901367,
"rewards/margins": 20.680620193481445,
"rewards/rejected": 0.5757262706756592,
"step": 190,
"train_speed(iter/s)": 0.001936
},
{
"epoch": 1.8436018957345972,
"grad_norm": 0.3675084114074707,
"learning_rate": 1.3953582237871521e-06,
"logits/chosen": -1.3809669017791748,
"logits/rejected": -1.3703250885009766,
"logps/chosen": -15.006329536437988,
"logps/rejected": -199.88511657714844,
"loss": 0.08639336824417114,
"memory(GiB)": 178.87,
"nll_loss": 0.07968685030937195,
"rewards/accuracies": 1.0,
"rewards/chosen": 20.411216735839844,
"rewards/margins": 19.984678268432617,
"rewards/rejected": 0.4265367090702057,
"step": 195,
"train_speed(iter/s)": 0.001936
},
{
"epoch": 1.890995260663507,
"grad_norm": 0.941786527633667,
"learning_rate": 6.217723917238128e-07,
"logits/chosen": -1.3778371810913086,
"logits/rejected": -1.3684101104736328,
"logps/chosen": -7.696736812591553,
"logps/rejected": -223.34890747070312,
"loss": 0.0544640064239502,
"memory(GiB)": 178.87,
"nll_loss": 0.04780174046754837,
"rewards/accuracies": 1.0,
"rewards/chosen": 19.833518981933594,
"rewards/margins": 21.463306427001953,
"rewards/rejected": -1.6297862529754639,
"step": 200,
"train_speed(iter/s)": 0.001936
},
{
"epoch": 1.890995260663507,
"eval_logits/chosen": -1.3681780099868774,
"eval_logits/rejected": -1.363707423210144,
"eval_logps/chosen": -9.783992767333984,
"eval_logps/rejected": -174.166015625,
"eval_loss": 0.09675905108451843,
"eval_nll_loss": 0.08484382927417755,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 21.503095626831055,
"eval_rewards/margins": 17.923770904541016,
"eval_rewards/rejected": 3.5793240070343018,
"eval_runtime": 228.9543,
"eval_samples_per_second": 0.074,
"eval_steps_per_second": 0.074,
"step": 200
},
{
"epoch": 1.9383886255924172,
"grad_norm": 0.34630173444747925,
"learning_rate": 1.5568547761034004e-07,
"logits/chosen": -1.3920785188674927,
"logits/rejected": -1.3849624395370483,
"logps/chosen": -9.088478088378906,
"logps/rejected": -196.94784545898438,
"loss": 0.0544456422328949,
"memory(GiB)": 178.87,
"nll_loss": 0.04648340493440628,
"rewards/accuracies": 1.0,
"rewards/chosen": 20.052505493164062,
"rewards/margins": 19.269899368286133,
"rewards/rejected": 0.7826067805290222,
"step": 205,
"train_speed(iter/s)": 0.001931
},
{
"epoch": 1.985781990521327,
"grad_norm": 0.3001299798488617,
"learning_rate": 0.0,
"logits/chosen": -1.345251202583313,
"logits/rejected": -1.3417621850967407,
"logps/chosen": -13.094474792480469,
"logps/rejected": -177.3877410888672,
"loss": 0.10830415487289428,
"memory(GiB)": 178.87,
"nll_loss": 0.06853027641773224,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 19.333545684814453,
"rewards/margins": 17.912960052490234,
"rewards/rejected": 1.4205853939056396,
"step": 210,
"train_speed(iter/s)": 0.00193
},
{
"epoch": 1.985781990521327,
"eval_logits/chosen": -1.3710763454437256,
"eval_logits/rejected": -1.364762783050537,
"eval_logps/chosen": -9.65355110168457,
"eval_logps/rejected": -172.0828094482422,
"eval_loss": 0.09402994066476822,
"eval_nll_loss": 0.08348451554775238,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 21.51613998413086,
"eval_rewards/margins": 17.72849464416504,
"eval_rewards/rejected": 3.787644863128662,
"eval_runtime": 228.9559,
"eval_samples_per_second": 0.074,
"eval_steps_per_second": 0.074,
"step": 210
}
],
"logging_steps": 5,
"max_steps": 210,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.9852517843992576e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}