| { | |
| "best_global_step": 210, | |
| "best_metric": 0.09402994, | |
| "best_model_checkpoint": "/fsx-neo/dedicated-fsx-data-repo-neo-us-east-1/kayleexl/tree_reasoning/logical-reasoning/ms-swift/output_dpo/v7-20260217-182416/checkpoint-210", | |
| "epoch": 1.985781990521327, | |
| "eval_steps": 50, | |
| "global_step": 210, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.009478672985781991, | |
| "grad_norm": 19.55022430419922, | |
| "learning_rate": 9.090909090909091e-06, | |
| "logits/chosen": -0.8645371198654175, | |
| "logits/rejected": -0.8560649752616882, | |
| "logps/chosen": -212.84078979492188, | |
| "logps/rejected": -181.89553833007812, | |
| "loss": 1.9682148694992065, | |
| "memory(GiB)": 153.35, | |
| "nll_loss": 1.275067687034607, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1, | |
| "train_speed(iter/s)": 0.001829 | |
| }, | |
| { | |
| "epoch": 0.04739336492890995, | |
| "grad_norm": 10.874584197998047, | |
| "learning_rate": 4.545454545454546e-05, | |
| "logits/chosen": -0.8818354606628418, | |
| "logits/rejected": -0.8391438722610474, | |
| "logps/chosen": -206.83299255371094, | |
| "logps/rejected": -210.26193237304688, | |
| "loss": 2.225569725036621, | |
| "memory(GiB)": 171.5, | |
| "nll_loss": 1.5596290826797485, | |
| "rewards/accuracies": 0.390625, | |
| "rewards/chosen": 0.42497575283050537, | |
| "rewards/margins": 0.14753574132919312, | |
| "rewards/rejected": 0.27743998169898987, | |
| "step": 5, | |
| "train_speed(iter/s)": 0.001962 | |
| }, | |
| { | |
| "epoch": 0.0947867298578199, | |
| "grad_norm": 3.7358806133270264, | |
| "learning_rate": 9.090909090909092e-05, | |
| "logits/chosen": -0.42944854497909546, | |
| "logits/rejected": -0.4029228687286377, | |
| "logps/chosen": -153.91217041015625, | |
| "logps/rejected": -171.6427764892578, | |
| "loss": 1.2244630813598634, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.9248598217964172, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": 8.403377532958984, | |
| "rewards/margins": 3.649811267852783, | |
| "rewards/rejected": 4.753565788269043, | |
| "step": 10, | |
| "train_speed(iter/s)": 0.002002 | |
| }, | |
| { | |
| "epoch": 0.14218009478672985, | |
| "grad_norm": 2.5456342697143555, | |
| "learning_rate": 9.990034266657467e-05, | |
| "logits/chosen": 0.14313745498657227, | |
| "logits/rejected": 0.15165017545223236, | |
| "logps/chosen": -64.2056655883789, | |
| "logps/rejected": -108.23348236083984, | |
| "loss": 0.9479263305664063, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.5506663918495178, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 13.307535171508789, | |
| "rewards/margins": 4.5142388343811035, | |
| "rewards/rejected": 8.793294906616211, | |
| "step": 15, | |
| "train_speed(iter/s)": 0.001977 | |
| }, | |
| { | |
| "epoch": 0.1895734597156398, | |
| "grad_norm": 9.239604949951172, | |
| "learning_rate": 9.949616551002787e-05, | |
| "logits/chosen": -0.7042765617370605, | |
| "logits/rejected": -0.6620756983757019, | |
| "logps/chosen": -57.892356872558594, | |
| "logps/rejected": -133.2845916748047, | |
| "loss": 0.6575197696685791, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.44857126474380493, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": 15.717184066772461, | |
| "rewards/margins": 7.5960211753845215, | |
| "rewards/rejected": 8.121164321899414, | |
| "step": 20, | |
| "train_speed(iter/s)": 0.001969 | |
| }, | |
| { | |
| "epoch": 0.23696682464454977, | |
| "grad_norm": 3.75114369392395, | |
| "learning_rate": 9.87837549867887e-05, | |
| "logits/chosen": -0.5987659096717834, | |
| "logits/rejected": -0.5469285249710083, | |
| "logps/chosen": -39.35096740722656, | |
| "logps/rejected": -128.31814575195312, | |
| "loss": 0.4350598335266113, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.30207258462905884, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 19.038375854492188, | |
| "rewards/margins": 10.453929901123047, | |
| "rewards/rejected": 8.584444999694824, | |
| "step": 25, | |
| "train_speed(iter/s)": 0.001984 | |
| }, | |
| { | |
| "epoch": 0.2843601895734597, | |
| "grad_norm": 7.996090412139893, | |
| "learning_rate": 9.776754757575975e-05, | |
| "logits/chosen": -0.7030301094055176, | |
| "logits/rejected": -0.6584943532943726, | |
| "logps/chosen": -42.931304931640625, | |
| "logps/rejected": -132.83807373046875, | |
| "loss": 0.625270938873291, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.3963403105735779, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": 16.53431510925293, | |
| "rewards/margins": 9.225370407104492, | |
| "rewards/rejected": 7.308945655822754, | |
| "step": 30, | |
| "train_speed(iter/s)": 0.001981 | |
| }, | |
| { | |
| "epoch": 0.33175355450236965, | |
| "grad_norm": 0.928902804851532, | |
| "learning_rate": 9.645387162638652e-05, | |
| "logits/chosen": -0.7767706513404846, | |
| "logits/rejected": -0.7252510190010071, | |
| "logps/chosen": -30.822132110595703, | |
| "logps/rejected": -122.3298110961914, | |
| "loss": 0.47859888076782225, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.25827503204345703, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": 18.511274337768555, | |
| "rewards/margins": 9.673491477966309, | |
| "rewards/rejected": 8.837784767150879, | |
| "step": 35, | |
| "train_speed(iter/s)": 0.001982 | |
| }, | |
| { | |
| "epoch": 0.3791469194312796, | |
| "grad_norm": 4.358126163482666, | |
| "learning_rate": 9.485090794937319e-05, | |
| "logits/chosen": -0.8528544306755066, | |
| "logits/rejected": -0.8117485046386719, | |
| "logps/chosen": -28.26708984375, | |
| "logps/rejected": -153.4051055908203, | |
| "loss": 0.38582923412323, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.20131754875183105, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": 19.518356323242188, | |
| "rewards/margins": 13.451945304870605, | |
| "rewards/rejected": 6.066409587860107, | |
| "step": 40, | |
| "train_speed(iter/s)": 0.001983 | |
| }, | |
| { | |
| "epoch": 0.4265402843601896, | |
| "grad_norm": 9.361577987670898, | |
| "learning_rate": 9.29686388713456e-05, | |
| "logits/chosen": -1.4655885696411133, | |
| "logits/rejected": -1.4526116847991943, | |
| "logps/chosen": -41.395565032958984, | |
| "logps/rejected": -169.83302307128906, | |
| "loss": 0.4372711658477783, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.3007845878601074, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 16.87301254272461, | |
| "rewards/margins": 13.00146198272705, | |
| "rewards/rejected": 3.871551513671875, | |
| "step": 45, | |
| "train_speed(iter/s)": 0.00198 | |
| }, | |
| { | |
| "epoch": 0.47393364928909953, | |
| "grad_norm": 1.3884881734848022, | |
| "learning_rate": 9.081878607071996e-05, | |
| "logits/chosen": -1.4089267253875732, | |
| "logits/rejected": -1.3798249959945679, | |
| "logps/chosen": -29.07427978515625, | |
| "logps/rejected": -174.85606384277344, | |
| "loss": 0.28701162338256836, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.17918026447296143, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 20.193706512451172, | |
| "rewards/margins": 15.493553161621094, | |
| "rewards/rejected": 4.700153350830078, | |
| "step": 50, | |
| "train_speed(iter/s)": 0.001981 | |
| }, | |
| { | |
| "epoch": 0.47393364928909953, | |
| "eval_logits/chosen": -1.2822269201278687, | |
| "eval_logits/rejected": -1.255699634552002, | |
| "eval_logps/chosen": -34.7473258972168, | |
| "eval_logps/rejected": -143.84861755371094, | |
| "eval_loss": 0.4424428939819336, | |
| "eval_nll_loss": 0.2968122363090515, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": 19.00676155090332, | |
| "eval_rewards/margins": 12.395697593688965, | |
| "eval_rewards/rejected": 6.611064910888672, | |
| "eval_runtime": 230.2546, | |
| "eval_samples_per_second": 0.074, | |
| "eval_steps_per_second": 0.074, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.5213270142180095, | |
| "grad_norm": 1.1772722005844116, | |
| "learning_rate": 8.841473758189854e-05, | |
| "logits/chosen": -1.1759651899337769, | |
| "logits/rejected": -1.1313683986663818, | |
| "logps/chosen": -26.088220596313477, | |
| "logps/rejected": -150.84393310546875, | |
| "loss": 0.22870185375213622, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.16189467906951904, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 19.440641403198242, | |
| "rewards/margins": 13.270869255065918, | |
| "rewards/rejected": 6.169772148132324, | |
| "step": 55, | |
| "train_speed(iter/s)": 0.001958 | |
| }, | |
| { | |
| "epoch": 0.5687203791469194, | |
| "grad_norm": 2.0721075534820557, | |
| "learning_rate": 8.577146442236857e-05, | |
| "logits/chosen": -1.0945132970809937, | |
| "logits/rejected": -1.060734748840332, | |
| "logps/chosen": -22.90542984008789, | |
| "logps/rejected": -128.8797607421875, | |
| "loss": 0.24675798416137695, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.17432959377765656, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 17.570837020874023, | |
| "rewards/margins": 11.27192497253418, | |
| "rewards/rejected": 6.298913955688477, | |
| "step": 60, | |
| "train_speed(iter/s)": 0.00195 | |
| }, | |
| { | |
| "epoch": 0.6161137440758294, | |
| "grad_norm": 2.0549778938293457, | |
| "learning_rate": 8.290542736190188e-05, | |
| "logits/chosen": -1.080885648727417, | |
| "logits/rejected": -1.057293176651001, | |
| "logps/chosen": -17.660358428955078, | |
| "logps/rejected": -124.87294006347656, | |
| "loss": 0.36183264255523684, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.1854233592748642, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 18.208276748657227, | |
| "rewards/margins": 11.001307487487793, | |
| "rewards/rejected": 7.206968784332275, | |
| "step": 65, | |
| "train_speed(iter/s)": 0.001945 | |
| }, | |
| { | |
| "epoch": 0.6635071090047393, | |
| "grad_norm": 1.9049893617630005, | |
| "learning_rate": 7.983447441444281e-05, | |
| "logits/chosen": -1.4264296293258667, | |
| "logits/rejected": -1.4030673503875732, | |
| "logps/chosen": -22.567996978759766, | |
| "logps/rejected": -163.80955505371094, | |
| "loss": 0.2892845392227173, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.15383335947990417, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 18.27324676513672, | |
| "rewards/margins": 15.088933944702148, | |
| "rewards/rejected": 3.1843135356903076, | |
| "step": 70, | |
| "train_speed(iter/s)": 0.001941 | |
| }, | |
| { | |
| "epoch": 0.7109004739336493, | |
| "grad_norm": 1.0275962352752686, | |
| "learning_rate": 7.657772969104508e-05, | |
| "logits/chosen": -1.3237228393554688, | |
| "logits/rejected": -1.3014802932739258, | |
| "logps/chosen": -27.62123680114746, | |
| "logps/rejected": -179.39981079101562, | |
| "loss": 0.22349367141723633, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.16326689720153809, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 19.444076538085938, | |
| "rewards/margins": 16.357398986816406, | |
| "rewards/rejected": 3.0866756439208984, | |
| "step": 75, | |
| "train_speed(iter/s)": 0.001941 | |
| }, | |
| { | |
| "epoch": 0.7582938388625592, | |
| "grad_norm": 2.1720211505889893, | |
| "learning_rate": 7.31554743060174e-05, | |
| "logits/chosen": -0.9713658094406128, | |
| "logits/rejected": -0.9438395500183105, | |
| "logps/chosen": -21.5306396484375, | |
| "logps/rejected": -158.5912628173828, | |
| "loss": 0.2255859136581421, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.15225784480571747, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 20.218997955322266, | |
| "rewards/margins": 14.551397323608398, | |
| "rewards/rejected": 5.6676025390625, | |
| "step": 80, | |
| "train_speed(iter/s)": 0.001941 | |
| }, | |
| { | |
| "epoch": 0.8056872037914692, | |
| "grad_norm": 2.247673988342285, | |
| "learning_rate": 6.958902007792466e-05, | |
| "logits/chosen": -0.7944511771202087, | |
| "logits/rejected": -0.7699103355407715, | |
| "logps/chosen": -11.273658752441406, | |
| "logps/rejected": -142.4789581298828, | |
| "loss": 0.1783364772796631, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.10094492137432098, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 19.9717960357666, | |
| "rewards/margins": 13.428759574890137, | |
| "rewards/rejected": 6.543033599853516, | |
| "step": 85, | |
| "train_speed(iter/s)": 0.001943 | |
| }, | |
| { | |
| "epoch": 0.8530805687203792, | |
| "grad_norm": 5.502572059631348, | |
| "learning_rate": 6.590057681196191e-05, | |
| "logits/chosen": -0.7691094875335693, | |
| "logits/rejected": -0.7428280711174011, | |
| "logps/chosen": -16.701950073242188, | |
| "logps/rejected": -178.28001403808594, | |
| "loss": 0.21286754608154296, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.1458219736814499, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 18.113544464111328, | |
| "rewards/margins": 14.766156196594238, | |
| "rewards/rejected": 3.3473877906799316, | |
| "step": 90, | |
| "train_speed(iter/s)": 0.001941 | |
| }, | |
| { | |
| "epoch": 0.9004739336492891, | |
| "grad_norm": 6.771712779998779, | |
| "learning_rate": 6.211311399018916e-05, | |
| "logits/chosen": -1.2176296710968018, | |
| "logits/rejected": -1.2004112005233765, | |
| "logps/chosen": -10.625171661376953, | |
| "logps/rejected": -197.5986785888672, | |
| "loss": 0.14389824867248535, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.08421098440885544, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 21.306598663330078, | |
| "rewards/margins": 20.043148040771484, | |
| "rewards/rejected": 1.2634522914886475, | |
| "step": 95, | |
| "train_speed(iter/s)": 0.001943 | |
| }, | |
| { | |
| "epoch": 0.9478672985781991, | |
| "grad_norm": 0.7936939001083374, | |
| "learning_rate": 5.8250217730939973e-05, | |
| "logits/chosen": -1.2689450979232788, | |
| "logits/rejected": -1.2652291059494019, | |
| "logps/chosen": -24.885387420654297, | |
| "logps/rejected": -166.68470764160156, | |
| "loss": 0.22324090003967284, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.16993048787117004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 18.267993927001953, | |
| "rewards/margins": 15.335573196411133, | |
| "rewards/rejected": 2.932422637939453, | |
| "step": 100, | |
| "train_speed(iter/s)": 0.001939 | |
| }, | |
| { | |
| "epoch": 0.9478672985781991, | |
| "eval_logits/chosen": -1.276165246963501, | |
| "eval_logits/rejected": -1.2667920589447021, | |
| "eval_logps/chosen": -17.997478485107422, | |
| "eval_logps/rejected": -172.5413818359375, | |
| "eval_loss": 0.18138757348060608, | |
| "eval_nll_loss": 0.15586450695991516, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": 20.681747436523438, | |
| "eval_rewards/margins": 16.939956665039062, | |
| "eval_rewards/rejected": 3.741788625717163, | |
| "eval_runtime": 230.8449, | |
| "eval_samples_per_second": 0.074, | |
| "eval_steps_per_second": 0.074, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.995260663507109, | |
| "grad_norm": 0.7884749174118042, | |
| "learning_rate": 5.433594390817756e-05, | |
| "logits/chosen": -1.217622995376587, | |
| "logits/rejected": -1.2057679891586304, | |
| "logps/chosen": -13.138340950012207, | |
| "logps/rejected": -187.07447814941406, | |
| "loss": 0.12456157207489013, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.09173186123371124, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.977113723754883, | |
| "rewards/margins": 18.163719177246094, | |
| "rewards/rejected": 1.8133970499038696, | |
| "step": 105, | |
| "train_speed(iter/s)": 0.001931 | |
| }, | |
| { | |
| "epoch": 1.037914691943128, | |
| "grad_norm": 0.41725555062294006, | |
| "learning_rate": 5.039466834548568e-05, | |
| "logits/chosen": -1.0610564947128296, | |
| "logits/rejected": -1.0492563247680664, | |
| "logps/chosen": -17.22906494140625, | |
| "logps/rejected": -178.35047912597656, | |
| "loss": 0.10526471138000489, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.0985727533698082, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.968387603759766, | |
| "rewards/margins": 18.300251007080078, | |
| "rewards/rejected": 1.6681346893310547, | |
| "step": 110, | |
| "train_speed(iter/s)": 0.001939 | |
| }, | |
| { | |
| "epoch": 1.085308056872038, | |
| "grad_norm": 0.5403364300727844, | |
| "learning_rate": 4.64509350175992e-05, | |
| "logits/chosen": -0.9350749254226685, | |
| "logits/rejected": -0.9276103973388672, | |
| "logps/chosen": -15.192606925964355, | |
| "logps/rejected": -196.2861785888672, | |
| "loss": 0.10088248252868652, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.09568502753973007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.20822525024414, | |
| "rewards/margins": 17.932273864746094, | |
| "rewards/rejected": 2.2759501934051514, | |
| "step": 115, | |
| "train_speed(iter/s)": 0.001939 | |
| }, | |
| { | |
| "epoch": 1.132701421800948, | |
| "grad_norm": 0.33638796210289, | |
| "learning_rate": 4.2529303204786953e-05, | |
| "logits/chosen": -0.8360708355903625, | |
| "logits/rejected": -0.8255692720413208, | |
| "logps/chosen": -12.213701248168945, | |
| "logps/rejected": -178.4746856689453, | |
| "loss": 0.08850648403167724, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.07898052781820297, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.77065086364746, | |
| "rewards/margins": 17.60364532470703, | |
| "rewards/rejected": 3.167004346847534, | |
| "step": 120, | |
| "train_speed(iter/s)": 0.00194 | |
| }, | |
| { | |
| "epoch": 1.180094786729858, | |
| "grad_norm": 0.42006343603134155, | |
| "learning_rate": 3.8654194551920485e-05, | |
| "logits/chosen": -0.8648909330368042, | |
| "logits/rejected": -0.8533682823181152, | |
| "logps/chosen": -13.303857803344727, | |
| "logps/rejected": -196.53375244140625, | |
| "loss": 0.11154735088348389, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.09461511671543121, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.808521270751953, | |
| "rewards/margins": 18.673742294311523, | |
| "rewards/rejected": 1.1347795724868774, | |
| "step": 125, | |
| "train_speed(iter/s)": 0.00194 | |
| }, | |
| { | |
| "epoch": 1.2274881516587677, | |
| "grad_norm": 0.4501703679561615, | |
| "learning_rate": 3.484974098465636e-05, | |
| "logits/chosen": -1.0564872026443481, | |
| "logits/rejected": -1.0499274730682373, | |
| "logps/chosen": -9.447141647338867, | |
| "logps/rejected": -210.6065216064453, | |
| "loss": 0.07890591621398926, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.0762052983045578, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.350528717041016, | |
| "rewards/margins": 20.427692413330078, | |
| "rewards/rejected": -0.07716653496026993, | |
| "step": 130, | |
| "train_speed(iter/s)": 0.00194 | |
| }, | |
| { | |
| "epoch": 1.2748815165876777, | |
| "grad_norm": 0.48629000782966614, | |
| "learning_rate": 3.11396344298212e-05, | |
| "logits/chosen": -1.122717022895813, | |
| "logits/rejected": -1.1127209663391113, | |
| "logps/chosen": -8.969633102416992, | |
| "logps/rejected": -178.16427612304688, | |
| "loss": 0.07699697613716125, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.07132184505462646, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.212081909179688, | |
| "rewards/margins": 18.33711814880371, | |
| "rewards/rejected": 1.8749620914459229, | |
| "step": 135, | |
| "train_speed(iter/s)": 0.001941 | |
| }, | |
| { | |
| "epoch": 1.3222748815165877, | |
| "grad_norm": 0.4852510094642639, | |
| "learning_rate": 2.754697927585399e-05, | |
| "logits/chosen": -1.0894103050231934, | |
| "logits/rejected": -1.0850476026535034, | |
| "logps/chosen": -13.88347053527832, | |
| "logps/rejected": -190.9267120361328, | |
| "loss": 0.11755204200744629, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.09814213216304779, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.125789642333984, | |
| "rewards/margins": 17.44692611694336, | |
| "rewards/rejected": 2.678863048553467, | |
| "step": 140, | |
| "train_speed(iter/s)": 0.001938 | |
| }, | |
| { | |
| "epoch": 1.3696682464454977, | |
| "grad_norm": 1.0523611307144165, | |
| "learning_rate": 2.4094148492096125e-05, | |
| "logits/chosen": -1.18220055103302, | |
| "logits/rejected": -1.183691382408142, | |
| "logps/chosen": -11.095239639282227, | |
| "logps/rejected": -180.09048461914062, | |
| "loss": 0.07882866859436036, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.06894151866436005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.55763816833496, | |
| "rewards/margins": 17.858949661254883, | |
| "rewards/rejected": 1.6986896991729736, | |
| "step": 145, | |
| "train_speed(iter/s)": 0.001938 | |
| }, | |
| { | |
| "epoch": 1.4170616113744074, | |
| "grad_norm": 0.9743487238883972, | |
| "learning_rate": 2.0802644302934683e-05, | |
| "logits/chosen": -1.2402594089508057, | |
| "logits/rejected": -1.229536771774292, | |
| "logps/chosen": -10.528487205505371, | |
| "logps/rejected": -173.75906372070312, | |
| "loss": 0.05581583380699158, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.04851926118135452, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 21.25326156616211, | |
| "rewards/margins": 17.409027099609375, | |
| "rewards/rejected": 3.84423565864563, | |
| "step": 150, | |
| "train_speed(iter/s)": 0.001939 | |
| }, | |
| { | |
| "epoch": 1.4170616113744074, | |
| "eval_logits/chosen": -1.2280174493789673, | |
| "eval_logits/rejected": -1.2243937253952026, | |
| "eval_logps/chosen": -11.513480186462402, | |
| "eval_logps/rejected": -170.34632873535156, | |
| "eval_loss": 0.11434541642665863, | |
| "eval_nll_loss": 0.10074843466281891, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": 21.33014678955078, | |
| "eval_rewards/margins": 17.368852615356445, | |
| "eval_rewards/rejected": 3.9612925052642822, | |
| "eval_runtime": 228.4343, | |
| "eval_samples_per_second": 0.074, | |
| "eval_steps_per_second": 0.074, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.4644549763033177, | |
| "grad_norm": 1.9418814182281494, | |
| "learning_rate": 1.7692964284439505e-05, | |
| "logits/chosen": -1.2662538290023804, | |
| "logits/rejected": -1.2612764835357666, | |
| "logps/chosen": -8.698439598083496, | |
| "logps/rejected": -195.09063720703125, | |
| "loss": 0.07216010689735412, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.05235465615987778, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.530902862548828, | |
| "rewards/margins": 19.212444305419922, | |
| "rewards/rejected": 1.3184587955474854, | |
| "step": 155, | |
| "train_speed(iter/s)": 0.001934 | |
| }, | |
| { | |
| "epoch": 1.5118483412322274, | |
| "grad_norm": 2.3436055183410645, | |
| "learning_rate": 1.4784473717366387e-05, | |
| "logits/chosen": -1.2591499090194702, | |
| "logits/rejected": -1.2514972686767578, | |
| "logps/chosen": -9.602866172790527, | |
| "logps/rejected": -204.9786376953125, | |
| "loss": 0.06997905969619751, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.0632362961769104, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.80245590209961, | |
| "rewards/margins": 19.767711639404297, | |
| "rewards/rejected": 1.0347453355789185, | |
| "step": 160, | |
| "train_speed(iter/s)": 0.001935 | |
| }, | |
| { | |
| "epoch": 1.5592417061611374, | |
| "grad_norm": 0.4779791235923767, | |
| "learning_rate": 1.2095284991437733e-05, | |
| "logits/chosen": -1.2291038036346436, | |
| "logits/rejected": -1.2240577936172485, | |
| "logps/chosen": -12.217391014099121, | |
| "logps/rejected": -200.1668243408203, | |
| "loss": 0.0741503119468689, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.0690564215183258, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.801494598388672, | |
| "rewards/margins": 18.78908920288086, | |
| "rewards/rejected": 1.0124043226242065, | |
| "step": 165, | |
| "train_speed(iter/s)": 0.001934 | |
| }, | |
| { | |
| "epoch": 1.6066350710900474, | |
| "grad_norm": 0.6247928142547607, | |
| "learning_rate": 9.642144811900739e-06, | |
| "logits/chosen": -1.225555181503296, | |
| "logits/rejected": -1.2201206684112549, | |
| "logps/chosen": -9.304153442382812, | |
| "logps/rejected": -219.2526092529297, | |
| "loss": 0.056455212831497195, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.04982581362128258, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 21.17973518371582, | |
| "rewards/margins": 21.15430450439453, | |
| "rewards/rejected": 0.025428902357816696, | |
| "step": 170, | |
| "train_speed(iter/s)": 0.001935 | |
| }, | |
| { | |
| "epoch": 1.6540284360189572, | |
| "grad_norm": 0.7543458342552185, | |
| "learning_rate": 7.440329910775273e-06, | |
| "logits/chosen": -1.2147386074066162, | |
| "logits/rejected": -1.2046931982040405, | |
| "logps/chosen": -11.757909774780273, | |
| "logps/rejected": -177.62388610839844, | |
| "loss": 0.11545271873474121, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.0637175664305687, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 19.637483596801758, | |
| "rewards/margins": 17.161773681640625, | |
| "rewards/rejected": 2.4757096767425537, | |
| "step": 175, | |
| "train_speed(iter/s)": 0.001935 | |
| }, | |
| { | |
| "epoch": 1.7014218009478674, | |
| "grad_norm": 0.9932089447975159, | |
| "learning_rate": 5.5035519122409895e-06, | |
| "logits/chosen": -1.3096096515655518, | |
| "logits/rejected": -1.3018414974212646, | |
| "logps/chosen": -10.068361282348633, | |
| "logps/rejected": -190.3312225341797, | |
| "loss": 0.07646466493606567, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.07355433702468872, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.979000091552734, | |
| "rewards/margins": 18.556865692138672, | |
| "rewards/rejected": 1.4221333265304565, | |
| "step": 180, | |
| "train_speed(iter/s)": 0.001934 | |
| }, | |
| { | |
| "epoch": 1.7488151658767772, | |
| "grad_norm": 1.022765040397644, | |
| "learning_rate": 3.843871944606969e-06, | |
| "logits/chosen": -1.3755590915679932, | |
| "logits/rejected": -1.3671270608901978, | |
| "logps/chosen": -8.571699142456055, | |
| "logps/rejected": -195.33099365234375, | |
| "loss": 0.06720139980316162, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.06385985761880875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.94257164001465, | |
| "rewards/margins": 19.358797073364258, | |
| "rewards/rejected": 1.5837746858596802, | |
| "step": 185, | |
| "train_speed(iter/s)": 0.001935 | |
| }, | |
| { | |
| "epoch": 1.7962085308056872, | |
| "grad_norm": 0.2912954092025757, | |
| "learning_rate": 2.4716255306108605e-06, | |
| "logits/chosen": -1.4053622484207153, | |
| "logits/rejected": -1.397859811782837, | |
| "logps/chosen": -7.519402503967285, | |
| "logps/rejected": -204.46128845214844, | |
| "loss": 0.05536782741546631, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.05153592675924301, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 21.256345748901367, | |
| "rewards/margins": 20.680620193481445, | |
| "rewards/rejected": 0.5757262706756592, | |
| "step": 190, | |
| "train_speed(iter/s)": 0.001936 | |
| }, | |
| { | |
| "epoch": 1.8436018957345972, | |
| "grad_norm": 0.3675084114074707, | |
| "learning_rate": 1.3953582237871521e-06, | |
| "logits/chosen": -1.3809669017791748, | |
| "logits/rejected": -1.3703250885009766, | |
| "logps/chosen": -15.006329536437988, | |
| "logps/rejected": -199.88511657714844, | |
| "loss": 0.08639336824417114, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.07968685030937195, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.411216735839844, | |
| "rewards/margins": 19.984678268432617, | |
| "rewards/rejected": 0.4265367090702057, | |
| "step": 195, | |
| "train_speed(iter/s)": 0.001936 | |
| }, | |
| { | |
| "epoch": 1.890995260663507, | |
| "grad_norm": 0.941786527633667, | |
| "learning_rate": 6.217723917238128e-07, | |
| "logits/chosen": -1.3778371810913086, | |
| "logits/rejected": -1.3684101104736328, | |
| "logps/chosen": -7.696736812591553, | |
| "logps/rejected": -223.34890747070312, | |
| "loss": 0.0544640064239502, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.04780174046754837, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.833518981933594, | |
| "rewards/margins": 21.463306427001953, | |
| "rewards/rejected": -1.6297862529754639, | |
| "step": 200, | |
| "train_speed(iter/s)": 0.001936 | |
| }, | |
| { | |
| "epoch": 1.890995260663507, | |
| "eval_logits/chosen": -1.3681780099868774, | |
| "eval_logits/rejected": -1.363707423210144, | |
| "eval_logps/chosen": -9.783992767333984, | |
| "eval_logps/rejected": -174.166015625, | |
| "eval_loss": 0.09675905108451843, | |
| "eval_nll_loss": 0.08484382927417755, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": 21.503095626831055, | |
| "eval_rewards/margins": 17.923770904541016, | |
| "eval_rewards/rejected": 3.5793240070343018, | |
| "eval_runtime": 228.9543, | |
| "eval_samples_per_second": 0.074, | |
| "eval_steps_per_second": 0.074, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.9383886255924172, | |
| "grad_norm": 0.34630173444747925, | |
| "learning_rate": 1.5568547761034004e-07, | |
| "logits/chosen": -1.3920785188674927, | |
| "logits/rejected": -1.3849624395370483, | |
| "logps/chosen": -9.088478088378906, | |
| "logps/rejected": -196.94784545898438, | |
| "loss": 0.0544456422328949, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.04648340493440628, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.052505493164062, | |
| "rewards/margins": 19.269899368286133, | |
| "rewards/rejected": 0.7826067805290222, | |
| "step": 205, | |
| "train_speed(iter/s)": 0.001931 | |
| }, | |
| { | |
| "epoch": 1.985781990521327, | |
| "grad_norm": 0.3001299798488617, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -1.345251202583313, | |
| "logits/rejected": -1.3417621850967407, | |
| "logps/chosen": -13.094474792480469, | |
| "logps/rejected": -177.3877410888672, | |
| "loss": 0.10830415487289428, | |
| "memory(GiB)": 178.87, | |
| "nll_loss": 0.06853027641773224, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 19.333545684814453, | |
| "rewards/margins": 17.912960052490234, | |
| "rewards/rejected": 1.4205853939056396, | |
| "step": 210, | |
| "train_speed(iter/s)": 0.00193 | |
| }, | |
| { | |
| "epoch": 1.985781990521327, | |
| "eval_logits/chosen": -1.3710763454437256, | |
| "eval_logits/rejected": -1.364762783050537, | |
| "eval_logps/chosen": -9.65355110168457, | |
| "eval_logps/rejected": -172.0828094482422, | |
| "eval_loss": 0.09402994066476822, | |
| "eval_nll_loss": 0.08348451554775238, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": 21.51613998413086, | |
| "eval_rewards/margins": 17.72849464416504, | |
| "eval_rewards/rejected": 3.787644863128662, | |
| "eval_runtime": 228.9559, | |
| "eval_samples_per_second": 0.074, | |
| "eval_steps_per_second": 0.074, | |
| "step": 210 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 210, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.9852517843992576e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |