{
  "learning_rate": 2e-05,
  "batch_size": 8,
  "gradient_accumulation_steps": 6,
  "num_epochs": 3,
  "max_length": 2048,
  "warmup_ratio": 0.03,
  "weight_decay": 0.01,
  "max_grad_norm": 1.0,
  "seed": 42,
  "eval_frequency": 5,
  "logging_steps": 10,
  "dataloader_num_workers": 8,
  "pin_memory": true,
  "dataloader_persistent_workers": true,
  "prefetch_factor": 4,
  "early_stopping_patience": 3,
  "early_stopping_min_delta": 0.001,
  "checkpoint_dir": "/root/llama3.2-3b-training",
  "save_best_model": true,
  "save_last_checkpoint": true,
  "save_every_n_epochs": 1,
  "save_every_n_steps": 5000,
  "keep_last_n_checkpoints": 2
}