Naholav's picture
Upload fine-tuned Java code generation model
675da3b verified
{
"learning_rate": 2e-05,
"batch_size": 8,
"gradient_accumulation_steps": 6,
"num_epochs": 3,
"max_length": 2048,
"warmup_ratio": 0.03,
"weight_decay": 0.01,
"max_grad_norm": 1.0,
"seed": 42,
"eval_frequency": 5,
"logging_steps": 10,
"dataloader_num_workers": 8,
"pin_memory": true,
"dataloader_persistent_workers": true,
"prefetch_factor": 4,
"early_stopping_patience": 3,
"early_stopping_min_delta": 0.001,
"checkpoint_dir": "/root/llama3.2-3b-training",
"save_best_model": true,
"save_last_checkpoint": true,
"save_every_n_epochs": 1,
"save_every_n_steps": 5000,
"keep_last_n_checkpoints": 2
}