{ "train_loss": [ { "step": 1, "loss": 0.3608 }, { "step": 2, "loss": 0.3479 }, { "step": 3, "loss": 0.3415 }, { "step": 4, "loss": 0.1414 }, { "step": 5, "loss": 0.2754 }, { "step": 6, "loss": 0.1902 }, { "step": 7, "loss": 0.1224 }, { "step": 8, "loss": 0.2016 }, { "step": 9, "loss": 0.2094 }, { "step": 10, "loss": 0.0881 }, { "step": 11, "loss": 0.1214 }, { "step": 12, "loss": 0.0638 }, { "step": 13, "loss": 0.1138 }, { "step": 14, "loss": 0.0194 } ], "eval_loss": [], "args": { "lora_r": 16, "lora_alpha": 16, "lora_dropout": 0.05, "lora_target_modules": "q_proj,v_proj", "tuning_strategy": "lora", "num_trainable_layers": 2, "output_dir": "sft_prefill/prompt_id_0/ultra_large/qwen2-VL-7B-Instruct-syn-count-lora-only-black-10", "num_train_epochs": 2, "learning_rate": 0.0002, "per_device_train_batch_size": 16, "per_device_eval_batch_size": 16, "gradient_accumulation_steps": 1, "logging_steps": 10, "eval_steps": 200, "save_steps": 200, "warmup_ratio": 0.03, "weight_decay": 0.0, "max_grad_norm": 0.3, "lr_scheduler_type": "constant", "bf16": true, "tf32": true, "gradient_checkpointing": true, "optim": "adamw_torch_fused", "ft_type": "SFT_Prefill", "data_type": "ultra_large", "only_black": true, "only_color": false, "prompt_id": 0, "num_per_class": 10 } }