{ "quantization_method": "auto-round", "scheme": "W4A16", "bits": 4, "group_size": 128, "sym": true, "data_type": "int", "act_bits": 16, "iters": 1000, "nsamples": 500, "calibration_dataset": "NeelNanda/pile-10k (AutoRound default)", "calibration_type": "text-only (language model only)", "quantized_layers": 252, "fp16_layers": 105, "original_model": "TomoroAI/tomoro-colqwen3-embed-4b", "note": "Vision encoder kept in FP16 (not quantized). Text-only calibration is appropriate since only language_model is quantized." }