{ "dataset": "data/sft_kimi_k25.jsonl", "base_model": "Qwen/Qwen2.5-Coder-3B-Instruct", "output_dir": "/tmp/qwen3b-sft-long", "epochs": 4.0, "lr": 0.0002, "lora_r": 32, "lora_alpha": 64, "lora_dropout": 0.05, "target_modules": "all-linear", "per_device_batch_size": 1, "gradient_accumulation": 2, "max_seq_len": 8192, "logging_steps": 2, "save_steps": 50, "warmup_ratio": 0.05, "use_qlora": false, "no_assistant_only_loss": false, "push_to_hub": "bpHigh/qwen3b-office-sft-kimi-long", "seed": 42, "report_to": "none" }