{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\sbtwc\\.conda\\envs\\unsloth_env\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Unsloth: OpenAI failed to import - ignoring for now.\n", "🦥 Unsloth Zoo will now patch everything to make training faster!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\sbtwc\\.conda\\envs\\unsloth_env\\Lib\\site-packages\\unsloth_zoo\\gradient_checkpointing.py:330: UserWarning: expandable_segments not supported on this platform (Triggered internally at C:\\actions-runner\\_work\\pytorch\\pytorch\\pytorch\\c10/cuda/CUDAAllocatorConfig.h:28.)\n", " GPU_BUFFERS = tuple([torch.empty(2*256*2048, dtype = dtype, device = f\"cuda:{i}\") for i in range(n_gpus)])\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "==((====))== Unsloth 2025.3.9: Fast Qwen2 patching. Transformers: 4.48.1.\n", " \\\\ /| NVIDIA GeForce RTX 3060 Laptop GPU. Num GPUs = 1. Max memory: 6.0 GB. Platform: Windows.\n", "O^O/ \\_/ \\ Torch: 2.6.0+cu126. CUDA: 8.6. CUDA Toolkit: 12.6. Triton: 3.2.0\n", "\\ / Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]\n", " \"-____-\" Free license: http://github.com/unslothai/unsloth\n", "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2025.3.9 patched 36 layers with 36 QKV layers, 36 O layers and 36 MLP layers.\n" ] } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "\n", "# 基础配置参数\n", "max_seq_length = 4096 # 最大序列长度\n", "dtype = None # 自动检测数据类型\n", "load_in_4bit = True # 使用4位量化以减少内存使用\n", "\n", "# 加载预训练模型和分词器\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"workdir/model/Qwen2.5-3B-Instruct-bnb-4bit\", # 选择Qwen2.5 32B指令模型\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", ")\n", "\n", "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 64, # LoRA秩,控制可训练参数数量\n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",], # 需要训练的目标模块\n", " lora_alpha = 64, # LoRA缩放因子\n", " lora_dropout = 0, # LoRA dropout率\n", " bias = \"none\", # 是否训练偏置项\n", " use_gradient_checkpointing = \"unsloth\", # 使用梯度检查点节省显存\n", " random_state = 114514, # 随机数种子\n", " use_rslora = False, # 是否使用稳定版LoRA\n", " loftq_config = None, # LoftQ配置\n", ")\n", "\n", "from unsloth.chat_templates import get_chat_template\n", "# 配置分词器使用qwen-2.5对话模板\n", "tokenizer = get_chat_template(\n", " tokenizer,\n", " chat_template=\"qwen-2.5\",\n", ")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Map: 100%|██████████| 595/595 [00:00<00:00, 7000.21 examples/s]\n" ] } ], "source": [ "# 加载数据集\n", "import json\n", "\n", "def formatting_prompts_func(examples):\n", " \"\"\"格式化对话数据的函数\n", " Args:\n", " examples: 包含对话列表的字典\n", " Returns:\n", " 包含格式化文本的字典\n", " \"\"\"\n", " questions = examples[\"question\"]\n", " 
answer = examples[\"answer\"]\n", " \n", " # 将Question和Response组合成对话形式\n", " convos = [\n", " [{\"role\": \"user\", \"content\": q}, {\"role\": \"assistant\", \"content\": r}]\n", " for q, r in zip(questions, answer)\n", " ]\n", " \n", " # 使用tokenizer.apply_chat_template格式化对话\n", " texts = [\n", " tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False)\n", " for convo in convos\n", " ]\n", " \n", " return {\"text\": texts}\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'question': 'LLaMA-Factory有哪些训练方法?', 'answer': 'LLaMA-Factory提供了Pre-training和Post-training两种训练方法。', 'text': '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n<|im_start|>user\\nLLaMA-Factory有哪些训练方法?<|im_end|>\\n<|im_start|>assistant\\nLLaMA-Factory提供了Pre-training和Post-training两种训练方法。<|im_end|>\\n'}\n" ] } ], "source": [ "from datasets import load_dataset\n", "dataset = load_dataset(\"json\", data_files=\"workdir\\dataset\\dataset.json\",split=\"train\")\n", "dataset = dataset.map(formatting_prompts_func, batched = True)\n", "\n", "print(dataset[5])" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GPU = NVIDIA GeForce RTX 3060 Laptop GPU. Max memory = 6.0 GB.\n", "2.557 GB of memory reserved.\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments, DataCollatorForSeq2Seq\n", "from unsloth import is_bfloat16_supported\n", "\n", "# 配置训练器\n", "trainer = SFTTrainer(\n", " model=model,\n", " tokenizer=tokenizer,\n", " train_dataset=dataset,\n", " dataset_text_field=\"text\",\n", " max_seq_length=max_seq_length,\n", " data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),\n", " dataset_num_proc=1,\n", " packing=False,\n", " args=TrainingArguments(\n", " per_device_train_batch_size=1, # 每个设备的批次大小\n", " gradient_accumulation_steps=4, # 梯度累积步数\n", " warmup_steps=3*50, # 预热步数\n", " max_steps=3*500, # 最大训练步数\n", " learning_rate=1e-4, # 学习率\n", " fp16=not is_bfloat16_supported(), # 是否使用fp16\n", " bf16=is_bfloat16_supported(), # 是否使用bf16\n", " logging_steps=5, # 日志记录间隔\n", " optim=\"paged_adamw_8bit\", # 优化器\n", " weight_decay=0.01, # 权重衰减\n", " lr_scheduler_type=\"linear\", # 学习率调度器\n", " seed=114514, # 随机种子\n", " output_dir=\"workdir/checkpoint/\", # 输出目录\n", " save_strategy=\"steps\", # 按步保存中间权重\n", " save_steps=200, # 每多少步保存一次中间权重\n", " report_to=\"none\", # 不使用外部日志工具\n", " ),\n", ")\n", "\n", "from unsloth.chat_templates import train_on_responses_only\n", "# 设置仅对助手回复部分计算损失\n", "trainer = train_on_responses_only(\n", " trainer,\n", " instruction_part = \"<|im_start|>user\\n\",\n", " response_part = \"<|im_start|>assistant\\n\",\n", ")\n", "\n", "# 获取GPU信息\n", "gpu_stats = torch.cuda.get_device_properties(0)\n", "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n", "print(f\"GPU = {gpu_stats.name}. 
Max memory = {max_memory} GB.\")\n", "print(f\"{start_gpu_memory} GB of memory reserved.\")" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs used = 1\n", " \\\\ /| Num examples = 595 | Num Epochs = 11 | Total steps = 1,500\n", "O^O/ \\_/ \\ Batch size per device = 1 | Gradient accumulation steps = 4\n", "\\ / Data Parallel GPUs = 1 | Total batch size (1 x 4 x 1) = 4\n", " \"-____-\" Trainable parameters = 119,734,272/1,818,406,912 (6.58% trained)\n" ] }, { "data": { "text/html": [ "\n", "
[Training-loss table elided: 300 rows logged at 5-step intervals. Loss fell from 2.9416 at step 5 to 0.9052 at step 100, 0.4356 at step 300, 0.1445 at step 500, 0.0278 at step 1000, and 0.0080 at step 1500.]
"
],
"text/plain": [
"