import os


def formatting_prompts_func(examples, tokenizer):
    """Render question/answer pairs as chat-template text.

    Args:
        examples: Mapping with a "question" entry and an "answer" entry,
            each an iterable of message contents. They are paired
            positionally; ``zip`` stops at the shorter of the two.
        tokenizer: Object exposing ``apply_chat_template(conversation,
            tokenize=..., add_generation_prompt=...)``.

    Returns:
        A dict ``{"text": [...]}`` holding one rendered value per pair.
    """
    questions = examples["question"]
    answers = examples["answer"]

    # Build one two-turn conversation (user question, assistant reply)
    # per question/answer pair.
    convos = [
        [
            {"role": "user", "content": question},
            {"role": "assistant", "content": reply},
        ]
        for question, reply in zip(questions, answers)
    ]

    # Ask the tokenizer for untokenized output, with no trailing
    # generation prompt appended after the assistant turn.
    texts = [
        tokenizer.apply_chat_template(
            convo, tokenize=False, add_generation_prompt=False
        )
        for convo in convos
    ]
    return {"text": texts}


def get_model_name(model):
    """Return the last path component of ``model.name_or_path``.

    Trailing path separators are stripped first, so a directory-style
    value such as ``"/ckpt/run1/"`` yields ``"run1"`` instead of the empty
    string that ``os.path.basename`` would otherwise return.

    Args:
        model: Object with a ``name_or_path`` attribute (a path-like value).

    Returns:
        The final component of the path; empty if the path is empty or
        consists solely of separators.
    """
    path = os.fspath(model.name_or_path)
    # Only str paths are stripped; bytes paths go through unchanged so they
    # behave exactly as they did before.
    if isinstance(path, str):
        path = path.rstrip("/" + os.sep)
    return os.path.basename(path)