# Listing metadata from the original code viewer: 27 lines, 786 B, Python.
import os
|
|
def formatting_prompts_func(examples, tokenizer):
    """Format question/answer pairs with the tokenizer's chat template.

    Args:
        examples: Mapping with parallel ``"question"`` and ``"answer"``
            sequences; item ``i`` of each forms one conversation.
        tokenizer: Object exposing ``apply_chat_template(conversation,
            tokenize=..., add_generation_prompt=...)``.

    Returns:
        A dict ``{"text": [...]}`` with one rendered conversation per
        question/answer pair, in input order.
    """
    questions = examples["question"]
    answers = examples["answer"]

    # Combine each question and its response into a two-turn conversation.
    # zip() stops at the shorter sequence, so unmatched trailing items are
    # dropped rather than raising.
    conversations = [
        [
            {"role": "user", "content": question},
            {"role": "assistant", "content": answer},
        ]
        for question, answer in zip(questions, answers)
    ]

    # Render each conversation through the chat template. tokenize=False asks
    # for the rendered template instead of token ids, and no generation prompt
    # is appended because the assistant turn is already part of the messages.
    texts = [
        tokenizer.apply_chat_template(
            conversation, tokenize=False, add_generation_prompt=False
        )
        for conversation in conversations
    ]

    return {"text": texts}
|
|
|
|
def get_model_name(model):
    """Return the final path component of ``model.name_or_path``.

    Trailing path separators are stripped first, so ``"out/checkpoint/"``
    yields ``"checkpoint"`` instead of the empty string that a bare
    ``os.path.basename`` call would return.

    Args:
        model: Object with a ``name_or_path`` attribute holding a string or
            ``os.PathLike`` value.

    Returns:
        The last component of the path, or ``""`` if nothing remains once
        trailing separators are removed.
    """
    path = os.fspath(model.name_or_path)
    # os.altsep is None on POSIX and "/" on Windows; include it when present.
    separators = os.sep + (os.altsep or "")
    return os.path.basename(path.rstrip(separators))