"""Convert a JSON list of question/answer records into a ``Dataset``."""

import json
from pathlib import Path
from typing import List

from schema.dataset import Dataset, DatasetItem, Q_A


def convert_json_to_dataset(json_data: List[dict]) -> Dataset:
    """Convert question/answer records into a ``Dataset``.

    Each record becomes one ``DatasetItem`` holding a single ``Q_A`` message.
    Item IDs are assigned sequentially, starting at 1, in input order.

    Args:
        json_data: Records to convert; each is indexed by the keys
            ``"question"`` and ``"answer"``.

    Returns:
        A ``Dataset`` named "Converted Dataset" with ``model_id`` set to
        ``None`` and one item per input record.

    Raises:
        KeyError: If a record lacks the ``"question"`` or ``"answer"`` key.
    """
    # enumerate(start=1) supplies the auto-incrementing ID.
    dataset_items = [
        DatasetItem(
            id=item_id,
            message=[Q_A(question=item["question"], answer=item["answer"])],
        )
        for item_id, item in enumerate(json_data, start=1)
    ]

    return Dataset(
        name="Converted Dataset",
        model_id=None,
        description="Dataset converted from JSON",
        dataset_items=dataset_items,
    )


def main() -> None:
    """Read the source JSON file, convert it, and write ``output.json``."""
    # Join path components with pathlib so the script does not depend on
    # Windows-style backslash separators.
    input_path = Path("workdir") / "dataset_old" / "llamafactory.json"
    with input_path.open("r", encoding="utf-8") as file:
        json_data = json.load(file)

    converted_dataset = convert_json_to_dataset(json_data)

    # Serialize the converted dataset as indented JSON.
    with open("output.json", "w", encoding="utf-8") as file:
        file.write(converted_dataset.model_dump_json(indent=4))


# Example usage: read JSON from a file and convert it.
if __name__ == "__main__":
    main()