import os
import sys
import json
from pathlib import Path
from typing import List

from tinydb import TinyDB, Query
from tinydb.storages import MemoryStorage

# Add the project root to sys.path so that sibling project packages
# (e.g. ``schema``) can be imported when this file is run directly.
sys.path.append(str(Path(__file__).resolve().parent.parent))

from schema.dataset import dataset, dataset_item, Q_A


def get_all_dataset(workdir: str) -> TinyDB:
    """Load every JSON file under ``<workdir>/dataset`` into an in-memory TinyDB.

    Each ``*.json`` file directly inside the dataset directory is parsed and
    its top-level value is inserted as one document. Files that cannot be
    read, parsed, or inserted are reported on stdout and skipped, so a single
    bad file never prevents the remaining ones from loading.

    Args:
        workdir (str): Path of the working directory that contains the
            ``dataset`` sub-directory.

    Returns:
        TinyDB: A database backed by ``MemoryStorage`` holding one document
        per successfully loaded file. It is empty when the dataset directory
        is missing or is not a directory.
    """
    db = TinyDB(storage=MemoryStorage)

    dataset_dir = os.path.join(workdir, "dataset")
    # isdir (rather than exists) also covers the case where "dataset" is a
    # regular file, which would otherwise make os.listdir raise.
    if not os.path.isdir(dataset_dir):
        return db

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # insertion order (and therefore the document ids) stable between runs.
    for filename in sorted(os.listdir(dataset_dir)):
        if not filename.endswith(".json"):
            continue

        filepath = os.path.join(dataset_dir, filename)
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                data = json.load(f)
            db.insert(data)
        except Exception as e:
            # Deliberate best-effort: unreadable files, invalid JSON and
            # documents rejected by TinyDB are all reported and skipped.
            print(f"Error loading dataset file {filename}: {str(e)}")
            continue

    return db


if __name__ == "__main__":
    # Working directory: the "workdir" folder next to this file's parent.
    workdir = os.path.join(os.path.dirname(__file__), "..", "workdir")

    # Load all datasets.
    datasets = get_all_dataset(workdir)

    # Print a short summary of what was found.
    print(f"Found {len(datasets)} datasets:")
    for ds in datasets.all():
        print(f"- {ds['name']} (ID: {ds['id']})")