feat(dataset): 初步完成数据集管理页面和功能

This commit is contained in:
carry
2025-04-09 20:49:20 +08:00
parent 932d1e2687
commit 2c8e54bb1e
3 changed files with 32 additions and 11 deletions

View File

@@ -3,13 +3,14 @@ import sys
import json
from pathlib import Path
from typing import List
from tinydb import TinyDB
from tinydb import TinyDB, Query
from tinydb.storages import MemoryStorage
# 将项目根目录添加到系统路径中,以便能够导入项目中的其他模块
sys.path.append(str(Path(__file__).resolve().parent.parent))
from schema.dataset import dataset, dataset_item, Q_A
def get_all_dataset(workdir: str) -> List[dataset]:
def get_all_dataset(workdir: str) -> TinyDB:
"""
扫描workdir/dataset目录下的所有json文件并读取为dataset对象列表
@@ -17,25 +18,25 @@ def get_all_dataset(workdir: str) -> List[dataset]:
workdir (str): 工作目录路径
Returns:
List[dataset]: 包含所有数据集对象的列表
TinyDB: 包含所有数据集对象的TinyDB对象
"""
dataset_dir = os.path.join(workdir, "dataset")
if not os.path.exists(dataset_dir):
return []
return TinyDB(storage=MemoryStorage)
datasets = []
db = TinyDB(storage=MemoryStorage)
for filename in os.listdir(dataset_dir):
if filename.endswith(".json"):
filepath = os.path.join(dataset_dir, filename)
try:
with open(filepath, "r", encoding="utf-8") as f:
data = json.load(f)
datasets.append(dataset(**data))
db.insert(data)
except (json.JSONDecodeError, Exception) as e:
print(f"Error loading dataset file {filename}: {str(e)}")
continue
return datasets
return db
if __name__ == "__main__":
@@ -45,5 +46,5 @@ if __name__ == "__main__":
datasets = get_all_dataset(workdir)
# 打印结果
print(f"Found {len(datasets)} datasets:")
for ds in datasets:
print(f"- {ds.name} (ID: {ds.id})")
for ds in datasets.all():
print(f"- {ds['name']} (ID: {ds['id']})")