diff --git a/db/dataset_store.py b/db/dataset_store.py index 79e6d40..e73ef33 100644 --- a/db/dataset_store.py +++ b/db/dataset_store.py @@ -8,7 +8,7 @@ from tinydb.storages import MemoryStorage # 将项目根目录添加到系统路径中,以便能够导入项目中的其他模块 sys.path.append(str(Path(__file__).resolve().parent.parent)) -from schema.dataset import dataset, dataset_item, Q_A +from schema.dataset import Dataset, DatasetItem, Q_A def get_all_dataset(workdir: str) -> TinyDB: """ diff --git a/schema/dataset.py b/schema/dataset.py index 6419a58..8ecd5eb 100644 --- a/schema/dataset.py +++ b/schema/dataset.py @@ -2,7 +2,7 @@ from typing import Optional from pydantic import BaseModel, Field from datetime import datetime, timezone -class doc(BaseModel): +class Doc(BaseModel): id: Optional[int] = Field(default=None, description="文档ID") name: str = Field(default="", description="文档名称") path: str = Field(default="", description="文档路径") @@ -13,18 +13,18 @@ class Q_A(BaseModel): question: str = Field(default="", min_length=1,description="问题") answer: str = Field(default="", min_length=1, description="答案") -class dataset_item(BaseModel): +class DatasetItem(BaseModel): id: Optional[int] = Field(default=None, description="数据集项ID") message: list[Q_A] = Field(description="数据集项内容") -class dataset(BaseModel): +class Dataset(BaseModel): id: Optional[int] = Field(default=None, description="数据集ID") name: str = Field(default="", description="数据集名称") model_id: Optional[list[str]] = Field(default=None, description="数据集使用的模型ID") - source_doc: Optional[doc] = Field(default=None, description="数据集来源文档") + source_doc: Optional[Doc] = Field(default=None, description="数据集来源文档") description: Optional[str] = Field(default="", description="数据集描述") created_at: datetime = Field( default_factory=lambda: datetime.now(timezone.utc), description="记录创建时间" ) - dataset_items: list[dataset_item] = Field(default_factory=list, description="数据集项列表") \ No newline at end of file + dataset_items: list[DatasetItem] = Field(default_factory=list, description="数据集项列表") \ No newline at end of file diff --git a/tools/convert_json_to_dataset.py b/tools/convert_json_to_dataset.py index 3145cdd..1d31644 100644 --- a/tools/convert_json_to_dataset.py +++ b/tools/convert_json_to_dataset.py @@ -1,19 +1,19 @@ from typing import List -from schema.dataset import dataset, dataset_item, Q_A +from schema.dataset import Dataset, DatasetItem, Q_A import json -def convert_json_to_dataset(json_data: List[dict]) -> dataset: +def convert_json_to_dataset(json_data: List[dict]) -> Dataset: # 将JSON数据转换为dataset格式 dataset_items = [] item_id = 1 # 自增ID计数器 for item in json_data: qa = Q_A(question=item["question"], answer=item["answer"]) - dataset_item_obj = dataset_item(id=item_id, message=[qa]) + dataset_item_obj = DatasetItem(id=item_id, message=[qa]) dataset_items.append(dataset_item_obj) item_id += 1 # ID自增 # 创建dataset对象 - result_dataset = dataset( + result_dataset = Dataset( name="Converted Dataset", model_id=None, description="Dataset converted from JSON", diff --git a/tools/document.py b/tools/document.py index bd17d1b..f53b58b 100644 --- a/tools/document.py +++ b/tools/document.py @@ -4,7 +4,7 @@ from pathlib import Path # 添加项目根目录到sys.path sys.path.append(str(Path(__file__).resolve().parent.parent)) -from schema import doc +from schema import Doc def scan_docs_directory(workdir: str): docs_dir = os.path.join(workdir, "docs") @@ -21,7 +21,7 @@ def scan_docs_directory(workdir: str): for file in files: if file.endswith(".md"): markdown_files.append(os.path.join(root, file)) - to_return.append(doc(name=doc_name, path=doc_path, markdown_files=markdown_files)) + to_return.append(Doc(name=doc_name, path=doc_path, markdown_files=markdown_files)) return to_return diff --git a/tools/json_example.py b/tools/json_example.py index 81196ba..8ebc68b 100644 --- a/tools/json_example.py +++ b/tools/json_example.py @@ -61,7 +61,7 @@ if __name__ == "__main__": from pathlib import Path # 添加项目根目录到sys.path sys.path.append(str(Path(__file__).resolve().parent.parent)) - from schema import dataset + from schema import Dataset print("示例 JSON:") - print(generate_example_json(dataset)) + print(generate_example_json(Dataset)) diff --git a/tools/reasoning.py b/tools/reasoning.py index 27cc718..a9fde0e 100644 --- a/tools/reasoning.py +++ b/tools/reasoning.py @@ -96,7 +96,7 @@ if __name__ == "__main__": from json_example import generate_example_json from sqlmodel import Session, select from global_var import get_sql_engine, init_global_var - from schema import dataset_item + from schema import DatasetItem init_global_var("workdir") api_state = "1 deepseek-chat" @@ -105,7 +105,7 @@ if __name__ == "__main__": llm_request = LLMRequest( prompt="测试,随便说点什么", api_provider=api_provider, - format=generate_example_json(dataset_item) + format=generate_example_json(DatasetItem) ) # # 单次调用示例