refactor(schema): 重构数据集和文档类的命名
- 将 dataset、dataset_item 和 doc 类分别重命名为 Dataset、DatasetItem 和 Doc(PascalCase),以符合 Python 类命名惯例
- 更新相关模块中的导入和引用,以适应新的类名
- 此更改不影响功能,仅提高了代码的一致性和可读性
This commit is contained in:
parent
9236f49b36
commit
4c9caff668
@ -8,7 +8,7 @@ from tinydb.storages import MemoryStorage
|
|||||||
|
|
||||||
# 将项目根目录添加到系统路径中,以便能够导入项目中的其他模块
|
# 将项目根目录添加到系统路径中,以便能够导入项目中的其他模块
|
||||||
sys.path.append(str(Path(__file__).resolve().parent.parent))
|
sys.path.append(str(Path(__file__).resolve().parent.parent))
|
||||||
from schema.dataset import dataset, dataset_item, Q_A
|
from schema.dataset import Dataset, DatasetItem, Q_A
|
||||||
|
|
||||||
def get_all_dataset(workdir: str) -> TinyDB:
|
def get_all_dataset(workdir: str) -> TinyDB:
|
||||||
"""
|
"""
|
||||||
|
@ -2,7 +2,7 @@ from typing import Optional
|
|||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
class doc(BaseModel):
|
class Doc(BaseModel):
|
||||||
id: Optional[int] = Field(default=None, description="文档ID")
|
id: Optional[int] = Field(default=None, description="文档ID")
|
||||||
name: str = Field(default="", description="文档名称")
|
name: str = Field(default="", description="文档名称")
|
||||||
path: str = Field(default="", description="文档路径")
|
path: str = Field(default="", description="文档路径")
|
||||||
@ -13,18 +13,18 @@ class Q_A(BaseModel):
|
|||||||
question: str = Field(default="", min_length=1,description="问题")
|
question: str = Field(default="", min_length=1,description="问题")
|
||||||
answer: str = Field(default="", min_length=1, description="答案")
|
answer: str = Field(default="", min_length=1, description="答案")
|
||||||
|
|
||||||
class dataset_item(BaseModel):
|
class DatasetItem(BaseModel):
|
||||||
id: Optional[int] = Field(default=None, description="数据集项ID")
|
id: Optional[int] = Field(default=None, description="数据集项ID")
|
||||||
message: list[Q_A] = Field(description="数据集项内容")
|
message: list[Q_A] = Field(description="数据集项内容")
|
||||||
|
|
||||||
class dataset(BaseModel):
|
class Dataset(BaseModel):
|
||||||
id: Optional[int] = Field(default=None, description="数据集ID")
|
id: Optional[int] = Field(default=None, description="数据集ID")
|
||||||
name: str = Field(default="", description="数据集名称")
|
name: str = Field(default="", description="数据集名称")
|
||||||
model_id: Optional[list[str]] = Field(default=None, description="数据集使用的模型ID")
|
model_id: Optional[list[str]] = Field(default=None, description="数据集使用的模型ID")
|
||||||
source_doc: Optional[doc] = Field(default=None, description="数据集来源文档")
|
source_doc: Optional[Doc] = Field(default=None, description="数据集来源文档")
|
||||||
description: Optional[str] = Field(default="", description="数据集描述")
|
description: Optional[str] = Field(default="", description="数据集描述")
|
||||||
created_at: datetime = Field(
|
created_at: datetime = Field(
|
||||||
default_factory=lambda: datetime.now(timezone.utc),
|
default_factory=lambda: datetime.now(timezone.utc),
|
||||||
description="记录创建时间"
|
description="记录创建时间"
|
||||||
)
|
)
|
||||||
dataset_items: list[dataset_item] = Field(default_factory=list, description="数据集项列表")
|
dataset_items: list[DatasetItem] = Field(default_factory=list, description="数据集项列表")
|
@ -1,19 +1,19 @@
|
|||||||
from typing import List
|
from typing import List
|
||||||
from schema.dataset import dataset, dataset_item, Q_A
|
from schema.dataset import Dataset, DatasetItem, Q_A
|
||||||
import json
|
import json
|
||||||
|
|
||||||
def convert_json_to_dataset(json_data: List[dict]) -> dataset:
|
def convert_json_to_dataset(json_data: List[dict]) -> Dataset:
|
||||||
# 将JSON数据转换为dataset格式
|
# 将JSON数据转换为dataset格式
|
||||||
dataset_items = []
|
dataset_items = []
|
||||||
item_id = 1 # 自增ID计数器
|
item_id = 1 # 自增ID计数器
|
||||||
for item in json_data:
|
for item in json_data:
|
||||||
qa = Q_A(question=item["question"], answer=item["answer"])
|
qa = Q_A(question=item["question"], answer=item["answer"])
|
||||||
dataset_item_obj = dataset_item(id=item_id, message=[qa])
|
dataset_item_obj = DatasetItem(id=item_id, message=[qa])
|
||||||
dataset_items.append(dataset_item_obj)
|
dataset_items.append(dataset_item_obj)
|
||||||
item_id += 1 # ID自增
|
item_id += 1 # ID自增
|
||||||
|
|
||||||
# 创建dataset对象
|
# 创建dataset对象
|
||||||
result_dataset = dataset(
|
result_dataset = Dataset(
|
||||||
name="Converted Dataset",
|
name="Converted Dataset",
|
||||||
model_id=None,
|
model_id=None,
|
||||||
description="Dataset converted from JSON",
|
description="Dataset converted from JSON",
|
||||||
|
@ -4,7 +4,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
# 添加项目根目录到sys.path
|
# 添加项目根目录到sys.path
|
||||||
sys.path.append(str(Path(__file__).resolve().parent.parent))
|
sys.path.append(str(Path(__file__).resolve().parent.parent))
|
||||||
from schema import doc
|
from schema import Doc
|
||||||
|
|
||||||
def scan_docs_directory(workdir: str):
|
def scan_docs_directory(workdir: str):
|
||||||
docs_dir = os.path.join(workdir, "docs")
|
docs_dir = os.path.join(workdir, "docs")
|
||||||
@ -21,7 +21,7 @@ def scan_docs_directory(workdir: str):
|
|||||||
for file in files:
|
for file in files:
|
||||||
if file.endswith(".md"):
|
if file.endswith(".md"):
|
||||||
markdown_files.append(os.path.join(root, file))
|
markdown_files.append(os.path.join(root, file))
|
||||||
to_return.append(doc(name=doc_name, path=doc_path, markdown_files=markdown_files))
|
to_return.append(Doc(name=doc_name, path=doc_path, markdown_files=markdown_files))
|
||||||
|
|
||||||
return to_return
|
return to_return
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ if __name__ == "__main__":
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
# 添加项目根目录到sys.path
|
# 添加项目根目录到sys.path
|
||||||
sys.path.append(str(Path(__file__).resolve().parent.parent))
|
sys.path.append(str(Path(__file__).resolve().parent.parent))
|
||||||
from schema import dataset
|
from schema import Dataset
|
||||||
|
|
||||||
print("示例 JSON:")
|
print("示例 JSON:")
|
||||||
print(generate_example_json(dataset))
|
print(generate_example_json(Dataset))
|
||||||
|
@ -96,7 +96,7 @@ if __name__ == "__main__":
|
|||||||
from json_example import generate_example_json
|
from json_example import generate_example_json
|
||||||
from sqlmodel import Session, select
|
from sqlmodel import Session, select
|
||||||
from global_var import get_sql_engine, init_global_var
|
from global_var import get_sql_engine, init_global_var
|
||||||
from schema import dataset_item
|
from schema import DatasetItem
|
||||||
|
|
||||||
init_global_var("workdir")
|
init_global_var("workdir")
|
||||||
api_state = "1 deepseek-chat"
|
api_state = "1 deepseek-chat"
|
||||||
@ -105,7 +105,7 @@ if __name__ == "__main__":
|
|||||||
llm_request = LLMRequest(
|
llm_request = LLMRequest(
|
||||||
prompt="测试,随便说点什么",
|
prompt="测试,随便说点什么",
|
||||||
api_provider=api_provider,
|
api_provider=api_provider,
|
||||||
format=generate_example_json(dataset_item)
|
format=generate_example_json(DatasetItem)
|
||||||
)
|
)
|
||||||
|
|
||||||
# # 单次调用示例
|
# # 单次调用示例
|
||||||
|
Loading…
x
Reference in New Issue
Block a user