
- 新增 dataset.py 文件,定义数据集相关模型 - 新增 tools 目录,包含解析 Markdown 和扫描文档的功能 - 修改 parse_markdown.py,增加处理 Markdown 文件的函数 - 新增 scan_doc_dir.py,实现文档目录扫描功能
28 lines
1.3 KiB
Python
28 lines
1.3 KiB
Python
from typing import Optional
|
|
from pydantic import BaseModel, Field
|
|
from datetime import datetime, timezone
|
|
|
|
# Document record: an optional numeric ID, a name, a path, and a list of
# Markdown file paths.
# (Documented with comments rather than a docstring: pydantic exports a
# model's docstring as its JSON-schema description.)
class doc(BaseModel):
    # Document ID; defaults to None when not supplied.
    id: Optional[int] = Field(
        description="文档ID",
        default=None,
    )
    # Document name; defaults to the empty string.
    name: str = Field(
        description="文档名称",
        default="",
    )
    # Document path; defaults to the empty string.
    path: str = Field(
        description="文档路径",
        default="",
    )
    # Paths of the Markdown files belonging to this document. A fresh list is
    # created per instance via default_factory.
    # NOTE(review): presumably these are files located under `path` — the
    # description string only says "list of document paths"; confirm.
    markdown_files: list[str] = Field(
        description="文档路径列表",
        default_factory=list,
    )
|
|
|
|
# One question/answer pair.
# (Documented with comments rather than a docstring: pydantic exports a
# model's docstring as its JSON-schema description.)
#
# NOTE(review): both fields declare min_length=1 yet default to "". Pydantic
# does not validate default values unless asked to, so `Q_A()` yields empty
# strings that an explicitly passed "" would be rejected for. Confirm whether
# these fields were meant to be required.
class Q_A(BaseModel):
    # Question text; explicitly supplied values must be at least 1 character.
    question: str = Field(
        description="问题",
        min_length=1,
        default="",
    )
    # Answer text; explicitly supplied values must be at least 1 character.
    answer: str = Field(
        description="答案",
        min_length=1,
        default="",
    )
|
|
|
|
# A single dataset entry: an optional ID plus a list of Q_A pairs.
# (Documented with comments rather than a docstring: pydantic exports a
# model's docstring as its JSON-schema description.)
class dataset_item(BaseModel):
    # Dataset item ID; defaults to None when not supplied.
    id: Optional[int] = Field(
        description="数据集项ID",
        default=None,
    )
    # Content of the item as a list of Q_A pairs. No default is given, so the
    # field is required when constructing an instance.
    message: list[Q_A] = Field(
        description="数据集项内容",
    )
|
|
|
|
# A dataset: identifying metadata, a creation timestamp, and its items.
# (Documented with comments rather than a docstring: pydantic exports a
# model's docstring as its JSON-schema description.)
class dataset(BaseModel):
    # Dataset ID; defaults to None when not supplied.
    id: Optional[int] = Field(
        description="数据集ID",
        default=None,
    )
    # Dataset name; defaults to None.
    name: Optional[str] = Field(
        description="数据集名称",
        default=None,
    )
    # IDs of the models used by this dataset; defaults to None.
    # NOTE(review): pydantic v2 treats the `model_` prefix as a protected
    # namespace and some releases warn about field names that use it —
    # confirm against the installed pydantic version.
    model_id: Optional[list[str]] = Field(
        description="数据集使用的模型ID",
        default=None,
    )
    # Free-text description; defaults to "" although None is also accepted.
    description: Optional[str] = Field(
        description="数据集描述",
        default="",
    )
    # Creation time of the record: a timezone-aware UTC timestamp taken when
    # the instance is built, unless a value is passed in.
    created_at: datetime = Field(
        description="记录创建时间",
        default_factory=lambda: datetime.now(tz=timezone.utc),
    )
    # Items of the dataset. A fresh list is created per instance via
    # default_factory.
    dataset_items: list[dataset_item] = Field(
        description="数据集项列表",
        default_factory=list,
    )