
- 新增 dataset.py 文件,定义数据集相关模型 - 新增 tools 目录,包含解析 Markdown 和扫描文档的功能 - 修改 parse_markdown.py,增加处理 Markdown 文件的函数 - 新增 scan_doc_dir.py,实现文档目录扫描功能
33 lines
980 B
Python
33 lines
980 B
Python
import sys
|
|
import os
|
|
from pathlib import Path
|
|
|
|
# Add the project root directory to sys.path so that `schema` can be imported
|
|
sys.path.append(str(Path(__file__).resolve().parent.parent))
|
|
from schema import doc
|
|
|
|
def scan_docs_directory(workdir: str) -> list:
    """Collect one ``doc`` record per sub-directory of ``<workdir>/docs``.

    Every immediate sub-directory of ``<workdir>/docs`` is treated as one
    document; plain files at that level are ignored. Each sub-directory is
    walked recursively and every file whose name ends with ``.md`` is
    recorded.

    Args:
        workdir: Directory that contains the ``docs`` folder.

    Returns:
        A list of ``doc`` objects, each built with ``name`` (the
        sub-directory name), ``path`` (``<workdir>/docs/<name>``) and
        ``markdown_files`` (paths of the ``.md`` files found below it).
        Entries are ordered by name and every ``markdown_files`` list is
        sorted, so the result is reproducible.

    Raises:
        FileNotFoundError: If ``<workdir>/docs`` does not exist
            (propagated from ``os.listdir``).
    """
    docs_dir = os.path.join(workdir, "docs")

    to_return = []
    # os.listdir() yields names in arbitrary order; sort them so the result
    # does not depend on the file system or on the run.
    for doc_name in sorted(os.listdir(docs_dir)):
        doc_path = os.path.join(docs_dir, doc_name)
        if not os.path.isdir(doc_path):
            # Only directories directly under docs/ count as documents.
            continue

        # os.walk() order is arbitrary as well, hence the sort.
        markdown_files = sorted(
            os.path.join(root, file_name)
            for root, _dirs, file_names in os.walk(doc_path)
            for file_name in file_names
            if file_name.endswith(".md")
        )
        to_return.append(
            doc(name=doc_name, path=doc_path, markdown_files=markdown_files)
        )

    return to_return
|
|
|
|
# Manual smoke test: scan the "workdir" folder that sits next to this
# script's parent directory and print whatever was found.
if __name__ == "__main__":
    script_dir = os.path.dirname(__file__)
    workdir = os.path.join(script_dir, "..", "workdir")
    print(scan_docs_directory(workdir))
|