From 3395b860e4539413add8cb2e1d01189e3671008d Mon Sep 17 00:00:00 2001
From: carry <2641257231@qq.com>
Date: Fri, 4 Apr 2025 20:50:39 +0800
Subject: [PATCH] =?UTF-8?q?refactor(parse=5Fmarkdown):=20=E9=87=8D?=
 =?UTF-8?q?=E6=9E=84=20Markdown=20=E8=A7=A3=E6=9E=90=E9=80=BB=E8=BE=91?=
 =?UTF-8?q?=E5=B9=B6=E4=BD=BF=E7=94=A8=20Pydantic=20=E6=A8=A1=E5=9E=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

将 MarkdownNode 类重构为使用 Pydantic 模型，提高代码的可维护性和类型安全性。同时，将解析逻辑与节点操作分离，简化代码结构。
---
 schema/__init__.py      |  4 ++++
 schema/md_doc.py        | 13 ++++++++++++
 tools/parse_markdown.py | 46 +++++++++++++++++++----------------------
 3 files changed, 38 insertions(+), 25 deletions(-)
 create mode 100644 schema/__init__.py
 create mode 100644 schema/md_doc.py

diff --git a/schema/__init__.py b/schema/__init__.py
new file mode 100644
index 0000000..909ccd1
--- /dev/null
+++ b/schema/__init__.py
@@ -0,0 +1,4 @@
+from .dataset import *
+from .dataset_generation import APIProvider, LLMResponse, LLMRequest
+from .md_doc import MarkdownNode
+from .prompt_templeta import prompt_templeta
\ No newline at end of file
diff --git a/schema/md_doc.py b/schema/md_doc.py
new file mode 100644
index 0000000..d67d6dd
--- /dev/null
+++ b/schema/md_doc.py
@@ -0,0 +1,13 @@
+from pydantic import BaseModel, ConfigDict, Field
+from typing import List, Optional
+
+class MarkdownNode(BaseModel):
+    """Node of a Markdown heading tree; the field defaults describe the root node (level 0, "Root")."""
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)  # v2 replacement for the deprecated nested Config class
+    level: int = Field(default=0, description="节点层级")
+    title: str = Field(default="Root", description="节点标题")
+    content: Optional[str] = Field(default=None, description="节点内容")
+    children: List['MarkdownNode'] = Field(default_factory=list, description="子节点列表")
+
+MarkdownNode.model_rebuild()  # resolve the 'MarkdownNode' forward reference used by children
diff --git a/tools/parse_markdown.py b/tools/parse_markdown.py
index b9f41ea..6eeba01 100644
--- a/tools/parse_markdown.py
+++ b/tools/parse_markdown.py
@@ -1,28 +1,24 @@
 import re
+import sys
+from pathlib import Path
 
-class MarkdownNode:
-    def __init__(self, level=0, title="Root"):
-        self.level = level
-        self.title = title
-        self.content = ""  # 使用字符串存储合并后的内容
-        self.children = []
+# 添加项目根目录到sys.path
+sys.path.append(str(Path(__file__).resolve().parent.parent))
+from schema import MarkdownNode
 
-    def __repr__(self):
-        return f"({self.level}) {self.title}"
+def add_child(parent, child):
+    parent.children.append(child)  # mutates parent in place: child becomes its last sub-node
 
-    def add_child(self, child):
-        self.children.append(child)
-
-    def print_tree(self, indent=0):
-        prefix = "│  " * (indent - 1) + "└─ " if indent > 0 else ""
-        print(f"{prefix}{self.title}")
-        if self.content:
-            content_prefix = "│  " * indent + "├─ [内容]"
-            print(content_prefix)
-            for line in self.content.split('\n'):
-                print("│  " * indent + "│  " + line)
-        for child in self.children:
-            child.print_tree(indent + 1)
+def print_tree(node, indent=0):
+    """Print node and all of its descendants to stdout as an indented tree."""
+    bar = "│  "
+    print(f"{bar * (indent - 1)}└─ {node.title}" if indent > 0 else f"{node.title}")
+    if node.content:
+        print(bar * indent + "├─ [内容]")
+        for text in node.content.split('\n'):
+            print(bar * (indent + 1) + text)
+    for sub in node.children:
+        print_tree(sub, indent + 1)
 
 def parse_markdown(markdown):
     lines = markdown.split('\n')
@@ -51,10 +47,10 @@ def parse_markdown(markdown):
         if match:
             level = len(match.group(1))
             title = match.group(2)
-            node = MarkdownNode(level, title)
+            node = MarkdownNode(level=level, title=title, content="", children=[])
             while stack[-1].level >= level:
                 stack.pop()
-            stack[-1].add_child(node)
+            add_child(stack[-1], node)
             stack.append(node)
         else:
             if stack[-1].content:
@@ -65,9 +61,9 @@ def parse_markdown(markdown):
 
 if __name__=="__main__":
     # 从文件读取 Markdown 内容
-    with open("example.md", "r", encoding="utf-8") as f:
+    with open("workdir/example.md", "r", encoding="utf-8") as f:
         markdown = f.read()
 
     # 解析 Markdown 并打印树结构
     root = parse_markdown(markdown)
-    root.print_tree()
+    print_tree(root)