refactor(parse_markdown): 重构 Markdown 解析逻辑并使用 Pydantic 模型

将 MarkdownNode 类重构为使用 Pydantic 模型，提高代码的可维护性和类型安全性。同时，将解析逻辑与节点操作分离，简化代码结构。
This commit is contained in:
carry
2025-04-04 20:50:39 +08:00
parent 22438d65d3
commit 3395b860e4
3 changed files with 38 additions and 25 deletions

View File

@@ -1,28 +1,24 @@
import re
import sys
from pathlib import Path
class MarkdownNode:
def __init__(self, level=0, title="Root"):
self.level = level
self.title = title
self.content = "" # 使用字符串存储合并后的内容
self.children = []
# Add the project root directory to sys.path (here: the directory two levels
# above this file) so the import of `schema` below can be resolved.
sys.path.append(str(Path(__file__).resolve().parent.parent))
from schema import MarkdownNode
def __repr__(self):
return f"({self.level}) {self.title}"
def add_child(parent, child):
    """Append ``child`` to the end of ``parent.children``.

    Args:
        parent: Object whose ``children`` attribute supports ``append``.
        child: Item to attach under ``parent``.
    """
    siblings = parent.children
    siblings.append(child)
def add_child(self, child):
self.children.append(child)
def print_tree(self, indent=0):
prefix = "" * (indent - 1) + "" if indent > 0 else ""
print(f"{prefix}{self.title}")
if self.content:
content_prefix = "" * indent + "├─ [内容]"
print(content_prefix)
for line in self.content.split('\n'):
print("" * indent + "" + line)
for child in self.children:
child.print_tree(indent + 1)
def print_tree(node, indent=0):
    """Print ``node``, its content lines, and all descendants to stdout.

    Args:
        node: Object exposing ``title``, ``content`` and ``children``.
        indent: Nesting depth of ``node``; each child is printed with
            ``indent + 1``.
    """
    # NOTE(review): the indent unit below is an empty string literal, so
    # repeating it has no effect on the output -- confirm whether tree-drawing
    # characters were lost from these literals.
    if indent > 0:
        heading_prefix = "" * (indent - 1) + "└─ "
    else:
        heading_prefix = ""
    print(f"{heading_prefix}{node.title}")

    body = node.content
    if body:
        # A marker line comes first, then one printed line per content line.
        print("" * indent + "[内容]")
        for text_line in body.split('\n'):
            print("" * indent + "" + text_line)

    for descendant in node.children:
        print_tree(descendant, indent + 1)
def parse_markdown(markdown):
lines = markdown.split('\n')
@@ -51,10 +47,10 @@ def parse_markdown(markdown):
if match:
level = len(match.group(1))
title = match.group(2)
node = MarkdownNode(level, title)
node = MarkdownNode(level=level, title=title, content="", children=[])
while stack[-1].level >= level:
stack.pop()
stack[-1].add_child(node)
add_child(stack[-1], node)
stack.append(node)
else:
if stack[-1].content:
@@ -65,9 +61,9 @@ def parse_markdown(markdown):
if __name__=="__main__":
# Read the Markdown content from a file
with open("example.md", "r", encoding="utf-8") as f:
with open("workdir/example.md", "r", encoding="utf-8") as f:
markdown = f.read()
# Parse the Markdown and print the tree structure
root = parse_markdown(markdown)
root.print_tree()
print_tree(root)