gzhu-biyesheji/tools/parse_markdown.py
carry 3395b860e4 refactor(parse_markdown): 重构 Markdown 解析逻辑并使用 Pydantic 模型
将 MarkdownNode 类重构为使用 Pydantic 模型,提高代码的可维护性和类型安全性。同时,将解析逻辑与节点操作分离,简化代码结构。
2025-04-04 20:50:39 +08:00

70 lines
2.1 KiB
Python

import re
import sys
from pathlib import Path
# 添加项目根目录到sys.path
sys.path.append(str(Path(__file__).resolve().parent.parent))
from schema import MarkdownNode
def add_child(parent, child):
    """Attach *child* as the last entry of ``parent.children``.

    The parent's list is mutated in place; nothing is returned.
    """
    siblings = parent.children
    siblings.append(child)
def print_tree(node, indent=0):
    """Print *node* and its descendants to stdout as an indented tree.

    Args:
        node: Object exposing ``title``, ``content`` and ``children``.
        indent: Depth of *node* in the tree; 0 for the node the caller
            starts from. Recursive calls pass ``indent + 1``.

    The title is printed first (prefixed with a branch marker for any
    non-top-level node), then the content lines if ``content`` is truthy,
    then every child recursively.
    """
    # The indentation strings were previously empty, so repeating them
    # produced no indentation at all and every depth printed flush-left.
    indent_unit = "    "
    content_marker = "│  "

    if indent > 0:
        title_prefix = indent_unit * (indent - 1) + "└─ "
    else:
        title_prefix = ""
    print(f"{title_prefix}{node.title}")

    if node.content:
        # Content sits one unit deeper than the title's branch marker.
        pad = indent_unit * indent
        print(pad + "├─ [内容]")
        for line in node.content.split('\n'):
            print(pad + content_marker + line)

    for child in node.children:
        print_tree(child, indent + 1)
def _append_content(node, line):
    """Append *line* to ``node.content``, newline-separated from existing text."""
    if node.content:
        node.content += '\n'
    node.content += line


def parse_markdown(markdown):
    """Parse Markdown text into a tree of ``MarkdownNode`` objects.

    Args:
        markdown: The Markdown document as a single string.

    Returns:
        A root ``MarkdownNode`` built with default arguments. Each heading
        line becomes a node placed under the nearest preceding heading of
        a lower level; every other non-blank line is appended to the
        ``content`` of the most recent heading (or of the root when no
        heading has been seen yet).

    Notes:
        * Blank lines are skipped everywhere, including inside fenced code.
        * Fence lines themselves are not stored; only the lines between
          them are kept, verbatim, as content.
        * NOTE(review): assumes ``MarkdownNode()`` defaults ``content`` to a
          string and ``children`` to a list -- confirm against ``schema``.
    """
    heading_re = re.compile(r'^(#+)\s*(.*)')
    root = MarkdownNode()
    stack = [root]
    # Marker that opened the current fenced block (e.g. "```" or "~~~~"),
    # or None when outside a fence. Tracking the marker instead of a bare
    # boolean stops a "~~~" line inside a ``` block (or a shorter fence
    # inside a longer one) from closing the block early.
    open_fence = None

    # splitlines() also handles CRLF input, so no stray "\r" ends up in
    # titles or content.
    for line in markdown.splitlines():
        stripped = line.strip()
        if not stripped:
            continue

        if open_fence is not None:
            if stripped.startswith(open_fence):
                open_fence = None
            else:
                # Inside a fence everything is literal content, including
                # lines that start with '#'.
                _append_content(stack[-1], line)
            continue

        if stripped.startswith(("```", "~~~")):
            fence_char = stripped[0]
            run_length = len(stripped) - len(stripped.lstrip(fence_char))
            open_fence = fence_char * run_length
            continue

        match = heading_re.match(line)
        if match is None:
            _append_content(stack[-1], line)
            continue

        level = len(match.group(1))
        title = match.group(2).rstrip()
        node = MarkdownNode(level=level, title=title, content="", children=[])
        # Unwind to the nearest ancestor with a lower level; the root is
        # never popped so there is always a parent to attach to.
        while len(stack) > 1 and stack[-1].level >= level:
            stack.pop()
        add_child(stack[-1], node)
        stack.append(node)

    return root
if __name__ == "__main__":
    # Load the sample Markdown document, build its heading tree, and dump
    # the tree to stdout.
    example_path = Path("workdir/example.md")
    markdown = example_path.read_text(encoding="utf-8")
    root = parse_markdown(markdown)
    print_tree(root)