"""Parse a Markdown document into a tree of headings (tools/parse_markdown.py).

Each ATX heading (``#``, ``##``, ...) becomes a :class:`MarkdownNode`; the
non-blank lines that follow a heading, up to the next heading, are stored as
that node's ``content``.
"""
import re
import sys

# One or more leading '#' characters, optional whitespace, then the title.
_HEADING_RE = re.compile(r'^(#+)\s*(.*)')
# Opening/closing marker of a fenced code block: up to three spaces of
# indentation followed by at least three backticks or three tildes.
_FENCE_RE = re.compile(r'^ {0,3}(`{3,}|~{3,})')


class MarkdownNode:
    """A heading in the document tree together with the text beneath it."""

    def __init__(self, level: int = 0, title: str = "Root"):
        self.level = level      # heading depth; 0 is the synthetic root
        self.title = title      # heading text
        self.content = ""       # merged body lines, joined with '\n'
        self.children = []      # nested MarkdownNode instances, in order

    def __repr__(self) -> str:
        return f"({self.level}) {self.title}"

    def add_child(self, child: "MarkdownNode") -> None:
        """Append *child* as the last sub-heading of this node."""
        self.children.append(child)

    def print_tree(self, indent: int = 0) -> None:
        """Print this node, its content and all descendants to stdout.

        *indent* is the nesting depth used to build the line prefix.
        """
        prefix = "│ " * (indent - 1) + "└─ " if indent > 0 else ""
        print(f"{prefix}{self.title}")
        if self.content:
            content_prefix = "│ " * indent + "├─ [内容]"
            print(content_prefix)
            for line in self.content.split('\n'):
                print("│ " * indent + "│ " + line)
        for child in self.children:
            child.print_tree(indent + 1)


def _append_content(node: MarkdownNode, line: str) -> None:
    """Add *line* to *node*'s content, newline-separated from earlier lines."""
    if node.content:
        node.content += '\n'
    node.content += line


def _fence_marker(line: str):
    """Return the fence run (e.g. '```') if *line* starts with one, else None."""
    found = _FENCE_RE.match(line)
    return found.group(1) if found else None


def parse_markdown(markdown: str) -> MarkdownNode:
    """Build a heading tree from Markdown text.

    Args:
        markdown: The full document text. ``\\n``, ``\\r\\n`` and ``\\r`` line
            endings are all accepted.

    Returns:
        A synthetic root node (level 0, title ``"Root"``) whose descendants
        are the document's headings. Blank lines are dropped; every other
        non-heading line is appended to the content of the nearest preceding
        heading (or the root if there is none).

    Lines inside fenced code blocks are always treated as content, so a
    ``# comment`` in a code sample does not become a heading.
    """
    # Normalise line endings so titles/content never carry a stray '\r'.
    lines = markdown.replace('\r\n', '\n').replace('\r', '\n').split('\n')
    root = MarkdownNode()
    stack = [root]      # path from the root to the most recent heading
    open_fence = None   # marker of the code fence we are inside, if any

    for line in lines:
        if line.strip() == "":
            continue

        marker = _fence_marker(line)
        if open_fence is not None:
            # Inside a fence: only a bare marker of the same character and at
            # least the opening length closes it.
            if (marker is not None
                    and marker[0] == open_fence[0]
                    and len(marker) >= len(open_fence)
                    and line.strip() == marker):
                open_fence = None
            _append_content(stack[-1], line)
            continue
        if marker is not None:
            # A backtick fence's info string may not contain backticks;
            # otherwise the line is inline code, not a fence opener.
            rest = line[line.index(marker) + len(marker):]
            if not (marker[0] == '`' and '`' in rest):
                open_fence = marker
            _append_content(stack[-1], line)
            continue

        match = _HEADING_RE.match(line)
        if match:
            level = len(match.group(1))
            node = MarkdownNode(level, match.group(2))
            # Unwind to the closest ancestor that is strictly shallower.
            # The root (level 0) is never popped because level >= 1.
            while stack[-1].level >= level:
                stack.pop()
            stack[-1].add_child(node)
            stack.append(node)
        else:
            _append_content(stack[-1], line)

    return root


if __name__ == "__main__":
    # Read Markdown from the path given on the command line, falling back to
    # "example.md" when no argument is supplied.
    path = sys.argv[1] if len(sys.argv) > 1 else "example.md"
    with open(path, "r", encoding="utf-8") as f:
        markdown = f.read()

    # Parse the Markdown and print the resulting tree.
    root = parse_markdown(markdown)
    root.print_tree()