# gzhu-biyesheji/tools/parse_markdown.py

import re

class MarkdownNode:
    """One node of a Markdown outline tree.

    Attributes:
        level: Heading depth of this node; the default ``0`` is used for the
            synthetic root.
        title: Heading text shown for this node.
        content: Body text attached to this node, stored as a single string
            with lines joined by ``'\\n'``.
        children: Child nodes, in insertion order.
    """

    def __init__(self, level=0, title="Root"):
        """Create a node with no content and no children."""
        self.level = level
        self.title = title
        self.content = ""  # merged body text kept as one string
        self.children = []

    def __repr__(self):
        """Return ``"(<level>) <title>"``."""
        return "({}) {}".format(self.level, self.title)

    def add_child(self, child):
        """Append *child* to the end of this node's children."""
        self.children.append(child)

    def print_tree(self, indent=0):
        """Print this node and all of its descendants to stdout.

        Args:
            indent: Depth of this node in the printed tree; ``0`` prints the
                title without any branch prefix.
        """
        bar = "│  "

        # Title line: nodes below the top get a branch marker.
        if indent > 0:
            lead = bar * (indent - 1) + "└─ "
        else:
            lead = ""
        print(f"{lead}{self.title}")

        # Body text, one printed line per stored line, under a content marker.
        if self.content:
            gutter = bar * indent
            print(gutter + "├─ [内容]")
            for text in self.content.split('\n'):
                print(gutter + bar + text)

        # Recurse one level deeper for each child.
        deeper = indent + 1
        for sub in self.children:
            sub.print_tree(deeper)

def parse_markdown(markdown):
    """Parse Markdown text into a tree of ``MarkdownNode`` objects.

    Every line that starts with one or more ``#`` characters becomes a node
    whose level is the number of ``#`` characters; whitespace after the
    hashes is optional. Each node is attached to the nearest preceding node
    with a smaller level. Every other non-blank line is appended to the
    content of the most recent heading (or of the root if no heading has
    been seen yet), joined with ``'\\n'``. Blank lines are dropped.

    Lines inside fenced code blocks (opened by three or more backticks or
    tildes) are always treated as content, so ``#`` comments in code samples
    do not create headings. An unclosed fence extends to the end of the
    input.

    Args:
        markdown: The Markdown document as a single string. Any line-ending
            convention is accepted.

    Returns:
        The synthetic root ``MarkdownNode`` (level 0, title ``"Root"``).
    """
    # Compile once instead of looking the patterns up for every line.
    heading_re = re.compile(r'^(#+)\s*(.*)')
    # A fence is 3+ backticks or tildes, indented at most three spaces; a
    # backtick fence may not have further backticks on the same line
    # (that would be an inline code span, not a fence).
    fence_re = re.compile(r'^ {0,3}(`{3,}(?!.*`)|~{3,})')

    root = MarkdownNode()
    stack = [root]  # path from the root to the most recent heading
    open_fence = None  # marker of the fence we are inside, if any

    # splitlines() also removes '\r', which split('\n') would leave in titles.
    for line in markdown.splitlines():
        if not line.strip():
            continue

        heading = None
        fence = fence_re.match(line)
        if open_fence is not None:
            # Inside a code block: only a matching closing fence ends it.
            # It must use the same character, be at least as long as the
            # opening fence, and carry no trailing text.
            if (
                fence
                and fence.group(1)[0] == open_fence[0]
                and len(fence.group(1)) >= len(open_fence)
                and not line[fence.end():].strip()
            ):
                open_fence = None
        elif fence:
            open_fence = fence.group(1)
        else:
            heading = heading_re.match(line)

        if heading:
            level = len(heading.group(1))
            node = MarkdownNode(level, heading.group(2).strip())
            # Unwind to the closest ancestor with a strictly smaller level;
            # the root (level 0) is never popped because level >= 1.
            while stack[-1].level >= level:
                stack.pop()
            stack[-1].add_child(node)
            stack.append(node)
        else:
            owner = stack[-1]
            if owner.content:
                owner.content += '\n'
            owner.content += line

    return root

if __name__ == "__main__":
    # Load the Markdown text from example.md in the current working directory.
    with open("example.md", "r", encoding="utf-8") as source:
        text = source.read()

    # Parse the text into a heading tree and print that tree to stdout.
    parse_markdown(text).print_tree()