gzhu-biyesheji/tools/parse_markdown.py
2025-03-16 00:32:28 +08:00

59 lines
1.7 KiB
Python

import re
class MarkdownNode:
    """A node in a tree of Markdown headings.

    Each node stores the heading ``level``, its ``title``, the body text that
    belongs directly to that heading (``content``), and the nested headings
    (``children``). The default arguments describe the synthetic root node
    (level 0, title "Root").
    """

    # One level of indentation in the rendered tree. The previous code
    # multiplied an empty string, so nested nodes were never indented.
    _INDENT = "    "

    def __init__(self, level=0, title="Root"):
        """Create a node with the given heading level and title."""
        self.level = level
        self.title = title
        self.content = ""  # body lines merged into one newline-joined string
        self.children = []

    def __repr__(self):
        return f"({self.level}) {self.title}"

    def add_child(self, child):
        """Append ``child`` as the last child of this node."""
        self.children.append(child)

    def _tree_lines(self, indent=0):
        """Return the lines print_tree() emits for this subtree.

        ``indent`` is the depth of this node in the rendered output; the
        node's own line is drawn one level shallower with a "└─ " marker so
        it lines up under its parent's content marker.
        """
        pad = self._INDENT * indent
        prefix = self._INDENT * (indent - 1) + "└─ " if indent > 0 else ""
        lines = [f"{prefix}{self.title}"]
        if self.content:
            lines.append(pad + "├─ [内容]")
            # Body text sits one level deeper than its "[内容]" marker.
            body_pad = pad + self._INDENT
            lines.extend(body_pad + line for line in self.content.split('\n'))
        for child in self.children:
            lines.extend(child._tree_lines(indent + 1))
        return lines

    def print_tree(self, indent=0):
        """Print this node and all of its descendants as an indented tree.

        Args:
            indent: Depth at which this node is drawn; 0 for the root.
        """
        for line in self._tree_lines(indent):
            print(line)
def parse_markdown(markdown):
    """Parse Markdown text into a tree of MarkdownNode objects.

    Every line that starts with one or more ``#`` characters opens a new node
    whose level is the number of ``#`` characters; it is attached to the
    nearest preceding heading with a smaller level (or to the root). All
    other non-blank lines are appended, newline-joined, to the ``content`` of
    the most recent heading. Blank lines are dropped.

    Lines inside fenced code blocks (delimited by ``` or ~~~) are always
    treated as content, so ``#`` comments in code samples do not create
    spurious headings.

    Args:
        markdown: The full Markdown document as a single string.

    Returns:
        The synthetic root MarkdownNode (level 0) holding the parsed tree.
    """
    # Compiled once; the pattern itself is unchanged (whitespace after the
    # hashes stays optional, so "#Title" is still accepted as a heading).
    heading_re = re.compile(r'^(#+)\s*(.*)')
    fence_markers = ("```", "~~~")

    root = MarkdownNode()
    stack = [root]  # path from the root to the heading currently being filled
    fence = None  # opening marker of the fenced code block we are inside, if any

    for raw_line in markdown.split('\n'):
        # Tolerate CRLF input: otherwise "\r" leaks into titles and content.
        line = raw_line.rstrip('\r')
        if line.strip() == "":
            continue

        marker = line.lstrip()[:3]
        if fence is None and marker in fence_markers:
            fence = marker
            is_code = True
        elif fence is not None:
            is_code = True
            # Only the same kind of marker closes the block; the closing
            # line itself still belongs to the code block.
            if marker == fence:
                fence = None
        else:
            is_code = False

        match = None if is_code else heading_re.match(line)
        if match:
            level = len(match.group(1))
            title = match.group(2)
            node = MarkdownNode(level, title)
            # Unwind to the closest ancestor with a strictly smaller level.
            # The root has level 0, so it is never popped.
            while stack[-1].level >= level:
                stack.pop()
            stack[-1].add_child(node)
            stack.append(node)
        else:
            if stack[-1].content:
                stack[-1].content += '\n'
            stack[-1].content += line
    return root
if __name__ == "__main__":
    # Load the Markdown source text from disk.
    with open("example.md", encoding="utf-8") as source_file:
        document = source_file.read()
    # Build the heading tree and dump it to stdout.
    tree = parse_markdown(document)
    tree.print_tree()