"""Parse a Markdown document into a heading tree and flatten it to text chunks."""

import re
import sys
from pathlib import Path

# Add the project root directory to sys.path so `schema` can be imported.
sys.path.append(str(Path(__file__).resolve().parent.parent))

from schema import MarkdownNode

# A heading is one or more '#' at the very start of the line, optional
# whitespace, then the title text.
_HEADING_RE = re.compile(r'^(#+)\s*(.*)')

# A code fence is a run of three or more backticks or tildes at the start of
# the (stripped) line.
_FENCE_RE = re.compile(r'^(`{3,}|~{3,})')


def process_markdown_file(file_path):
    """Read a Markdown file and return one text chunk per leaf section.

    The file is read as UTF-8 and parsed with ``parse_markdown``. For every
    node that has no children and has non-empty content, a string is produced
    consisting of the titles on the path from the root to that node joined by
    ``' -> '``, a newline, and the node's content.

    Args:
        file_path: Path of the Markdown file to read.

    Returns:
        A list of chunk strings in document order.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    root = parse_markdown(content)
    results = []

    def traverse(node, parent_titles):
        # Build a new list so sibling branches never share path state.
        # The root's own title is part of the path as well.
        current_titles = parent_titles + [node.title]

        if node.children:
            # NOTE(review): content attached to a node that also has children
            # is not emitted; only leaves produce chunks. Confirm intended.
            for child in node.children:
                traverse(child, current_titles)
            return

        # Leaf node: emit only when it actually carries content.
        if node.content:
            results.append(' -> '.join(current_titles) + '\n' + node.content)

    traverse(root, [])
    return results


def add_child(parent, child):
    """Append ``child`` to ``parent.children``."""
    parent.children.append(child)


def print_tree(node, indent=0):
    """Print ``node`` and its descendants as an indented tree.

    Args:
        node: Node to print; its ``title``, ``content`` and ``children``
            attributes are read.
        indent: Nesting depth of ``node``; 0 prints the title without a
            branch prefix.
    """
    prefix = "│ " * (indent - 1) + "└─ " if indent > 0 else ""
    print(f"{prefix}{node.title}")

    if node.content:
        print("│ " * indent + "├─ [内容]")
        for line in node.content.split('\n'):
            print("│ " * indent + "│ " + line)

    for child in node.children:
        print_tree(child, indent + 1)


def _append_content(node, line):
    """Append ``line`` to ``node.content``, newline-separated when non-empty."""
    # Assigning instead of using `+=` also works when the initial content is
    # falsy but not a string (MarkdownNode defaults are not visible here).
    node.content = f"{node.content}\n{line}" if node.content else line


def parse_markdown(markdown):
    """Parse Markdown text into a tree of ``MarkdownNode`` objects.

    Each heading line becomes a node attached to the nearest preceding
    heading of a smaller level (or to the root). Every other non-blank line
    is appended to the content of the most recent heading. Blank lines and
    the code-fence delimiter lines themselves are dropped; lines inside a
    fenced code block are kept verbatim as content and are never treated as
    headings.

    Args:
        markdown: The Markdown source text.

    Returns:
        The root ``MarkdownNode`` (created with default arguments).
    """
    root = MarkdownNode()
    stack = [root]
    open_fence = None  # Marker that opened the current code block, if any.

    for line in markdown.split('\n'):
        stripped = line.strip()
        if stripped == "":
            continue

        fence = _FENCE_RE.match(stripped)

        if open_fence is not None:
            # Inside a code block: only a fence of the same character and at
            # least the same length closes it, so a '~~~' line inside a
            # backtick-fenced block stays part of the code.
            if (
                fence
                and fence.group(1)[0] == open_fence[0]
                and len(fence.group(1)) >= len(open_fence)
            ):
                open_fence = None
            else:
                _append_content(stack[-1], line)
            continue

        if fence:
            open_fence = fence.group(1)
            continue

        match = _HEADING_RE.match(line)
        if not match:
            _append_content(stack[-1], line)
            continue

        level = len(match.group(1))
        node = MarkdownNode(
            level=level, title=match.group(2), content="", children=[]
        )
        # Unwind to the closest ancestor with a smaller level. The root is
        # never popped, so the stack cannot become empty.
        while len(stack) > 1 and stack[-1].level >= level:
            stack.pop()
        add_child(stack[-1], node)
        stack.append(node)

    return root


if __name__ == "__main__":
    # To inspect the parsed structure instead, read the file, pass its text
    # to parse_markdown() and hand the returned root to print_tree().
    for i in process_markdown_file("workdir/example.md"):
        print("~" * 20)
        print(i)