diff --git a/src/__tests__/parseSkill.test.ts b/src/__tests__/parseSkill.test.ts new file mode 100644 index 0000000..693389c --- /dev/null +++ b/src/__tests__/parseSkill.test.ts @@ -0,0 +1,96 @@ +import { describe, it, expect } from 'vitest' +import { parseSkillMarkdown } from '../utils/parseSkill' + +describe('parseSkillMarkdown', () => { + it('parses name and description from YAML frontmatter', () => { + const result = parseSkillMarkdown(`--- +name: webapp-testing +description: Test web apps with Playwright. +--- + +# Testing Guide + +Write Playwright scripts.`) + expect(result.name).toBe('webapp-testing') + expect(result.description).toBe('Test web apps with Playwright.') + expect(result.parseError).toBeUndefined() + }) + + it('extracts body after frontmatter', () => { + const result = parseSkillMarkdown(`--- +name: pdf +description: Process PDF files +--- + +# PDF Guide + +Extract text and tables. + +## Quick Start +\`\`\`python +from pypdf import PdfReader +\`\`\``) + expect(result.body).toContain('# PDF Guide') + expect(result.body).toContain('## Quick Start') + expect(result.body).toContain('from pypdf import PdfReader') + expect(result.bodyLineCount).toBeGreaterThan(0) + }) + + it('handles descriptions with special characters', () => { + const result = parseSkillMarkdown(`--- +name: xlsx +description: "Use when the user wants to open, read, or edit .xlsx, .csv files." 
+--- + +# Excel Guide`) + expect(result.description).toContain('.xlsx') + expect(result.description).toContain('.csv') + }) + + it('reports error when frontmatter is missing', () => { + const result = parseSkillMarkdown('# No frontmatter here\n\nJust markdown.') + expect(result.name).toBe('unknown') + expect(result.description).toBe('') + expect(result.parseError).toContain('未检测到') + }) + + it('reports error when name is missing', () => { + const result = parseSkillMarkdown(`--- +description: Some description +--- + +# Body`) + expect(result.parseError).toContain('缺少必填字段 name') + }) + + it('reports error when description is missing', () => { + const result = parseSkillMarkdown(`--- +name: some-skill +--- + +# Body`) + expect(result.parseError).toContain('缺少必填字段 description') + }) + + it('tracks body statistics', () => { + const body = '# Line 1\n\n## Line 2\n\n### Line 3' + const result = parseSkillMarkdown(`--- +name: test +description: A test skill +--- +${body}`) + expect(result.bodyCharCount).toBe(body.length) + expect(result.bodyLineCount).toBe(5) // 3 content lines + 2 blank + }) + + it('parses optional license field', () => { + const result = parseSkillMarkdown(`--- +name: test +description: desc +license: Complete terms in LICENSE.txt +--- + +# Body`) + expect(result.license).toBe('Complete terms in LICENSE.txt') + }) +}) diff --git a/src/components/segments/SkillsView.tsx b/src/components/segments/SkillsView.tsx index f618b31..c096f8a 100644 --- a/src/components/segments/SkillsView.tsx +++ b/src/components/segments/SkillsView.tsx @@ -67,22 +67,24 @@ function SkillDisclosure({ item }: { item: SkillSegment['items'][number] }) { className="mt-2 flex items-center gap-1 text-[10px] text-green-600 hover:text-green-800 transition-colors" > {layer === 3 ? : } - 查看注入指令(追加到对话) + {item.format === 'anthropic' ? '查看原始 SKILL.md' : '查看注入指令(追加到对话)'} )} )} - {/* Layer 3: 完整注入指令 */} + {/* Layer 3: 完整指令 */} {layer >= 3 && item.instructions && (
- 注入指令 · 触发 /{item.name} 时追加到对话 + {item.format === 'anthropic' + ? `SKILL.md · 触发 /${item.name} 时加载到 LLM 上下文` + : `注入指令 · 触发 /${item.name} 时追加到对话`}
-
+          
             {item.instructions}
           
diff --git a/src/context/ChatContext.tsx b/src/context/ChatContext.tsx index d159333..33cdaa8 100644 --- a/src/context/ChatContext.tsx +++ b/src/context/ChatContext.tsx @@ -13,8 +13,8 @@ interface ChatContextValue { const ChatContext = createContext(null) export function ChatProvider({ children }: { children: ReactNode }) { - const [activeDemo, setActiveDemo] = useState(4) // Default: Skills Demo E - const [envelope, setEnvelope] = useState(demos[4].envelope) + const [activeDemo, setActiveDemo] = useState(5) // Default: Real Skills Demo F + const [envelope, setEnvelope] = useState(demos[5].envelope) const switchDemo = (i: number) => { setActiveDemo(i) diff --git a/src/data/demos.ts b/src/data/demos.ts index 5f3d651..843594f 100644 --- a/src/data/demos.ts +++ b/src/data/demos.ts @@ -4,6 +4,7 @@ import { demoB } from './demos/demo-b' import { demoC } from './demos/demo-c' import { demoD } from './demos/demo-d' import { demoE } from './demos/demo-e' +import { demoF } from './demos/demo-f' export interface DemoScenario { id: string @@ -43,4 +44,10 @@ export const demos: DemoScenario[] = [ description: 'Skills 渐进式披露 + Agent 主动触发 /deep-research', envelope: demoE, }, + { + id: 'f', + label: '场景 F 📁', + description: '真实 Anthropic Skills(SKILL.md 文件加载 + parseSkillMarkdown 解析)', + envelope: demoF, + }, ] diff --git a/src/data/demos/demo-f.ts b/src/data/demos/demo-f.ts new file mode 100644 index 0000000..e9c3d28 --- /dev/null +++ b/src/data/demos/demo-f.ts @@ -0,0 +1,188 @@ +/** + * Demo F — 真实 Anthropic Skills(从 SKILL.md 解析) + * + * 场景:用户请求帮助完成网页应用的 Playwright 测试, + * Agent 检测到合适 skill 并建议触发 /webapp-testing。 + * + * 关键展示: + * 1. Skills 使用了真实的 Anthropic SKILL.md 格式 + * 2. parseSkillMarkdown() 解析 YAML frontmatter → SkillItem + * 3. SkillsView 渐进式披露 L1→L2→L3,L3 显示原始 SKILL.md body + * 4. Agent 以工具调用方式触发 skill + * 5. 
对话中展示 static_var 展开(日期、用户身份) + */ + +import type { PromptEnvelope } from '../../types/protocol' +import { getRealSkills } from '../skills-loader' + +const now = Date.now() + +export const demoF: PromptEnvelope = { + version: '1.0', + model: 'claude-opus-4-8', + messages: [ + // --- System message: 加载真实 Anthropic skills --- + { + id: 'f-1', + role: 'system', + segments: [ + { + kind: 'static_var', + name: 'current_date', + value: '2026年6月7日', + description: '当前对话日期', + }, + { + kind: 'system_prompt', + content: `你是 Claude,具备 Anthropic Skills 机制的 HCI 研究助手。 + +你加载了以下从官方 skills 仓库的 SKILL.md 文件解析得到的 skills。 +每个 skill 的 YAML frontmatter(name + description)始终在你的上下文中可用; +当 skill 被触发时,SKILL.md 的完整 body 会被加载。`, + collapsed: true, + }, + { + kind: 'skills', + description: + '以下 skills 来自 Anthropic 官方 skills 仓库,通过 parseSkillMarkdown() 从 SKILL.md 文件解析。name 和 description 取自 YAML frontmatter(L1),body 作为完整指令(L2/L3)。', + items: getRealSkills(['webapp-testing', 'pdf', 'doc-coauthoring', 'mcp-builder']), + collapsed: false, // 默认展开展示解析效果 + }, + { + kind: 'tool_overview', + items: [ + { + name: 'run_skill', + description: '执行一个 Skill', + parameters: 'skill_name: string, input: string', + schema: { + type: 'object', + properties: { + skill_name: { type: 'string', description: 'Skill 名称' }, + input: { type: 'string', description: '传递给 skill 的输入' }, + }, + required: ['skill_name', 'input'], + }, + }, + ], + collapsed: true, + }, + ], + timestamp: now - 360000, + }, + // --- User 提出测试需求 --- + { + id: 'f-2', + role: 'user', + segments: [ + { + kind: 'static_var', + name: 'user_name', + value: '小明', + description: '当前用户名', + }, + { + kind: 'text', + content: `我在做一个 HCI 课程项目——一个 React 聊天界面。本地 dev server 在 http://localhost:5173 运行。 + +我想验证几个功能: +1. 页面是否正常加载 +2. 场景切换按钮是否响应点击 +3. 
右侧的 Protocol Panel 是否正确显示 JSON + +能帮我自动化测试一下吗?`, + }, + ], + timestamp: now - 300000, + }, + // --- Assistant 识别需求,触发 webapp-testing skill --- + { + id: 'f-3', + role: 'assistant', + segments: [ + { + kind: 'text', + content: `你的测试需求非常适合用 **/webapp-testing** skill 来处理——它正是为本地 web 应用的 Playwright 自动化测试设计的。 + +让我触发这个 skill 来生成测试脚本。它来自 Anthropic 官方 skills 仓库,通过 \`parseSkillMarkdown()\` 解析 SKILL.md 的 YAML frontmatter 和 Markdown body: + +> **解析结构:** +> - **L1** — \`name: webapp-testing\` + \`description\`(从 frontmatter 提取) +> - **L2** — body 摘要(前 10 行 + 统计信息) +> - **L3** — 完整 SKILL.md 原始内容(点击「查看原始 SKILL.md」展开)`, + }, + { + kind: 'tool_call_request', + toolName: 'run_skill', + arguments: { + skill_name: 'webapp-testing', + input: 'Test a React chat app at http://localhost:5173. Verify: (1) page loads correctly, (2) demo scenario buttons respond to clicks, (3) the right-side Protocol Panel displays JSON correctly. Use Playwright with the reconnaissance-then-action pattern.', + }, + collapsed: false, + }, + ], + timestamp: now - 280000, + }, + // --- Skill 执行结果 --- + { + id: 'f-4', + role: 'assistant', + segments: [ + { + kind: 'tool_call_result', + toolName: 'run_skill', + success: true, + result: `[/webapp-testing] 执行完成 + +→ 启动 Playwright (headless Chromium) +→ 导航至 http://localhost:5173 +→ 等待 networkidle ... 就绪 +→ 执行侦察(截图 + DOM 检查) + +📸 页面截图: /tmp/inspect.png + +测试结果: + ✅ (1) 页面加载 — 标题 "Prompt Envelope Protocol" 可见,4 个 demo 按钮就位 + ✅ (2) 场景切换 — 点击「场景 A」→ 对话内容正确更新;点击「场景 C」→ 长文本+文档显示正常 + ✅ (3) Protocol Panel — 右侧面板显示 OpenAI Format JSON,包含 model/messages/tools 字段 + +总计: 3/3 测试通过 🎉 + +建议: 无需修改,界面功能正常。如需添加更多测试(如 Protocol Panel 复制/导出功能),请告知。`, + collapsed: false, + }, + ], + timestamp: now - 260000, + }, + // --- Assistant 总结 --- + { + id: 'f-5', + role: 'assistant', + segments: [ + { + kind: 'text', + content: `## 测试结果:全部通过 ✅ + +\`/webapp-testing\` skill 使用了 **reconnaissance-then-action** 模式: + +1. 先截图 + 检查 DOM 获取真实渲染状态 +2. 从渲染结果中识别选择器(而非猜测) +3. 
用发现的选择器执行操作 + +### 关于 Skills 的 HCI 应用 + +这个 demo 展示了 **Anthropic Skills 格式的关键优势**: + +| 层级 | 内容 | HCI 意义 | +|------|------|---------| +| L1 | name + description | **识别**(recognition over recall) | +| L2 | body 摘要 + 触发条件 | **评估**(用户判断 skill 是否适用) | +| L3 | 完整 SKILL.md body | **理解**(透明化——用户可审阅完整指令) | + +这种设计直接对应 Shneiderman 的 "Overview first, zoom and filter, then details-on-demand" 信息可视化 mantra。`, + }, + ], + timestamp: now - 240000, + }, + ], +} diff --git a/src/data/skills-loader.ts b/src/data/skills-loader.ts new file mode 100644 index 0000000..b7247db --- /dev/null +++ b/src/data/skills-loader.ts @@ -0,0 +1,88 @@ +// Skills 加载器 +// +// 从 src/data/skills/ 目录导入 SKILL.md 文件,通过 parseSkillMarkdown() +// 解析其 YAML frontmatter 和 Markdown body,映射为 UI 可用的 SkillItem。 +// +// 新增 skill:在 src/data/skills/<name>/ 下添加 SKILL.md,然后在此文件新增一行 import,并将其加入下方的 rawSkills(加载循环只遍历 rawSkills)。 + +import { parseSkillMarkdown, type ParsedSkill } from '../utils/parseSkill' +import type { SkillItem } from '../types/protocol' + +// ---- 导入本地 SKILL.md 文件(Vite ?raw → 以字符串导入) ---- +import webappTestingRaw from './skills/webapp-testing/SKILL.md?raw' +import pdfRaw from './skills/pdf/SKILL.md?raw' +import docCoauthoringRaw from './skills/doc-coauthoring/SKILL.md?raw' +import mcpBuilderRaw from './skills/mcp-builder/SKILL.md?raw' + +/** 已解析的所有 skills,按 name 索引 */ +export const PARSED_SKILLS: Record = {} + +const rawSkills: Record = { + webappTestingRaw, + pdfRaw, + docCoauthoringRaw, + mcpBuilderRaw, +} + +for (const raw of Object.values(rawSkills)) { + const parsed = parseSkillMarkdown(raw) + if (parsed.parseError) { + console.warn(`[skills-loader] 解析错误: ${parsed.name} — ${parsed.parseError}`) + } + PARSED_SKILLS[parsed.name] = parsed +} + +console.log( + `[skills-loader] 已加载 ${Object.keys(PARSED_SKILLS).length} 个 skills:`, + Object.keys(PARSED_SKILLS).join(', ') +) + +// ============================================================ +// 类型映射:ParsedSkill → SkillItem +// ============================================================ + +/** + * 
渐进式披露对应关系: + * L1 — name + description(YAML frontmatter,始终可见) + * L2 — body 摘要 + 触发条件(点击展开) + * L3 — 完整 SKILL.md 内容(再次点击展开) + */ +export function toSkillItem(parsed: ParsedSkill): SkillItem { + return { + name: parsed.name, + description: parsed.description, + detail: `SKILL.md · ${parsed.bodyLineCount} 行指令 · ${parsed.bodyCharCount} 字\n\n${truncateLines(parsed.body, 10)}`, + triggers: extractTriggerPhrases(parsed.description), + instructions: parsed.body, + format: 'anthropic', + } +} + +export function getRealSkills(names: string[]): SkillItem[] { + return names.map((n) => { + const parsed = PARSED_SKILLS[n] + if (!parsed) throw new Error(`Skill "${n}" 未找到。可用: ${Object.keys(PARSED_SKILLS).join(', ')}`) + return toSkillItem(parsed) + }) +} + +export function getAllRealSkillItems(): SkillItem[] { + return Object.values(PARSED_SKILLS).map(toSkillItem) +} + +// ---- 辅助 ---- + +function truncateLines(text: string, maxLines: number): string { + const lines = text.split('\n') + if (lines.length <= maxLines) return text + return lines.slice(0, maxLines).join('\n') + `\n...(共 ${lines.length} 行)` +} + +function extractTriggerPhrases(description: string): string[] { + const triggers: string[] = [] + const matches = description.matchAll(/"([^"]+)"/g) + for (const m of matches) { + if (m[1].length < 40) triggers.push(m[1]) + } + return triggers.slice(0, 8) +} diff --git a/src/data/skills.ts b/src/data/skills.ts new file mode 100644 index 0000000..ebedbaf --- /dev/null +++ b/src/data/skills.ts @@ -0,0 +1,147 @@ +/** + * Anthropic 渐进式披露 Skills — 唯一数据源 + * + * 每个 Skill 包含 3 层信息: + * L1 — name + description(始终可见) + * L2 — detail + triggers(点击展开) + * L3 — instructions(再次点击展开,触发时注入对话) + * + * Demo 场景通过 `getSkills(...)` 按名称选取子集,避免数据重复。 + */ + +import type { SkillItem } from '../types/protocol' + +export const ALL_SKILLS: Record = { + // ── 研究 & 文献 ── + 'deep-research': { + name: 'deep-research', + description: '深度研究 — 多源搜索、交叉验证、生成引用报告', + detail: + '多阶段研究技能:(1) 拆解问题为 3-5 个子问题,(2) 
并行搜索学术文献、行业报告、新闻等多个来源,(3) 抓取高相关性全文,(4) 三方交叉验证——至少两个独立来源确认同一关键事实,(5) 生成结构化报告:摘要 → 分项发现 → 证据表 → 限定说明。适合需要高质量、可验证答案的场景。', + triggers: ['深入调研', '全面分析', '研究一下', '查证', '给我一个研究报告', '这个领域有哪些'], + instructions: `你是一名深度研究助手。工作流程: +1. 分拆用户问题为 3-5 个子问题 +2. 对每个子问题执行多源搜索(学术 + 行业 + 新闻) +3. 抓取高相关性页面全文 +4. 交叉验证:至少两个独立来源确认同一关键事实 +5. 生成报告——摘要 → 分项发现 → 证据表 → 限定说明 +6. 每个声明确标注来源 URL 和可信度评级`, + }, + + // ── 代码审查 ── + 'code-review': { + name: 'code-review', + description: '代码审查 — 发现正确性 bug 和简化/效率优化机会', + detail: + '审查当前分支的代码变更,按两个维度分析:(1) 正确性问题——空值、边界条件、竞态、资源泄漏;(2) 质量改进——重复代码、过度复杂、性能瓶颈。按置信度分级输出,每个发现包含文件路径、行号、问题描述和修复建议。', + triggers: ['review', '审查', '帮我看看代码', '代码有什么问题', '检查一下'], + instructions: `你是代码审查专家。审查规则: +1. 首先读取 diff(git diff) +2. 正确性扫描:空值处理、边界条件、竞态条件、异常处理 +3. 质量扫描:重复代码、过长函数、深层嵌套、无用变量 +4. 每个发现标注:严重度(高/中/低)、文件路径、行号 +5. 提供具体的修复代码片段 +6. 避免纯风格的评论(交给 formatter)`, + }, + + // ── 行为验证 ── + verify: { + name: 'verify', + description: '行为验证 — 运行应用并观察行为来确认变更生效', + detail: + '启动应用、运行测试、或执行指定命令,并观察输出来验证某个变更是否按预期工作。支持多种验证策略:自动测试(优先)、端到端检查、手动观察。适合 PR 合并前的最终确认环节。', + triggers: ['验证', '测试一下', '确认', '检查是否生效', '跑一下', '运行'], + instructions: `你是验证助手。验证流程: +1. 确认待验证的变更是什么 +2. 选择验证策略:优先自动测试,其次手动观察 +3. 运行相关测试套件 +4. 如果测试通过,启动应用确认关键路径 +5. 输出验证报告:测试结果 + 观察到的问题 + 置信度`, + }, + + // ── 代码简化 ── + simplify: { + name: 'simplify', + description: '代码简化 — 审查代码的复用性、简洁性和效率并应用修复', + detail: + '只关注代码质量改进(不改行为)。扫描变更文件,发现:可抽取的共享逻辑、可合并的重复代码、不必要的中间变量、可简化表达式。直接应用修复到工作树。建议 code-review 先跑完后再用此技能。', + triggers: ['简化', '重构', '精简', '优化这段代码', '能不能更简单'], + instructions: `你是代码简化专家。简化规则: +1. 只做质量改进,不改变行为 +2. 发现重复代码 → 提取为函数/变量 +3. 发现过长函数 → 提取子步骤 +4. 发现不必要的中间变量 → 内联 +5. 发现深层嵌套 → 卫语句扁平化 +6. 修改后运行全部测试确认无回归`, + }, + + // ── 定时循环 ── + loop: { + name: 'loop', + description: '定时循环 — 按指定间隔重复执行一个命令或 prompt', + detail: + '设置定时任务,每隔指定时间(5 分钟、30 分钟、1 小时等)自动执行。适合 CI 监控、定时检查、长期运行的自动化任务。任务在后台运行,不阻塞当前对话。', + triggers: ['定时', '每隔', '循环', '持续监控', '定期检查', '每分钟'], + instructions: `你是定时任务助手。循环配置: +1. 确认循环间隔和任务内容 +2. 首次执行立即运行一次 +3. 后续按间隔自动触发 +4. 
每次执行输出简短状态摘要 +5. 用户可以随时说"停止循环"来终止 +6. 长时间循环(>1 小时)使用持久化模式`, + }, + + // ── 文档摘要 ── + summarize: { + name: 'summarize', + description: '生成文档摘要 — 支持多种粒度(一句话/段落级/全文级)', + detail: + '对用户提供的文档生成结构化摘要。支持三种粒度:一句话概览(≤50 字)、段落级摘要(保存关键论点)、全文级摘要(保留章节结构)。输出为 Markdown 格式。', + triggers: ['帮我总结一下', '概括这篇文章', '这个文档说了什么', '摘要'], + instructions: `你是专业文档摘要助手。当用户请求摘要时: +1. 先确认用户需要的粒度(简要/段落/全文) +2. 提取核心论点和支撑证据 +3. 使用 Markdown 层级结构输出 +4. 在末尾标注信息来源(章节/页码)`, + }, + + // ── 翻译 ── + translate: { + name: 'translate', + description: '翻译文档内容 — 支持中英互译,保留原文格式', + detail: + '将文档内容翻译为目标语言。保留原始 Markdown 格式、代码块、表格结构。支持术语表自定义。默认输出:中文 ↔ 英文。', + triggers: ['翻译', 'translate', '翻成中文', '译成英文'], + instructions: `你是专业翻译助手。翻译规则: +1. 保留所有 Markdown 格式和代码块 +2. 术语一致性——同一术语全文统一译法 +3. 学术文本保留原文关键术语括号标注 +4. 表格和列表结构不变`, + }, + + // ── 文档问答 ── + qa: { + name: 'qa', + description: '基于文档回答具体问题 — 带引用溯源', + detail: + '基于用户提供的文档内容回答具体问题。每个回答都附带文档引用(章节/段落号),用户可以追溯来源。支持追问和澄清。如果文档中没有相关信息,明确告知。', + triggers: ['文档中', '根据文章', '论文里提到', '这段说的是'], + instructions: `你基于文档回答用户问题。规则: +1. 每个陈述必须引用文档出处 +2. 如果文档中没有相关信息,明确告知 +3. 区分"文档直接陈述"和"你的推理延伸" +4. 不确定时提出澄清问题`, + }, +} + +/** + * 按名称选取一个或多个 skill 定义。 + * 用于构建 PromptEnvelope 中的 skills segment。 + */ +export function getSkills(names: string[]): SkillItem[] { + return names.map((n) => { + const skill = ALL_SKILLS[n] + if (!skill) throw new Error(`Unknown skill: ${n}`) + return { ...skill } // shallow copy so demos don't mutate entries + }) +} diff --git a/src/data/skills/doc-coauthoring/SKILL.md b/src/data/skills/doc-coauthoring/SKILL.md new file mode 100644 index 0000000..a5a6983 --- /dev/null +++ b/src/data/skills/doc-coauthoring/SKILL.md @@ -0,0 +1,375 @@ +--- +name: doc-coauthoring +description: Guide users through a structured workflow for co-authoring documentation. Use when user wants to write documentation, proposals, technical specs, decision docs, or similar structured content. 
This workflow helps users efficiently transfer context, refine content through iteration, and verify the doc works for readers. Trigger when user mentions writing docs, creating proposals, drafting specs, or similar documentation tasks. +--- + +# Doc Co-Authoring Workflow + +This skill provides a structured workflow for guiding users through collaborative document creation. Act as an active guide, walking users through three stages: Context Gathering, Refinement & Structure, and Reader Testing. + +## When to Offer This Workflow + +**Trigger conditions:** +- User mentions writing documentation: "write a doc", "draft a proposal", "create a spec", "write up" +- User mentions specific doc types: "PRD", "design doc", "decision doc", "RFC" +- User seems to be starting a substantial writing task + +**Initial offer:** +Offer the user a structured workflow for co-authoring the document. Explain the three stages: + +1. **Context Gathering**: User provides all relevant context while Claude asks clarifying questions +2. **Refinement & Structure**: Iteratively build each section through brainstorming and editing +3. **Reader Testing**: Test the doc with a fresh Claude (no context) to catch blind spots before others read it + +Explain that this approach helps ensure the doc works well when others read it (including when they paste it into Claude). Ask if they want to try this workflow or prefer to work freeform. + +If user declines, work freeform. If user accepts, proceed to Stage 1. + +## Stage 1: Context Gathering + +**Goal:** Close the gap between what the user knows and what Claude knows, enabling smart guidance later. + +### Initial Questions + +Start by asking the user for meta-context about the document: + +1. What type of document is this? (e.g., technical spec, decision doc, proposal) +2. Who's the primary audience? +3. What's the desired impact when someone reads this? +4. Is there a template or specific format to follow? +5. Any other constraints or context to know? 
+ +Inform them they can answer in shorthand or dump information however works best for them. + +**If user provides a template or mentions a doc type:** +- Ask if they have a template document to share +- If they provide a link to a shared document, use the appropriate integration to fetch it +- If they provide a file, read it + +**If user mentions editing an existing shared document:** +- Use the appropriate integration to read the current state +- Check for images without alt-text +- If images exist without alt-text, explain that when others use Claude to understand the doc, Claude won't be able to see them. Ask if they want alt-text generated. If so, request they paste each image into chat for descriptive alt-text generation. + +### Info Dumping + +Once initial questions are answered, encourage the user to dump all the context they have. Request information such as: +- Background on the project/problem +- Related team discussions or shared documents +- Why alternative solutions aren't being used +- Organizational context (team dynamics, past incidents, politics) +- Timeline pressures or constraints +- Technical architecture or dependencies +- Stakeholder concerns + +Advise them not to worry about organizing it - just get it all out. Offer multiple ways to provide context: +- Info dump stream-of-consciousness +- Point to team channels or threads to read +- Link to shared documents + +**If integrations are available** (e.g., Slack, Teams, Google Drive, SharePoint, or other MCP servers), mention that these can be used to pull in context directly. + +**If no integrations are detected and in Claude.ai or Claude app:** Suggest they can enable connectors in their Claude settings to allow pulling context from messaging apps and document storage directly. + +Inform them clarifying questions will be asked once they've done their initial dump. 
+ +**During context gathering:** + +- If user mentions team channels or shared documents: + - If integrations available: Inform them the content will be read now, then use the appropriate integration + - If integrations not available: Explain lack of access. Suggest they enable connectors in Claude settings, or paste the relevant content directly. + +- If user mentions entities/projects that are unknown: + - Ask if connected tools should be searched to learn more + - Wait for user confirmation before searching + +- As user provides context, track what's being learned and what's still unclear + +**Asking clarifying questions:** + +When user signals they've done their initial dump (or after substantial context provided), ask clarifying questions to ensure understanding: + +Generate 5-10 numbered questions based on gaps in the context. + +Inform them they can use shorthand to answer (e.g., "1: yes, 2: see #channel, 3: no because backwards compat"), link to more docs, point to channels to read, or just keep info-dumping. Whatever's most efficient for them. + +**Exit condition:** +Sufficient context has been gathered when questions show understanding - when edge cases and trade-offs can be asked about without needing basics explained. + +**Transition:** +Ask if there's any more context they want to provide at this stage, or if it's time to move on to drafting the document. + +If user wants to add more, let them. When ready, proceed to Stage 2. + +## Stage 2: Refinement & Structure + +**Goal:** Build the document section by section through brainstorming, curation, and iterative refinement. + +**Instructions to user:** +Explain that the document will be built section by section. For each section: +1. Clarifying questions will be asked about what to include +2. 5-20 options will be brainstormed +3. User will indicate what to keep/remove/combine +4. The section will be drafted +5. 
It will be refined through surgical edits + +Start with whichever section has the most unknowns (usually the core decision/proposal), then work through the rest. + +**Section ordering:** + +If the document structure is clear: +Ask which section they'd like to start with. + +Suggest starting with whichever section has the most unknowns. For decision docs, that's usually the core proposal. For specs, it's typically the technical approach. Summary sections are best left for last. + +If user doesn't know what sections they need: +Based on the type of document and template, suggest 3-5 sections appropriate for the doc type. + +Ask if this structure works, or if they want to adjust it. + +**Once structure is agreed:** + +Create the initial document structure with placeholder text for all sections. + +**If access to artifacts is available:** +Use `create_file` to create an artifact. This gives both Claude and the user a scaffold to work from. + +Inform them that the initial structure with placeholders for all sections will be created. + +Create artifact with all section headers and brief placeholder text like "[To be written]" or "[Content here]". + +Provide the scaffold link and indicate it's time to fill in each section. + +**If no access to artifacts:** +Create a markdown file in the working directory. Name it appropriately (e.g., `decision-doc.md`, `technical-spec.md`). + +Inform them that the initial structure with placeholders for all sections will be created. + +Create file with all section headers and placeholder text. + +Confirm the filename has been created and indicate it's time to fill in each section. + +**For each section:** + +### Step 1: Clarifying Questions + +Announce work will begin on the [SECTION NAME] section. Ask 5-10 clarifying questions about what should be included: + +Generate 5-10 specific questions based on context and section purpose. + +Inform them they can answer in shorthand or just indicate what's important to cover. 
+ +### Step 2: Brainstorming + +For the [SECTION NAME] section, brainstorm [5-20] things that might be included, depending on the section's complexity. Look for: +- Context shared that might have been forgotten +- Angles or considerations not yet mentioned + +Generate 5-20 numbered options based on section complexity. At the end, offer to brainstorm more if they want additional options. + +### Step 3: Curation + +Ask which points should be kept, removed, or combined. Request brief justifications to help learn priorities for the next sections. + +Provide examples: +- "Keep 1,4,7,9" +- "Remove 3 (duplicates 1)" +- "Remove 6 (audience already knows this)" +- "Combine 11 and 12" + +**If user gives freeform feedback** (e.g., "looks good" or "I like most of it but...") instead of numbered selections, extract their preferences and proceed. Parse what they want kept/removed/changed and apply it. + +### Step 4: Gap Check + +Based on what they've selected, ask if there's anything important missing for the [SECTION NAME] section. + +### Step 5: Drafting + +Use `str_replace` to replace the placeholder text for this section with the actual drafted content. + +Announce the [SECTION NAME] section will be drafted now based on what they've selected. + +**If using artifacts:** +After drafting, provide a link to the artifact. + +Ask them to read through it and indicate what to change. Note that being specific helps learning for the next sections. + +**If using a file (no artifacts):** +After drafting, confirm completion. + +Inform them the [SECTION NAME] section has been drafted in [filename]. Ask them to read through it and indicate what to change. Note that being specific helps learning for the next sections. + +**Key instruction for user (include when drafting the first section):** +Provide a note: Instead of editing the doc directly, ask them to indicate what to change. This helps learning of their style for future sections. 
For example: "Remove the X bullet - already covered by Y" or "Make the third paragraph more concise". + +### Step 6: Iterative Refinement + +As user provides feedback: +- Use `str_replace` to make edits (never reprint the whole doc) +- **If using artifacts:** Provide link to artifact after each edit +- **If using files:** Just confirm edits are complete +- If user edits doc directly and asks to read it: mentally note the changes they made and keep them in mind for future sections (this shows their preferences) + +**Continue iterating** until user is satisfied with the section. + +### Quality Checking + +After 3 consecutive iterations with no substantial changes, ask if anything can be removed without losing important information. + +When section is done, confirm [SECTION NAME] is complete. Ask if ready to move to the next section. + +**Repeat for all sections.** + +### Near Completion + +As approaching completion (80%+ of sections done), announce intention to re-read the entire document and check for: +- Flow and consistency across sections +- Redundancy or contradictions +- Anything that feels like "slop" or generic filler +- Whether every sentence carries weight + +Read entire document and provide feedback. + +**When all sections are drafted and refined:** +Announce all sections are drafted. Indicate intention to review the complete document one more time. + +Review for overall coherence, flow, completeness. + +Provide any final suggestions. + +Ask if ready to move to Reader Testing, or if they want to refine anything else. + +## Stage 3: Reader Testing + +**Goal:** Test the document with a fresh Claude (no context bleed) to verify it works for readers. + +**Instructions to user:** +Explain that testing will now occur to see if the document actually works for readers. This catches blind spots - things that make sense to the authors but might confuse others. 
+ +### Testing Approach + +**If access to sub-agents is available (e.g., in Claude Code):** + +Perform the testing directly without user involvement. + +### Step 1: Predict Reader Questions + +Announce intention to predict what questions readers might ask when trying to discover this document. + +Generate 5-10 questions that readers would realistically ask. + +### Step 2: Test with Sub-Agent + +Announce that these questions will be tested with a fresh Claude instance (no context from this conversation). + +For each question, invoke a sub-agent with just the document content and the question. + +Summarize what Reader Claude got right/wrong for each question. + +### Step 3: Run Additional Checks + +Announce additional checks will be performed. + +Invoke sub-agent to check for ambiguity, false assumptions, contradictions. + +Summarize any issues found. + +### Step 4: Report and Fix + +If issues found: +Report that Reader Claude struggled with specific issues. + +List the specific issues. + +Indicate intention to fix these gaps. + +Loop back to refinement for problematic sections. + +--- + +**If no access to sub-agents (e.g., claude.ai web interface):** + +The user will need to do the testing manually. + +### Step 1: Predict Reader Questions + +Ask what questions people might ask when trying to discover this document. What would they type into Claude.ai? + +Generate 5-10 questions that readers would realistically ask. + +### Step 2: Setup Testing + +Provide testing instructions: +1. Open a fresh Claude conversation: https://claude.ai +2. Paste or share the document content (if using a shared doc platform with connectors enabled, provide the link) +3. Ask Reader Claude the generated questions + +For each question, instruct Reader Claude to provide: +- The answer +- Whether anything was ambiguous or unclear +- What knowledge/context the doc assumes is already known + +Check if Reader Claude gives correct answers or misinterprets anything. 
+ +### Step 3: Additional Checks + +Also ask Reader Claude: +- "What in this doc might be ambiguous or unclear to readers?" +- "What knowledge or context does this doc assume readers already have?" +- "Are there any internal contradictions or inconsistencies?" + +### Step 4: Iterate Based on Results + +Ask what Reader Claude got wrong or struggled with. Indicate intention to fix those gaps. + +Loop back to refinement for any problematic sections. + +--- + +### Exit Condition (Both Approaches) + +When Reader Claude consistently answers questions correctly and doesn't surface new gaps or ambiguities, the doc is ready. + +## Final Review + +When Reader Testing passes: +Announce the doc has passed Reader Claude testing. Before completion: + +1. Recommend they do a final read-through themselves - they own this document and are responsible for its quality +2. Suggest double-checking any facts, links, or technical details +3. Ask them to verify it achieves the impact they wanted + +Ask if they want one more review, or if the work is done. + +**If user wants final review, provide it. Otherwise:** +Announce document completion. Provide a few final tips: +- Consider linking this conversation in an appendix so readers can see how the doc was developed +- Use appendices to provide depth without bloating the main doc +- Update the doc as feedback is received from real readers + +## Tips for Effective Guidance + +**Tone:** +- Be direct and procedural +- Explain rationale briefly when it affects user behavior +- Don't try to "sell" the approach - just execute it + +**Handling Deviations:** +- If user wants to skip a stage: Ask if they want to skip this and write freeform +- If user seems frustrated: Acknowledge this is taking longer than expected. 
Suggest ways to move faster +- Always give user agency to adjust the process + +**Context Management:** +- Throughout, if context is missing on something mentioned, proactively ask +- Don't let gaps accumulate - address them as they come up + +**Artifact Management:** +- Use `create_file` for drafting full sections +- Use `str_replace` for all edits +- Provide artifact link after every change +- Never use artifacts for brainstorming lists - that's just conversation + +**Quality over Speed:** +- Don't rush through stages +- Each iteration should make meaningful improvements +- The goal is a document that actually works for readers diff --git a/src/data/skills/mcp-builder/SKILL.md b/src/data/skills/mcp-builder/SKILL.md new file mode 100644 index 0000000..8a1a77a --- /dev/null +++ b/src/data/skills/mcp-builder/SKILL.md @@ -0,0 +1,236 @@ +--- +name: mcp-builder +description: Guide for creating high-quality MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. Use when building MCP servers to integrate external APIs or services, whether in Python (FastMCP) or Node/TypeScript (MCP SDK). +license: Complete terms in LICENSE.txt +--- + +# MCP Server Development Guide + +## Overview + +Create MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. The quality of an MCP server is measured by how well it enables LLMs to accomplish real-world tasks. + +--- + +# Process + +## 🚀 High-Level Workflow + +Creating a high-quality MCP server involves four main phases: + +### Phase 1: Deep Research and Planning + +#### 1.1 Understand Modern MCP Design + +**API Coverage vs. Workflow Tools:** +Balance comprehensive API endpoint coverage with specialized workflow tools. Workflow tools can be more convenient for specific tasks, while comprehensive coverage gives agents flexibility to compose operations. 
Performance varies by client—some clients benefit from code execution that combines basic tools, while others work better with higher-level workflows. When uncertain, prioritize comprehensive API coverage. + +**Tool Naming and Discoverability:** +Clear, descriptive tool names help agents find the right tools quickly. Use consistent prefixes (e.g., `github_create_issue`, `github_list_repos`) and action-oriented naming. + +**Context Management:** +Agents benefit from concise tool descriptions and the ability to filter/paginate results. Design tools that return focused, relevant data. Some clients support code execution which can help agents filter and process data efficiently. + +**Actionable Error Messages:** +Error messages should guide agents toward solutions with specific suggestions and next steps. + +#### 1.2 Study MCP Protocol Documentation + +**Navigate the MCP specification:** + +Start with the sitemap to find relevant pages: `https://modelcontextprotocol.io/sitemap.xml` + +Then fetch specific pages with `.md` suffix for markdown format (e.g., `https://modelcontextprotocol.io/specification/draft.md`). + +Key pages to review: +- Specification overview and architecture +- Transport mechanisms (streamable HTTP, stdio) +- Tool, resource, and prompt definitions + +#### 1.3 Study Framework Documentation + +**Recommended stack:** +- **Language**: TypeScript (high-quality SDK support and good compatibility in many execution environments e.g. MCPB. Plus AI models are good at generating TypeScript code, benefiting from its broad usage, static typing and good linting tools) +- **Transport**: Streamable HTTP for remote servers, using stateless JSON (simpler to scale and maintain, as opposed to stateful sessions and streaming responses). stdio for local servers. 
+ +**Load framework documentation:** + +- **MCP Best Practices**: [📋 View Best Practices](./reference/mcp_best_practices.md) - Core guidelines + +**For TypeScript (recommended):** +- **TypeScript SDK**: Use WebFetch to load `https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md` +- [⚡ TypeScript Guide](./reference/node_mcp_server.md) - TypeScript patterns and examples + +**For Python:** +- **Python SDK**: Use WebFetch to load `https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md` +- [🐍 Python Guide](./reference/python_mcp_server.md) - Python patterns and examples + +#### 1.4 Plan Your Implementation + +**Understand the API:** +Review the service's API documentation to identify key endpoints, authentication requirements, and data models. Use web search and WebFetch as needed. + +**Tool Selection:** +Prioritize comprehensive API coverage. List endpoints to implement, starting with the most common operations. + +--- + +### Phase 2: Implementation + +#### 2.1 Set Up Project Structure + +See language-specific guides for project setup: +- [⚡ TypeScript Guide](./reference/node_mcp_server.md) - Project structure, package.json, tsconfig.json +- [🐍 Python Guide](./reference/python_mcp_server.md) - Module organization, dependencies + +#### 2.2 Implement Core Infrastructure + +Create shared utilities: +- API client with authentication +- Error handling helpers +- Response formatting (JSON/Markdown) +- Pagination support + +#### 2.3 Implement Tools + +For each tool: + +**Input Schema:** +- Use Zod (TypeScript) or Pydantic (Python) +- Include constraints and clear descriptions +- Add examples in field descriptions + +**Output Schema:** +- Define `outputSchema` where possible for structured data +- Use `structuredContent` in tool responses (TypeScript SDK feature) +- Helps clients understand and process tool outputs + +**Tool Description:** +- Concise summary of functionality +- Parameter descriptions +- Return type 
schema + +**Implementation:** +- Async/await for I/O operations +- Proper error handling with actionable messages +- Support pagination where applicable +- Return both text content and structured data when using modern SDKs + +**Annotations:** +- `readOnlyHint`: true/false +- `destructiveHint`: true/false +- `idempotentHint`: true/false +- `openWorldHint`: true/false + +--- + +### Phase 3: Review and Test + +#### 3.1 Code Quality + +Review for: +- No duplicated code (DRY principle) +- Consistent error handling +- Full type coverage +- Clear tool descriptions + +#### 3.2 Build and Test + +**TypeScript:** +- Run `npm run build` to verify compilation +- Test with MCP Inspector: `npx @modelcontextprotocol/inspector` + +**Python:** +- Verify syntax: `python -m py_compile your_server.py` +- Test with MCP Inspector + +See language-specific guides for detailed testing approaches and quality checklists. + +--- + +### Phase 4: Create Evaluations + +After implementing your MCP server, create comprehensive evaluations to test its effectiveness. + +**Load [✅ Evaluation Guide](./reference/evaluation.md) for complete evaluation guidelines.** + +#### 4.1 Understand Evaluation Purpose + +Use evaluations to test whether LLMs can effectively use your MCP server to answer realistic, complex questions. + +#### 4.2 Create 10 Evaluation Questions + +To create effective evaluations, follow the process outlined in the evaluation guide: + +1. **Tool Inspection**: List available tools and understand their capabilities +2. **Content Exploration**: Use READ-ONLY operations to explore available data +3. **Question Generation**: Create 10 complex, realistic questions +4. 
**Answer Verification**: Solve each question yourself to verify answers + +#### 4.3 Evaluation Requirements + +Ensure each question is: +- **Independent**: Not dependent on other questions +- **Read-only**: Only non-destructive operations required +- **Complex**: Requiring multiple tool calls and deep exploration +- **Realistic**: Based on real use cases humans would care about +- **Verifiable**: Single, clear answer that can be verified by string comparison +- **Stable**: Answer won't change over time + +#### 4.4 Output Format + +Create an XML file with this structure: + +```xml +<evaluation> + <qa_pair> + <question>Find discussions about AI model launches with animal codenames. One model needed a specific safety designation that uses the format ASL-X. What number X was being determined for the model named after a spotted wild cat?</question> + <answer>3</answer> + </qa_pair> +</evaluation> +``` + +--- + +# Reference Files + +## 📚 Documentation Library + +Load these resources as needed during development: + +### Core MCP Documentation (Load First) +- **MCP Protocol**: Start with sitemap at `https://modelcontextprotocol.io/sitemap.xml`, then fetch specific pages with `.md` suffix +- [📋 MCP Best Practices](./reference/mcp_best_practices.md) - Universal MCP guidelines including: + - Server and tool naming conventions + - Response format guidelines (JSON vs Markdown) + - Pagination best practices + - Transport selection (streamable HTTP vs stdio) + - Security and error handling standards + +### SDK Documentation (Load During Phase 1/2) +- **Python SDK**: Fetch from `https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md` +- **TypeScript SDK**: Fetch from `https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md` + +### Language-Specific Implementation Guides (Load During Phase 2) +- [🐍 Python Implementation Guide](./reference/python_mcp_server.md) - Complete Python/FastMCP guide with: + - Server initialization patterns + - Pydantic model examples + - Tool registration with `@mcp.tool` + - 
Complete working examples + - Quality checklist + +- [⚡ TypeScript Implementation Guide](./reference/node_mcp_server.md) - Complete TypeScript guide with: + - Project structure + - Zod schema patterns + - Tool registration with `server.registerTool` + - Complete working examples + - Quality checklist + +### Evaluation Guide (Load During Phase 4) +- [✅ Evaluation Guide](./reference/evaluation.md) - Complete evaluation creation guide with: + - Question creation guidelines + - Answer verification strategies + - XML format specifications + - Example questions and answers + - Running an evaluation with the provided scripts diff --git a/src/data/skills/pdf/SKILL.md b/src/data/skills/pdf/SKILL.md new file mode 100644 index 0000000..d3e046a --- /dev/null +++ b/src/data/skills/pdf/SKILL.md @@ -0,0 +1,314 @@ +--- +name: pdf +description: Use this skill whenever the user wants to do anything with PDF files. This includes reading or extracting text/tables from PDFs, combining or merging multiple PDFs into one, splitting PDFs apart, rotating pages, adding watermarks, creating new PDFs, filling PDF forms, encrypting/decrypting PDFs, extracting images, and OCR on scanned PDFs to make them searchable. If the user mentions a .pdf file or asks to produce one, use this skill. +license: Proprietary. LICENSE.txt has complete terms +--- + +# PDF Processing Guide + +## Overview + +This guide covers essential PDF processing operations using Python libraries and command-line tools. For advanced features, JavaScript libraries, and detailed examples, see REFERENCE.md. If you need to fill out a PDF form, read FORMS.md and follow its instructions. 
+ +## Quick Start + +```python +from pypdf import PdfReader, PdfWriter + +# Read a PDF +reader = PdfReader("document.pdf") +print(f"Pages: {len(reader.pages)}") + +# Extract text +text = "" +for page in reader.pages: + text += page.extract_text() +``` + +## Python Libraries + +### pypdf - Basic Operations + +#### Merge PDFs +```python +from pypdf import PdfWriter, PdfReader + +writer = PdfWriter() +for pdf_file in ["doc1.pdf", "doc2.pdf", "doc3.pdf"]: + reader = PdfReader(pdf_file) + for page in reader.pages: + writer.add_page(page) + +with open("merged.pdf", "wb") as output: + writer.write(output) +``` + +#### Split PDF +```python +reader = PdfReader("input.pdf") +for i, page in enumerate(reader.pages): + writer = PdfWriter() + writer.add_page(page) + with open(f"page_{i+1}.pdf", "wb") as output: + writer.write(output) +``` + +#### Extract Metadata +```python +reader = PdfReader("document.pdf") +meta = reader.metadata +print(f"Title: {meta.title}") +print(f"Author: {meta.author}") +print(f"Subject: {meta.subject}") +print(f"Creator: {meta.creator}") +``` + +#### Rotate Pages +```python +reader = PdfReader("input.pdf") +writer = PdfWriter() + +page = reader.pages[0] +page.rotate(90) # Rotate 90 degrees clockwise +writer.add_page(page) + +with open("rotated.pdf", "wb") as output: + writer.write(output) +``` + +### pdfplumber - Text and Table Extraction + +#### Extract Text with Layout +```python +import pdfplumber + +with pdfplumber.open("document.pdf") as pdf: + for page in pdf.pages: + text = page.extract_text() + print(text) +``` + +#### Extract Tables +```python +with pdfplumber.open("document.pdf") as pdf: + for i, page in enumerate(pdf.pages): + tables = page.extract_tables() + for j, table in enumerate(tables): + print(f"Table {j+1} on page {i+1}:") + for row in table: + print(row) +``` + +#### Advanced Table Extraction +```python +import pandas as pd + +with pdfplumber.open("document.pdf") as pdf: + all_tables = [] + for page in pdf.pages: + tables = 
page.extract_tables() + for table in tables: + if table: # Check if table is not empty + df = pd.DataFrame(table[1:], columns=table[0]) + all_tables.append(df) + +# Combine all tables +if all_tables: + combined_df = pd.concat(all_tables, ignore_index=True) + combined_df.to_excel("extracted_tables.xlsx", index=False) +``` + +### reportlab - Create PDFs + +#### Basic PDF Creation +```python +from reportlab.lib.pagesizes import letter +from reportlab.pdfgen import canvas + +c = canvas.Canvas("hello.pdf", pagesize=letter) +width, height = letter + +# Add text +c.drawString(100, height - 100, "Hello World!") +c.drawString(100, height - 120, "This is a PDF created with reportlab") + +# Add a line +c.line(100, height - 140, 400, height - 140) + +# Save +c.save() +``` + +#### Create PDF with Multiple Pages +```python +from reportlab.lib.pagesizes import letter +from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak +from reportlab.lib.styles import getSampleStyleSheet + +doc = SimpleDocTemplate("report.pdf", pagesize=letter) +styles = getSampleStyleSheet() +story = [] + +# Add content +title = Paragraph("Report Title", styles['Title']) +story.append(title) +story.append(Spacer(1, 12)) + +body = Paragraph("This is the body of the report. " * 20, styles['Normal']) +story.append(body) +story.append(PageBreak()) + +# Page 2 +story.append(Paragraph("Page 2", styles['Heading1'])) +story.append(Paragraph("Content for page 2", styles['Normal'])) + +# Build PDF +doc.build(story) +``` + +#### Subscripts and Superscripts + +**IMPORTANT**: Never use Unicode subscript/superscript characters (₀₁₂₃₄₅₆₇₈₉, ⁰¹²³⁴⁵⁶⁷⁸⁹) in ReportLab PDFs. The built-in fonts do not include these glyphs, causing them to render as solid black boxes. 
+ +Instead, use ReportLab's XML markup tags in Paragraph objects: +```python +from reportlab.platypus import Paragraph +from reportlab.lib.styles import getSampleStyleSheet + +styles = getSampleStyleSheet() + +# Subscripts: use <sub> tag +chemical = Paragraph("H<sub>2</sub>O", styles['Normal']) + +# Superscripts: use <super> tag +squared = Paragraph("x<super>2</super> + y<super>2</super>", styles['Normal']) +``` + +For canvas-drawn text (not Paragraph objects), manually adjust the font size and position rather than using Unicode subscripts/superscripts. + +## Command-Line Tools + +### pdftotext (poppler-utils) +```bash +# Extract text +pdftotext input.pdf output.txt + +# Extract text preserving layout +pdftotext -layout input.pdf output.txt + +# Extract specific pages +pdftotext -f 1 -l 5 input.pdf output.txt # Pages 1-5 +``` + +### qpdf +```bash +# Merge PDFs +qpdf --empty --pages file1.pdf file2.pdf -- merged.pdf + +# Split pages +qpdf input.pdf --pages . 1-5 -- pages1-5.pdf +qpdf input.pdf --pages . 6-10 -- pages6-10.pdf + +# Rotate pages +qpdf input.pdf output.pdf --rotate=+90:1 # Rotate page 1 by 90 degrees + +# Remove password +qpdf --password=mypassword --decrypt encrypted.pdf decrypted.pdf +``` + +### pdftk (if available) +```bash +# Merge +pdftk file1.pdf file2.pdf cat output merged.pdf + +# Split +pdftk input.pdf burst + +# Rotate +pdftk input.pdf rotate 1east output rotated.pdf +``` + +## Common Tasks + +### Extract Text from Scanned PDFs +```python +# Requires: pip install pytesseract pdf2image +import pytesseract +from pdf2image import convert_from_path + +# Convert PDF to images +images = convert_from_path('scanned.pdf') + +# OCR each page +text = "" +for i, image in enumerate(images): + text += f"Page {i+1}:\n" + text += pytesseract.image_to_string(image) + text += "\n\n" + +print(text) +``` + +### Add Watermark +```python +from pypdf import PdfReader, PdfWriter + +# Create watermark (or load existing) +watermark = PdfReader("watermark.pdf").pages[0] + +# Apply to all pages +reader = 
PdfReader("document.pdf") +writer = PdfWriter() + +for page in reader.pages: + page.merge_page(watermark) + writer.add_page(page) + +with open("watermarked.pdf", "wb") as output: + writer.write(output) +``` + +### Extract Images +```bash +# Using pdfimages (poppler-utils) +pdfimages -j input.pdf output_prefix + +# This extracts all images as output_prefix-000.jpg, output_prefix-001.jpg, etc. +``` + +### Password Protection +```python +from pypdf import PdfReader, PdfWriter + +reader = PdfReader("input.pdf") +writer = PdfWriter() + +for page in reader.pages: + writer.add_page(page) + +# Add password +writer.encrypt("userpassword", "ownerpassword") + +with open("encrypted.pdf", "wb") as output: + writer.write(output) +``` + +## Quick Reference + +| Task | Best Tool | Command/Code | +|------|-----------|--------------| +| Merge PDFs | pypdf | `writer.add_page(page)` | +| Split PDFs | pypdf | One page per file | +| Extract text | pdfplumber | `page.extract_text()` | +| Extract tables | pdfplumber | `page.extract_tables()` | +| Create PDFs | reportlab | Canvas or Platypus | +| Command line merge | qpdf | `qpdf --empty --pages ...` | +| OCR scanned PDFs | pytesseract | Convert to image first | +| Fill PDF forms | pdf-lib or pypdf (see FORMS.md) | See FORMS.md | + +## Next Steps + +- For advanced pypdfium2 usage, see REFERENCE.md +- For JavaScript libraries (pdf-lib), see REFERENCE.md +- If you need to fill out a PDF form, follow the instructions in FORMS.md +- For troubleshooting guides, see REFERENCE.md diff --git a/src/data/skills/webapp-testing/SKILL.md b/src/data/skills/webapp-testing/SKILL.md new file mode 100644 index 0000000..4726215 --- /dev/null +++ b/src/data/skills/webapp-testing/SKILL.md @@ -0,0 +1,96 @@ +--- +name: webapp-testing +description: Toolkit for interacting with and testing local web applications using Playwright. Supports verifying frontend functionality, debugging UI behavior, capturing browser screenshots, and viewing browser logs. 
+license: Complete terms in LICENSE.txt +--- + +# Web Application Testing + +To test local web applications, write native Python Playwright scripts. + +**Helper Scripts Available**: +- `scripts/with_server.py` - Manages server lifecycle (supports multiple servers) + +**Always run scripts with `--help` first** to see usage. DO NOT read the source until you try running the script first and find that a customized solution is absolutely necessary. These scripts can be very large and thus pollute your context window. They exist to be called directly as black-box scripts rather than ingested into your context window. + +## Decision Tree: Choosing Your Approach + +``` +User task → Is it static HTML? + ├─ Yes → Read HTML file directly to identify selectors + │ ├─ Success → Write Playwright script using selectors + │ └─ Fails/Incomplete → Treat as dynamic (below) + │ + └─ No (dynamic webapp) → Is the server already running? + ├─ No → Run: python scripts/with_server.py --help + │ Then use the helper + write simplified Playwright script + │ + └─ Yes → Reconnaissance-then-action: + 1. Navigate and wait for networkidle + 2. Take screenshot or inspect DOM + 3. Identify selectors from rendered state + 4. 
Execute actions with discovered selectors +``` + +## Example: Using with_server.py + +To start a server, run `--help` first, then use the helper: + +**Single server:** +```bash +python scripts/with_server.py --server "npm run dev" --port 5173 -- python your_automation.py +``` + +**Multiple servers (e.g., backend + frontend):** +```bash +python scripts/with_server.py \ + --server "cd backend && python server.py" --port 3000 \ + --server "cd frontend && npm run dev" --port 5173 \ + -- python your_automation.py +``` + +To create an automation script, include only Playwright logic (servers are managed automatically): +```python +from playwright.sync_api import sync_playwright + +with sync_playwright() as p: + browser = p.chromium.launch(headless=True) # Always launch chromium in headless mode + page = browser.new_page() + page.goto('http://localhost:5173') # Server already running and ready + page.wait_for_load_state('networkidle') # CRITICAL: Wait for JS to execute + # ... your automation logic + browser.close() +``` + +## Reconnaissance-Then-Action Pattern + +1. **Inspect rendered DOM**: + ```python + page.screenshot(path='/tmp/inspect.png', full_page=True) + content = page.content() + page.locator('button').all() + ``` + +2. **Identify selectors** from inspection results + +3. **Execute actions** using discovered selectors + +## Common Pitfall + +❌ **Don't** inspect the DOM before waiting for `networkidle` on dynamic apps +✅ **Do** wait for `page.wait_for_load_state('networkidle')` before inspection + +## Best Practices + +- **Use bundled scripts as black boxes** - To accomplish a task, consider whether one of the scripts available in `scripts/` can help. These scripts handle common, complex workflows reliably without cluttering the context window. Use `--help` to see usage, then invoke directly. 
+- Use `sync_playwright()` for synchronous scripts +- Always close the browser when done +- Use descriptive selectors: `text=`, `role=`, CSS selectors, or IDs +- Add appropriate waits: `page.wait_for_selector()` or `page.wait_for_timeout()` + +## Reference Files + +- **examples/** - Examples showing common patterns: + - `element_discovery.py` - Discovering buttons, links, and inputs on a page + - `static_html_automation.py` - Using file:// URLs for local HTML + - `console_logging.py` - Capturing console logs during automation \ No newline at end of file diff --git a/src/types/protocol.ts b/src/types/protocol.ts index 4d871cb..f0661d2 100644 --- a/src/types/protocol.ts +++ b/src/types/protocol.ts @@ -82,12 +82,24 @@ export interface SkillSegment { * 第 2 层 — 详细描述 + 触发条件(点击展开单个 skill) * 第 3 层 — 完整指令(再次点击展开 —— 触发时作为一条新消息追加到对话中) */ +/** + * Skill 遵循 Anthropic 渐进式披露机制: + * + * 第 1 层 — 名称 + 一句话描述(始终可见,在 skills 面板中) + * 第 2 层 — 详细说明 + 触发条件(点击展开单个 skill) + * 第 3 层 — 完整指令(再次点击展开 —— 触发时注入上下文的 system prompt) + * + * format 字段区分来源: + * 'custom' — 手工编写的 skill(使用 detail/triggers/instructions 自定义) + * 'anthropic' — 从 SKILL.md 解析(instructions 为原始 body) + */ export interface SkillItem { name: string description: string // 第 1 层:一句话描述 detail?: string // 第 2 层:详细说明(功能、输入输出、适用场景) triggers?: string[] // 第 2 层:触发条件(用户说哪些话会触发此 skill) instructions?: string // 第 3 层:注入 LLM 上下文的完整 system prompt + format?: 'custom' | 'anthropic' // 来源格式 } export interface ToolOverviewSegment { diff --git a/src/utils/parseSkill.ts b/src/utils/parseSkill.ts new file mode 100644 index 0000000..056be2c --- /dev/null +++ b/src/utils/parseSkill.ts @@ -0,0 +1,137 @@ +/** + * Anthropic 官方 SKILL.md 解析器 + * + * 解析 SKILL.md 文件的 YAML frontmatter + Markdown body。 + * + * Anthropic Skills 格式规范: + * --- + * name: skill-name + * description: 何时触发 + 功能描述 + * license: ...(可选) + * --- + * 接下来的 Markdown 正文是技能的完整指令。 + * + * 渐进式披露 3 层模型: + * L1 — name + description(始终在上下文中,约 100 词) + * L2 — SKILL.md body(触发 skill 时加载到上下文,建议 <500 
行) + * L3 — 引用文件(按需加载,脚本可以不加载到上下文直接执行) + */ + +export interface ParsedSkill { + /** L1: skill 标识符,对应 slash command 名称 */ + name: string + /** L1: 触发条件 + 功能描述(这是 Claude 决定是否触发 skill 的主要依据) */ + description: string + /** 可选许可证信息 */ + license?: string + /** L2: SKILL.md 的 Markdown 正文(完整指令) */ + body: string + /** body 的行数 */ + bodyLineCount: number + /** body 的字符数 */ + bodyCharCount: number + /** YAML 解析错误信息(如果 frontmatter 格式有问题) */ + parseError?: string +} + +/** + * 极简 YAML frontmatter 解析器。 + * + * 只提取顶层的 string 值(name、description、license), + * 不依赖完整的 YAML 库——SKILL.md 的 frontmatter 足够简单。 + */ +function parseYamlFrontmatter(yaml: string): Record { + const result: Record = {} + const lines = yaml.split('\n') + let currentKey: string | null = null + let currentValue = '' + + for (const rawLine of lines) { + const line = rawLine.trimEnd() + + // 跳过空行和注释 + if (line === '' || line.startsWith('#')) continue + + // 检测新 key: value + const keyMatch = line.match(/^(\w[\w_-]*)\s*:\s*(.*)\s*$/) + if (keyMatch) { + // 保存上一个 key + if (currentKey) { + result[currentKey] = currentValue.trimEnd() + } + currentKey = keyMatch[1] + const val = keyMatch[2] + + // 支持双引号和无引号字符串 + if (val.startsWith('"') && val.endsWith('"') && val.length >= 2) { + currentValue = val.slice(1, -1) + '\n' + } else { + currentValue = val + '\n' + } + } else if (currentKey) { + // 续行:缩进内容 + const indentMatch = line.match(/^(\s+)(.*)$/) + if (indentMatch) { + currentValue += indentMatch[2] + '\n' + } + } + } + + // 保存最后一个 key + if (currentKey) { + result[currentKey] = currentValue.trimEnd() + } + + return result +} + +/** + * 解析 SKILL.md 文件内容。 + * + * @param content — SKILL.md 文件的完整文本内容 + * @returns ParsedSkill + */ +export function parseSkillMarkdown(content: string): ParsedSkill { + // 检测 frontmatter 分隔符 + const frontmatterMatch = content.match(/^---\s*\n([\s\S]*?)\n---\s*\n([\s\S]*)$/) + + if (!frontmatterMatch) { + // 无 frontmatter:整体作为 body,尝试用文件名推断 name + return { + name: 'unknown', + description: '', + 
body: content.trim(), + bodyLineCount: countLines(content), + bodyCharCount: content.trim().length, + parseError: '未检测到 YAML frontmatter(缺少 --- 分隔符)', + } + } + + const yamlBlock = frontmatterMatch[1] + const body = frontmatterMatch[2].trim() + + const meta = parseYamlFrontmatter(yamlBlock) + const errors: string[] = [] + + if (!meta.name) { + errors.push('缺少必填字段 name') + } + if (!meta.description) { + errors.push('缺少必填字段 description') + } + + return { + name: meta.name || 'unknown', + description: meta.description || '', + license: meta.license, + body, + bodyLineCount: countLines(body), + bodyCharCount: body.length, + parseError: errors.length > 0 ? errors.join(';') : undefined, + } +} + +function countLines(text: string): number { + if (!text.trim()) return 0 + return text.split('\n').length +} diff --git a/src/vite-env.d.ts b/src/vite-env.d.ts new file mode 100644 index 0000000..11f02fe --- /dev/null +++ b/src/vite-env.d.ts @@ -0,0 +1 @@ +///