From 2f86485d9c7250c1c4ca3663fcf23f7cd3c41064 Mon Sep 17 00:00:00 2001 From: claude-code-best Date: Sat, 9 May 2026 17:14:41 +0800 Subject: [PATCH 1/4] =?UTF-8?q?refactor:=20=E7=B2=BE=E7=AE=80=E7=B3=BB?= =?UTF-8?q?=E7=BB=9F=E6=8F=90=E7=A4=BA=E8=AF=8D=20=E2=80=94=20=E5=90=88?= =?UTF-8?q?=E5=B9=B6=E6=B2=9F=E9=80=9A=E9=A3=8E=E6=A0=BC=E6=AE=B5=E8=90=BD?= =?UTF-8?q?=E3=80=81=E7=B2=BE=E7=AE=80=20memory/=E5=B7=A5=E5=85=B7?= =?UTF-8?q?=E6=8F=8F=E8=BF=B0=E3=80=81=E6=88=AA=E6=96=AD=20gitStatus?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 合并 getOutputEfficiencySection + getSimpleToneAndStyleSection 为精简的 Communication style - 精简 auto memory 指令:删除 4 种类型的详细说明和示例,仅保留核心 description - 精简 Agent 工具:删除 forkExamples 和 currentExamples 大段示例 - 精简 Bash 工具:合并 sleep 相关指导 - 精简 EnterPlanMode/ExitPlanMode:删除详细 GOOD/BAD 示例 - gitStatus MAX_STATUS_CHARS 从 2000 降到 1000 - 同步更新 prompt engineering audit 测试断言 Co-Authored-By: glm-5-turbo --- .../tools/AgentTool/__tests__/prompt.test.ts | 9 -- .../src/tools/AgentTool/prompt.ts | 93 +--------------- .../src/tools/BashTool/prompt.ts | 4 +- .../src/tools/EnterPlanModeTool/prompt.ts | 104 +++--------------- .../src/tools/ExitPlanModeTool/prompt.ts | 6 - .../promptEngineeringAudit.runner.ts | 16 +-- src/constants/prompts.ts | 41 +++---- src/context.ts | 2 +- src/memdir/memoryTypes.ts | 103 ++--------------- 9 files changed, 52 insertions(+), 326 deletions(-) diff --git a/packages/builtin-tools/src/tools/AgentTool/__tests__/prompt.test.ts b/packages/builtin-tools/src/tools/AgentTool/__tests__/prompt.test.ts index 8b5c2f73fa..745a1c3b83 100644 --- a/packages/builtin-tools/src/tools/AgentTool/__tests__/prompt.test.ts +++ b/packages/builtin-tools/src/tools/AgentTool/__tests__/prompt.test.ts @@ -57,13 +57,4 @@ describe('prompt.ts fork-related text verification', () => { expect(bgCondition[0]).not.toContain('!forkEnabled') } }) - - test('fork example includes fork: true parameter', () => { - // The first fork example 
should have fork: true - const forkExampleBlock = promptSource.match( - /name: "ship-audit"[\s\S]*?Under 200 words/, - ) - expect(forkExampleBlock).not.toBeNull() - expect(forkExampleBlock![0]).toContain('fork: true') - }) }) diff --git a/packages/builtin-tools/src/tools/AgentTool/prompt.ts b/packages/builtin-tools/src/tools/AgentTool/prompt.ts index 52c2ea0309..b38b6c3baa 100644 --- a/packages/builtin-tools/src/tools/AgentTool/prompt.ts +++ b/packages/builtin-tools/src/tools/AgentTool/prompt.ts @@ -5,7 +5,6 @@ import { isEnvDefinedFalsy, isEnvTruthy } from 'src/utils/envUtils.js' import { isTeammate } from 'src/utils/teammate.js' import { isInProcessTeammate } from 'src/utils/teammateContext.js' import { FILE_READ_TOOL_NAME } from '../FileReadTool/prompt.js' -import { FILE_WRITE_TOOL_NAME } from '../FileWriteTool/prompt.js' import { GLOB_TOOL_NAME } from '../GlobTool/prompt.js' import { SEND_MESSAGE_TOOL_NAME } from '../SendMessageTool/constants.js' import { AGENT_TOOL_NAME } from './constants.js' @@ -84,11 +83,11 @@ export async function getPrompt( When you need to delegate work that benefits from full conversation context (e.g., continuing a multi-file refactor where the child needs the same system prompt and history), use \`fork: true\`. For most tasks, prefer specialized agent types (Explore, Plan, general-purpose). -**Don't peek.** The tool result includes an \`output_file\` path — do not Read or tail it unless the user explicitly asks for a progress check. You get a completion notification; trust it. Reading the transcript mid-flight pulls the fork's tool noise into your context, which defeats the point of forking. +**Don't peek.** The tool result includes an \`output_file\` path — do not Read or tail it unless the user explicitly asks for a progress check. You get a completion notification; trust it. -**Don't race.** After launching, you know nothing about what the fork found. 
Never fabricate or predict fork results in any format — not as prose, summary, or structured output. The notification arrives as a user-role message in a later turn; it is never something you write yourself. If the user asks a follow-up before the notification lands, tell them the fork is still running — give status, not a guess. +**Don't race.** After launching, you know nothing about what the fork found. Never fabricate or predict fork results. If the user asks a follow-up before the notification lands, tell them the fork is still running. -**Writing a fork prompt.** Since the fork inherits your context, the prompt is a *directive* — what to do, not what the situation is. Be specific about scope: what's in, what's out, what another agent is handling. Don't re-explain background. +**Writing a fork prompt.** Since the fork inherits your context, the prompt is a *directive* — what to do, not what the situation is. Be specific about scope. Don't re-explain background. ` : '' @@ -97,91 +96,13 @@ When you need to delegate work that benefits from full conversation context (e.g ## Writing the prompt ${forkEnabled ? 'When spawning an agent without `fork: true`, it starts with zero context. ' : ''}Brief the agent like a smart colleague who just walked into the room — it hasn't seen this conversation, doesn't know what you've tried, doesn't understand why this task matters. -- Explain what you're trying to accomplish and why. -- Describe what you've already learned or ruled out. -- Give enough context about the surrounding problem that the agent can make judgment calls rather than just following a narrow instruction. +- Explain what you're trying to accomplish and why, what you've already learned or ruled out, and enough context for the agent to make judgment calls. - If you need a short response, say so ("report in under 200 words"). - Lookups: hand over the exact command. 
Investigations: hand over the question — prescribed steps become dead weight when the premise is wrong. ${forkEnabled ? 'For non-fork agents, terse' : 'Terse'} command-style prompts produce shallow, generic work. -**Never delegate understanding.** Don't write "based on your findings, fix the bug" or "based on the research, implement it." Those phrases push synthesis onto the agent instead of doing it yourself. Write prompts that prove you understood: include file paths, line numbers, what specifically to change. -` - - const forkExamples = `Example usage: - - -user: "What's left on this branch before we can ship?" -assistant: Forking this \u2014 it's a survey question. I want the punch list, not the git output in my context. -${AGENT_TOOL_NAME}({ - name: "ship-audit", - description: "Branch ship-readiness audit", - fork: true, - prompt: "Audit what's left before this branch can ship. Check: uncommitted changes, commits ahead of main, whether tests exist, whether the GrowthBook gate is wired up, whether CI-relevant files changed. Report a punch list \u2014 done vs. missing. Under 200 words." -}) -assistant: Ship-readiness audit running. - -Turn ends here. The coordinator knows nothing about the findings yet. What follows is a SEPARATE turn \u2014 the notification arrives from outside, as a user-role message. It is not something the coordinator writes. - -[later turn \u2014 notification arrives as user message] -assistant: Audit's back. Three blockers: no tests for the new prompt path, GrowthBook gate wired but not in build_flags.yaml, and one uncommitted file. - - - -user: "so is the gate wired up or not" - -User asks mid-wait. The audit fork was launched to answer exactly this, and it hasn't returned. The coordinator does not have this answer. Give status, not a fabricated result. - -assistant: Still waiting on the audit \u2014 that's one of the things it's checking. Should land shortly. - - - -user: "Can you get a second opinion on whether this migration is safe?" 
-assistant: I'll ask the code-reviewer agent — it won't see my analysis, so it can give an independent read. - -A subagent_type is specified, so the agent starts fresh. It needs full context in the prompt. The briefing explains what to assess and why. - -${AGENT_TOOL_NAME}({ - name: "migration-review", - description: "Independent migration review", - subagent_type: "code-reviewer", - prompt: "Review migration 0042_user_schema.sql for safety. Context: we're adding a NOT NULL column to a 50M-row table. Existing rows get a backfill default. I want a second opinion on whether the backfill approach is safe under concurrent writes — I've checked locking behavior but want independent verification. Report: is this safe, and if not, what specifically breaks?" -}) - -` - - const currentExamples = `Example usage: - - -"test-runner": use this agent after you are done writing code to run tests -"greeting-responder": use this agent to respond to user greetings with a friendly joke - - - -user: "Please write a function that checks if a number is prime" -assistant: I'm going to use the ${FILE_WRITE_TOOL_NAME} tool to write the following code: - -function isPrime(n) { - if (n <= 1) return false - for (let i = 2; i * i <= n; i++) { - if (n % i === 0) return false - } - return true -} - - -Since a significant piece of code was written and the task was completed, now use the test-runner agent to run the tests - -assistant: Uses the ${AGENT_TOOL_NAME} tool to launch the test-runner agent - - - -user: "Hello" - -Since the user is greeting, use the greeting-responder agent to respond with a friendly joke - -assistant: "I'm going to use the ${AGENT_TOOL_NAME} tool to launch the greeting-responder agent" - +**Never delegate understanding.** Don't write "based on your findings, fix the bug" or "based on the research, implement it." Write prompts that prove you understood: include file paths, line numbers, what specifically to change. 
` // When the gate is on, the agent list lives in an agent_listing_delta @@ -273,7 +194,5 @@ Usage notes: ? ` - The name, team_name, and mode parameters are not available in this context — teammates cannot spawn other teammates. Omit them to spawn a subagent.` : '' - }${whenToForkSection}${writingThePromptSection} - -${forkEnabled ? forkExamples : currentExamples}` + }${whenToForkSection}${writingThePromptSection}` } diff --git a/packages/builtin-tools/src/tools/BashTool/prompt.ts b/packages/builtin-tools/src/tools/BashTool/prompt.ts index 185401deca..a5ce8efb65 100644 --- a/packages/builtin-tools/src/tools/BashTool/prompt.ts +++ b/packages/builtin-tools/src/tools/BashTool/prompt.ts @@ -314,15 +314,13 @@ export function getSimplePrompt(): string { 'Use the Monitor tool to stream events from a background process (each stdout line is a notification). For one-shot "wait until done," use Bash with run_in_background instead.', ] : []), - 'If your command is long running and you would like to be notified when it finishes — use `run_in_background`. No sleep needed.', + 'For long-running commands, use `run_in_background` — you will be notified when it completes. Do not poll.', 'Do not retry failing commands in a sleep loop — diagnose the root cause.', - 'If waiting for a background task you started with `run_in_background`, you will be notified when it completes — do not poll.', ...(feature('MONITOR_TOOL') ? [ '`sleep N` as the first command with N ≥ 2 is blocked. If you need a delay (rate limiting, deliberate pacing), keep it under 2 seconds.', ] : [ - 'If you must poll an external process, use a check command (e.g. 
`gh run view`) rather than sleeping first.', 'If you must sleep, keep the duration short (1-5 seconds) to avoid blocking the user.', ]), ] diff --git a/packages/builtin-tools/src/tools/EnterPlanModeTool/prompt.ts b/packages/builtin-tools/src/tools/EnterPlanModeTool/prompt.ts index 71126cb67c..88a86b4e8c 100644 --- a/packages/builtin-tools/src/tools/EnterPlanModeTool/prompt.ts +++ b/packages/builtin-tools/src/tools/EnterPlanModeTool/prompt.ts @@ -26,33 +26,13 @@ function getEnterPlanModeToolPromptExternal(): string { **Prefer using EnterPlanMode** for implementation tasks unless they're simple. Use it when ANY of these conditions apply: -1. **New Feature Implementation**: Adding meaningful new functionality - - Example: "Add a logout button" - where should it go? What should happen on click? - - Example: "Add form validation" - what rules? What error messages? - -2. **Multiple Valid Approaches**: The task can be solved in several different ways - - Example: "Add caching to the API" - could use Redis, in-memory, file-based, etc. - - Example: "Improve performance" - many optimization strategies possible - -3. **Code Modifications**: Changes that affect existing behavior or structure - - Example: "Update the login flow" - what exactly should change? - - Example: "Refactor this component" - what's the target architecture? - -4. **Architectural Decisions**: The task requires choosing between patterns or technologies - - Example: "Add real-time updates" - WebSockets vs SSE vs polling - - Example: "Implement state management" - Redux vs Context vs custom solution - -5. **Multi-File Changes**: The task will likely touch more than 2-3 files - - Example: "Refactor the authentication system" - - Example: "Add a new API endpoint with tests" - -6. 
**Unclear Requirements**: You need to explore before understanding the full scope - - Example: "Make the app faster" - need to profile and identify bottlenecks - - Example: "Fix the bug in checkout" - need to investigate root cause - -7. **User Preferences Matter**: The implementation could reasonably go multiple ways - - If you would use ${ASK_USER_QUESTION_TOOL_NAME} to clarify the approach, use EnterPlanMode instead - - Plan mode lets you explore first, then present options with context +1. **New Feature Implementation** — Adding meaningful new functionality where the implementation path isn't obvious +2. **Multiple Valid Approaches** — The task can be solved in several different ways +3. **Code Modifications** — Changes that affect existing behavior or structure, where the user should approve the approach +4. **Architectural Decisions** — The task requires choosing between patterns or technologies +5. **Multi-File Changes** — The task will likely touch more than 2-3 files +6. **Unclear Requirements** — You need to explore before understanding the full scope +7. 
**User Preferences Matter** — If you would use ${ASK_USER_QUESTION_TOOL_NAME} to clarify the approach, use EnterPlanMode instead ## When NOT to Use This Tool @@ -62,35 +42,7 @@ Only skip EnterPlanMode for simple tasks: - Tasks where the user has given very specific, detailed instructions - Pure research/exploration tasks (use the Agent tool with explore agent instead) -${whatHappens}## Examples - -### GOOD - Use EnterPlanMode: -User: "Add user authentication to the app" -- Requires architectural decisions (session vs JWT, where to store tokens, middleware structure) - -User: "Optimize the database queries" -- Multiple approaches possible, need to profile first, significant impact - -User: "Implement dark mode" -- Architectural decision on theme system, affects many components - -User: "Add a delete button to the user profile" -- Seems simple but involves: where to place it, confirmation dialog, API call, error handling, state updates - -User: "Update the error handling in the API" -- Affects multiple files, user should approve the approach - -### BAD - Don't use EnterPlanMode: -User: "Fix the typo in the README" -- Straightforward, no planning needed - -User: "Add a console.log to debug this function" -- Simple, obvious implementation - -User: "What files handle routing?" -- Research task, not implementation planning - -## Important Notes +${whatHappens}## Important Notes - This tool REQUIRES user approval - they must consent to entering plan mode - If unsure whether to use it, err on the side of planning - it's better to get alignment upfront than to redo work @@ -111,53 +63,23 @@ function getEnterPlanModeToolPromptAnt(): string { Plan mode is valuable when the implementation approach is genuinely unclear. Use it when: -1. 
**Significant Architectural Ambiguity**: Multiple reasonable approaches exist and the choice meaningfully affects the codebase - - Example: "Add caching to the API" - Redis vs in-memory vs file-based - - Example: "Add real-time updates" - WebSockets vs SSE vs polling - -2. **Unclear Requirements**: You need to explore and clarify before you can make progress - - Example: "Make the app faster" - need to profile and identify bottlenecks - - Example: "Refactor this module" - need to understand what the target architecture should be - -3. **High-Impact Restructuring**: The task will significantly restructure existing code and getting buy-in first reduces risk - - Example: "Redesign the authentication system" - - Example: "Migrate from one state management approach to another" +1. **Significant Architectural Ambiguity** — Multiple reasonable approaches exist and the choice meaningfully affects the codebase +2. **Unclear Requirements** — You need to explore and clarify before you can make progress +3. **High-Impact Restructuring** — The task will significantly restructure existing code and getting buy-in first reduces risk ## When NOT to Use This Tool Skip plan mode when you can reasonably infer the right approach: - The task is straightforward even if it touches multiple files - The user's request is specific enough that the implementation path is clear -- You're adding a feature with an obvious implementation pattern (e.g., adding a button, a new endpoint following existing conventions) +- You're adding a feature with an obvious implementation pattern - Bug fixes where the fix is clear once you understand the bug - Research/exploration tasks (use the Agent tool instead) - The user says something like "can we work on X" or "let's do X" — just get started When in doubt, prefer starting work and using ${ASK_USER_QUESTION_TOOL_NAME} for specific questions over entering a full planning phase. 
-${whatHappens}## Examples - -### GOOD - Use EnterPlanMode: -User: "Add user authentication to the app" -- Genuinely ambiguous: session vs JWT, where to store tokens, middleware structure - -User: "Redesign the data pipeline" -- Major restructuring where the wrong approach wastes significant effort - -### BAD - Don't use EnterPlanMode: -User: "Add a delete button to the user profile" -- Implementation path is clear; just do it - -User: "Can we work on the search feature?" -- User wants to get started, not plan - -User: "Update the error handling in the API" -- Start working; ask specific questions if needed - -User: "Fix the typo in the README" -- Straightforward, no planning needed - -## Important Notes +${whatHappens}## Important Notes - This tool REQUIRES user approval - they must consent to entering plan mode ` diff --git a/packages/builtin-tools/src/tools/ExitPlanModeTool/prompt.ts b/packages/builtin-tools/src/tools/ExitPlanModeTool/prompt.ts index f7ebc7338f..34406ad762 100644 --- a/packages/builtin-tools/src/tools/ExitPlanModeTool/prompt.ts +++ b/packages/builtin-tools/src/tools/ExitPlanModeTool/prompt.ts @@ -20,10 +20,4 @@ Ensure your plan is complete and unambiguous: - Once your plan is finalized, use THIS tool to request approval **Important:** Do NOT use ${ASK_USER_QUESTION_TOOL_NAME} to ask "Is this plan okay?" or "Should I proceed?" - that's exactly what THIS tool does. ExitPlanMode inherently requests user approval of your plan. - -## Examples - -1. Initial task: "Search for and understand the implementation of vim mode in the codebase" - Do not use the exit plan mode tool because you are not planning the implementation steps of a task. -2. Initial task: "Help me implement yank mode for vim" - Use the exit plan mode tool after you have finished planning the implementation steps of the task. -3. 
Initial task: "Add a new feature to handle user authentication" - If unsure about auth method (OAuth, JWT, etc.), use ${ASK_USER_QUESTION_TOOL_NAME} first, then use exit plan mode tool after clarifying the approach. ` diff --git a/src/constants/promptEngineeringAudit.runner.ts b/src/constants/promptEngineeringAudit.runner.ts index b8f3f787a2..4af7e7f52a 100644 --- a/src/constants/promptEngineeringAudit.runner.ts +++ b/src/constants/promptEngineeringAudit.runner.ts @@ -424,8 +424,8 @@ describe('Opus 4.7 Prompt Engineering Audit', () => { test('includes anti-postamble guidance', async () => { const prompt = await getFullPrompt() - expect(prompt).toContain('Do not restate') - expect(prompt).toContain('the user can read the diff') + expect(prompt).toContain("don't restate") + expect(prompt).toContain('report the outcome') }) test('discourages offering unchosen approach', async () => { @@ -505,19 +505,18 @@ describe('Opus 4.7 Prompt Engineering Audit', () => { describe('#11 Formatting discipline', () => { test('prompt contains prose-first guidance (existing)', async () => { const prompt = await getFullPrompt() - expect(prompt).toContain('direct answer in prose') + expect(prompt).toContain('prose paragraphs') }) test('discourages over-formatting', async () => { const prompt = await getFullPrompt() expect(prompt).toContain('over-formatting') - expect(prompt).toContain('natural language') + expect(prompt).toContain('simple answers') }) test('bullet points must be 1-2 sentences, not fragments', async () => { const prompt = await getFullPrompt() expect(prompt).toContain('1-2 sentences') - expect(prompt).toContain('not sentence fragments') }) }) @@ -613,7 +612,8 @@ describe('Opus 4.7 Prompt Engineering Audit', () => { describe('#15 Conversation end respect', () => { test('discourages "anything else?" 
appendages', async () => { const prompt = await getFullPrompt() - expect(prompt).toContain('the user will ask if they need more') + expect(prompt).toContain('Do not append') + expect(prompt).toContain('Is there anything else?') }) }) @@ -656,7 +656,7 @@ describe('Opus 4.7 Prompt Engineering Audit', () => { test('no-machinery-narration: describe in user terms', async () => { const prompt = await getFullPrompt() expect(prompt).toContain("Don't narrate internal machinery") - expect(prompt).toContain('Describe the action in user terms') + expect(prompt).toContain('describe the action in user terms') }) test('tool_discovery: search before saying unavailable', async () => { @@ -669,7 +669,7 @@ describe('Opus 4.7 Prompt Engineering Audit', () => { test('false-claims mitigation: report outcomes faithfully', async () => { const prompt = await getFullPrompt() - expect(prompt).toContain('Report outcomes faithfully') + expect(prompt).toContain('report the outcome') }) test('CYBER_RISK_INSTRUCTION: allows security testing', async () => { diff --git a/src/constants/prompts.ts b/src/constants/prompts.ts index 80194feadc..cca0a4264f 100644 --- a/src/constants/prompts.ts +++ b/src/constants/prompts.ts @@ -380,41 +380,29 @@ function getSessionSpecificGuidanceSection( // (upstream ant-only version). The short "Output efficiency" fallback was a // placeholder for external users; the detailed version produces better UX. function getOutputEfficiencySection(): string { - return `# Communicating with the user -When sending user-facing text, you're writing for a person, not logging to a console. Assume users can't see most tool calls or thinking - only your text output. Before your first tool call, briefly state what you're about to do. While working, give short updates at key moments: when you find something load-bearing (a bug, a root cause), when changing direction, when you've made progress without an update. + return `# Communication style +Write for a person, not a console. 
Assume users can't see most tool calls or thinking — only your text output. Before your first tool call, briefly state what you're about to do. While working, give short updates at key moments: when you find something load-bearing, when changing direction, or when you've made progress without an update. -Don't narrate internal machinery. Don't say "let me call Grep", "I'll use SearchExtraTools", "let me snip context", or similar tool-name preambles. Describe the action in user terms ("let me search for the handler", "let me check the current state"), not in terms of which tool you're about to invoke. Don't justify why you're searching — just search. Don't say "Let me search for that file" before a Grep call; the user sees the tool call and doesn't need a preview. +Don't narrate internal machinery. Don't say "let me call Grep" or "I'll use SearchExtraTools" — describe the action in user terms, not in tool names. Don't justify why you're searching — just search. -When making updates, assume the person has stepped away and lost the thread. They don't know codenames, abbreviations, or shorthand you created along the way, and didn't track your process. Write so they can pick back up cold: use complete, grammatically correct sentences without unexplained jargon. Expand technical terms. Err on the side of more explanation. Attend to cues about the user's level of expertise; if they seem like an expert, tilt a bit more concise, while if they seem like they're new, be more explanatory. +When making updates, assume the person has stepped away and lost the thread. Write so they can pick back up cold: complete sentences, no unexplained jargon, expand technical terms. Err on the side of more explanation; attend to the user's expertise level. -Write user-facing text in flowing prose while eschewing fragments, excessive em dashes, symbols and notation, or similarly hard-to-parse content. 
Only use tables when appropriate; for example to hold short enumerable facts (file names, line numbers, pass/fail), or communicate quantitative data. Don't pack explanatory reasoning into table cells -- explain before or after. Avoid semantic backtracking: structure each sentence so a person can read it linearly, building up meaning without having to re-parse what came before. +Write in flowing prose. Avoid over-formatting: simple answers get prose paragraphs, not headers and bullet lists. Only use bullet points for genuinely independent items that are harder to follow as prose — and each bullet should be at least 1-2 sentences. -What's most important is the reader understanding your output without mental overhead or follow-ups, not how terse you are. If the user has to reread a summary or ask you to explain, that will more than eat up the time savings from a shorter first read. Match responses to the task: a simple question gets a direct answer in prose, not headers and numbered sections. While keeping communication clear, also keep it concise, direct, and free of fluff. Avoid filler or stating the obvious. Get straight to the point. Don't overemphasize unimportant trivia about your process or use superlatives to oversell small wins or losses. Use inverted pyramid when appropriate (leading with the action), and if something about your reasoning or process is so important that it absolutely must be in user-facing text, save it for the end. +After creating or editing a file, state what you did in one sentence — don't restate the contents or walk through changes. After running a command, report the outcome — don't re-explain what it does. Don't offer unchosen approaches unless asked. -Avoid over-formatting. For simple answers, use prose paragraphs, not headers and bullet lists. Inside explanatory text, list items inline in natural language: "the main causes are X, Y, and Z" — not a bulleted list. 
Only reach for bullet points when the response genuinely has multiple independent items that would be harder to follow as prose. When you do use bullet points, each bullet should be at least 1-2 sentences — not sentence fragments or single words. +When the task is done, report the result. Do not append "Is there anything else?" or "Let me know if you need anything else." -After creating or editing a file, state what you did in one sentence. Do not restate the file's contents or walk through every change — the user can read the diff. After running a command, report the outcome; do not re-explain what the command does. Do not offer the unchosen approach ("I could have also done X") unless the user asks — select and produce, don't narrate the decision. +If you need to ask the user a question, limit to one question per response. Address the request first, then ask. -When the task is done, report the result. Do not append "Is there anything else?" or "Let me know if you need anything else" — the user will ask if they need more. +If asked to explain something, start with a one-sentence high-level summary. If the user wants more depth, they'll ask. -If you need to ask the user a question, limit to one question per response. Address the request as best you can first, then ask the single most important clarifying question. +Only use emojis if the user explicitly requests it. +Avoid making negative assumptions about the user's abilities or judgment. When pushing back, do so constructively — explain the concern and suggest an alternative. +When referencing code, include file_path:line_number. For GitHub issues/PRs, use owner/repo#123 format. +Do not use a colon before tool calls — "Let me read the file:" should be "Let me read the file." with a period. -If asked to explain something, start with a one-sentence high-level summary before diving into details. If the user wants more depth, they'll ask. 
- -These user-facing text instructions do not apply to code or tool calls.` -} - -function getSimpleToneAndStyleSection(): string { - const items = [ - `Only use emojis if the user explicitly requests it. Avoid using emojis in all communication unless asked.`, - // Warm tone (#12): constructive pushback, no condescension - `Avoid making negative assumptions about the user's abilities or judgment. When pushing back on an approach, do so constructively — explain the concern and suggest an alternative, rather than just saying "that's wrong."`, - `When referencing specific functions or pieces of code include the pattern file_path:line_number to allow the user to easily navigate to the source code location.`, - `When referencing GitHub issues or pull requests, use the owner/repo#123 format (e.g. anthropics/claude-code#100) so they render as clickable links.`, - `Do not use a colon before tool calls. Your tool calls may not be shown directly in the output, so text like "Let me read the file:" followed by a read tool call should just be "Let me read the file." with a period.`, - ].filter(item => item !== null) - - return [`# Tone and style`, ...prependBullets(items)].join(`\n`) +These instructions do not apply to code or tool calls.` } export async function getSystemPrompt( @@ -532,7 +520,6 @@ ${CYBER_RISK_INSTRUCTION}`, : null, getActionsSection(), getUsingYourToolsSection(enabledTools), - getSimpleToneAndStyleSection(), getOutputEfficiencySection(), // === BOUNDARY MARKER - DO NOT MOVE OR REMOVE === ...(shouldUseGlobalCacheScope() ? 
[SYSTEM_PROMPT_DYNAMIC_BOUNDARY] : []), diff --git a/src/context.ts b/src/context.ts index 423414d13d..d1ffb8f04a 100644 --- a/src/context.ts +++ b/src/context.ts @@ -17,7 +17,7 @@ import { getBranch, getDefaultBranch, getIsGit, gitExe } from './utils/git.js' import { shouldIncludeGitInstructions } from './utils/gitSettings.js' import { logError } from './utils/log.js' -const MAX_STATUS_CHARS = 2000 +const MAX_STATUS_CHARS = 1000 // System prompt injection for cache breaking (ant-only, ephemeral debugging state) let systemPromptInjection: string | null = null diff --git a/src/memdir/memoryTypes.ts b/src/memdir/memoryTypes.ts index 99b44830ac..11b132909e 100644 --- a/src/memdir/memoryTypes.ts +++ b/src/memdir/memoryTypes.ts @@ -43,63 +43,22 @@ export const TYPES_SECTION_COMBINED: readonly string[] = [ '', ' user', ' always private', - " Contain information about the user's role, goals, responsibilities, and knowledge. Great user memories help you tailor your future behavior to the user's preferences and perspective. Your goal in reading and writing these memories is to build up an understanding of who the user is and how you can be most helpful to them specifically. For example, you should collaborate with a senior software engineer differently than a student who is coding for the very first time. Keep in mind, that the aim here is to be helpful to the user. Avoid writing memories about the user that could be viewed as a negative judgement or that are not relevant to the work you're trying to accomplish together.", - " When you learn any details about the user's role, preferences, responsibilities, or knowledge", - " When your work should be informed by the user's profile or perspective. 
For example, if the user is asking you to explain a part of the code, you should answer that question in a way that is tailored to the specific details that they will find most valuable or that helps them build their mental model in relation to domain knowledge they already have.", - ' ', - " user: I'm a data scientist investigating what logging we have in place", - ' assistant: [saves private user memory: user is a data scientist, currently focused on observability/logging]', - '', - " user: I've been writing Go for ten years but this is my first time touching the React side of this repo", - " assistant: [saves private user memory: deep Go expertise, new to React and this project's frontend — frame frontend explanations in terms of backend analogues]", - ' ', + " The user's role, goals, preferences, responsibilities, and knowledge. Use these to tailor your behavior to the user.", '', '', ' feedback', ' default to private. Save as team only when the guidance is clearly a project-wide convention that every contributor should follow (e.g., a testing policy, a build invariant), not a personal style preference.', - " Guidance the user has given you about how to approach work — both what to avoid and what to keep doing. These are a very important type of memory to read and write as they allow you to remain coherent and responsive to the way you should approach work in the project. Record from failure AND success: if you only save corrections, you will avoid past mistakes but drift away from approaches the user has already validated, and may grow overly cautious. Before saving a private feedback memory, check that it doesn't contradict a team feedback memory — if it does, either don't save it or note the override explicitly.", - ' Any time the user corrects your approach ("no not that", "don\'t", "stop doing X") OR confirms a non-obvious approach worked ("yes exactly", "perfect, keep doing that", accepting an unusual choice without pushback). 
Corrections are easy to notice; confirmations are quieter — watch for them. In both cases, save what is applicable to future conversations, especially if surprising or not obvious from the code. Include *why* so you can judge edge cases later.', - ' Let these memories guide your behavior so that the user and other users in the project do not need to offer the same guidance twice.', - ' Lead with the rule itself, then a **Why:** line (the reason the user gave — often a past incident or strong preference) and a **How to apply:** line (when/where this guidance kicks in). Knowing *why* lets you judge edge cases instead of blindly following the rule.', - ' ', - " user: don't mock the database in these tests — we got burned last quarter when mocked tests passed but the prod migration failed", - ' assistant: [saves team feedback memory: integration tests must hit a real database, not mocks. Reason: prior incident where mock/prod divergence masked a broken migration. Team scope: this is a project testing policy, not a personal preference]', - '', - ' user: stop summarizing what you just did at the end of every response, I can read the diff', - " assistant: [saves private feedback memory: this user wants terse responses with no trailing summaries. Private because it's a communication preference, not a project convention]", - '', - " user: yeah the single bundled PR was the right call here, splitting this one would've just been churn", - ' assistant: [saves private feedback memory: for refactors in this area, user prefers one bundled PR over many small ones. Confirmed after I chose this approach — a validated judgment call, not a correction]', - ' ', + ' Guidance from the user about how to approach work — what to avoid and what to keep doing. Record from failure AND success. Include *why* so you can judge edge cases later. 
Structure content as: rule/fact, then **Why:** and **How to apply:** lines.', '', '', ' project', ' private or team, but strongly bias toward team', - ' Information that you learn about ongoing work, goals, initiatives, bugs, or incidents within the project that is not otherwise derivable from the code or git history. Project memories help you understand the broader context and motivation behind the work users are working on within this working directory.', - ' When you learn who is doing what, why, or by when. These states change relatively quickly so try to keep your understanding of this up to date. Always convert relative dates in user messages to absolute dates when saving (e.g., "Thursday" → "2026-03-05"), so the memory remains interpretable after time passes.', - " Use these memories to more fully understand the details and nuance behind the user's request, anticipate coordination issues across users, make better informed suggestions.", - ' Lead with the fact or decision, then a **Why:** line (the motivation — often a constraint, deadline, or stakeholder ask) and a **How to apply:** line (how this should shape your suggestions). Project memories decay fast, so the why helps future-you judge whether the memory is still load-bearing.', - ' ', - " user: we're freezing all non-critical merges after Thursday — mobile team is cutting a release branch", - ' assistant: [saves team project memory: merge freeze begins 2026-03-05 for mobile release cut. 
Flag any non-critical PR work scheduled after that date]', - '', - " user: the reason we're ripping out the old auth middleware is that legal flagged it for storing session tokens in a way that doesn't meet the new compliance requirements", - ' assistant: [saves team project memory: auth middleware rewrite is driven by legal/compliance requirements around session token storage, not tech-debt cleanup — scope decisions should favor compliance over ergonomics]', - ' ', + ' Information about ongoing work, goals, initiatives, bugs, or incidents not derivable from code or git history. Convert relative dates to absolute dates when saving (e.g., "Thursday" → "2026-03-05").', '', '', ' reference', ' usually team', - ' Stores pointers to where information can be found in external systems. These memories allow you to remember where to look to find up-to-date information outside of the project directory.', - ' When you learn about resources in external systems and their purpose. For example, that bugs are tracked in a specific project in Linear or that feedback can be found in a specific Slack channel.', - ' When the user references an external system or information that may be in an external system.', - ' ', - ' user: check the Linear project "INGEST" if you want context on these tickets, that\'s where we track all pipeline bugs', - ' assistant: [saves team reference memory: pipeline bugs are tracked in Linear project "INGEST"]', - '', - " user: the Grafana board at grafana.internal/d/api-latency is what oncall watches — if you're touching request handling, that's the thing that'll page someone", - ' assistant: [saves team reference memory: grafana.internal/d/api-latency is the oncall latency dashboard — check it when editing request-path code]', - ' ', + ' Pointers to external systems where information can be found (e.g., Linear projects, Slack channels, Grafana dashboards).', '', '', '', @@ -107,71 +66,27 @@ export const TYPES_SECTION_COMBINED: readonly string[] = [ /** * 
`## Types of memory` section for INDIVIDUAL-ONLY mode (single directory). - * No tags. Examples use plain `[saves X memory: …]`. Prose that - * only makes sense with a private/team split is reworded. + * No tags. Prose that only makes sense with a private/team split is reworded. */ export const TYPES_SECTION_INDIVIDUAL: readonly string[] = [ '## Types of memory', '', - 'There are several discrete types of memory that you can store in your memory system:', - '', '', '', ' user', - " Contain information about the user's role, goals, responsibilities, and knowledge. Great user memories help you tailor your future behavior to the user's preferences and perspective. Your goal in reading and writing these memories is to build up an understanding of who the user is and how you can be most helpful to them specifically. For example, you should collaborate with a senior software engineer differently than a student who is coding for the very first time. Keep in mind, that the aim here is to be helpful to the user. Avoid writing memories about the user that could be viewed as a negative judgement or that are not relevant to the work you're trying to accomplish together.", - " When you learn any details about the user's role, preferences, responsibilities, or knowledge", - " When your work should be informed by the user's profile or perspective. 
For example, if the user is asking you to explain a part of the code, you should answer that question in a way that is tailored to the specific details that they will find most valuable or that helps them build their mental model in relation to domain knowledge they already have.", - ' ', - " user: I'm a data scientist investigating what logging we have in place", - ' assistant: [saves user memory: user is a data scientist, currently focused on observability/logging]', - '', - " user: I've been writing Go for ten years but this is my first time touching the React side of this repo", - " assistant: [saves user memory: deep Go expertise, new to React and this project's frontend — frame frontend explanations in terms of backend analogues]", - ' ', + " The user's role, goals, preferences, responsibilities, and knowledge. Use these to tailor your behavior to the user.", '', '', ' feedback', - ' Guidance the user has given you about how to approach work — both what to avoid and what to keep doing. These are a very important type of memory to read and write as they allow you to remain coherent and responsive to the way you should approach work in the project. Record from failure AND success: if you only save corrections, you will avoid past mistakes but drift away from approaches the user has already validated, and may grow overly cautious.', - ' Any time the user corrects your approach ("no not that", "don\'t", "stop doing X") OR confirms a non-obvious approach worked ("yes exactly", "perfect, keep doing that", accepting an unusual choice without pushback). Corrections are easy to notice; confirmations are quieter — watch for them. In both cases, save what is applicable to future conversations, especially if surprising or not obvious from the code. 
Include *why* so you can judge edge cases later.', - ' Let these memories guide your behavior so that the user does not need to offer the same guidance twice.', - ' Lead with the rule itself, then a **Why:** line (the reason the user gave — often a past incident or strong preference) and a **How to apply:** line (when/where this guidance kicks in). Knowing *why* lets you judge edge cases instead of blindly following the rule.', - ' ', - " user: don't mock the database in these tests — we got burned last quarter when mocked tests passed but the prod migration failed", - ' assistant: [saves feedback memory: integration tests must hit a real database, not mocks. Reason: prior incident where mock/prod divergence masked a broken migration]', - '', - ' user: stop summarizing what you just did at the end of every response, I can read the diff', - ' assistant: [saves feedback memory: this user wants terse responses with no trailing summaries]', - '', - " user: yeah the single bundled PR was the right call here, splitting this one would've just been churn", - ' assistant: [saves feedback memory: for refactors in this area, user prefers one bundled PR over many small ones. Confirmed after I chose this approach — a validated judgment call, not a correction]', - ' ', + ' Guidance from the user about how to approach work — what to avoid and what to keep doing. Record from failure AND success. Include *why* so you can judge edge cases later. Structure content as: rule/fact, then **Why:** and **How to apply:** lines.', '', '', ' project', - ' Information that you learn about ongoing work, goals, initiatives, bugs, or incidents within the project that is not otherwise derivable from the code or git history. Project memories help you understand the broader context and motivation behind the work the user is doing within this working directory.', - ' When you learn who is doing what, why, or by when. 
These states change relatively quickly so try to keep your understanding of this up to date. Always convert relative dates in user messages to absolute dates when saving (e.g., "Thursday" → "2026-03-05"), so the memory remains interpretable after time passes.', - " Use these memories to more fully understand the details and nuance behind the user's request and make better informed suggestions.", - ' Lead with the fact or decision, then a **Why:** line (the motivation — often a constraint, deadline, or stakeholder ask) and a **How to apply:** line (how this should shape your suggestions). Project memories decay fast, so the why helps future-you judge whether the memory is still load-bearing.', - ' ', - " user: we're freezing all non-critical merges after Thursday — mobile team is cutting a release branch", - ' assistant: [saves project memory: merge freeze begins 2026-03-05 for mobile release cut. Flag any non-critical PR work scheduled after that date]', - '', - " user: the reason we're ripping out the old auth middleware is that legal flagged it for storing session tokens in a way that doesn't meet the new compliance requirements", - ' assistant: [saves project memory: auth middleware rewrite is driven by legal/compliance requirements around session token storage, not tech-debt cleanup — scope decisions should favor compliance over ergonomics]', - ' ', + ' Information about ongoing work, goals, initiatives, bugs, or incidents not derivable from code or git history. Convert relative dates to absolute dates when saving (e.g., "Thursday" → "2026-03-05").', '', '', ' reference', - ' Stores pointers to where information can be found in external systems. These memories allow you to remember where to look to find up-to-date information outside of the project directory.', - ' When you learn about resources in external systems and their purpose. 
For example, that bugs are tracked in a specific project in Linear or that feedback can be found in a specific Slack channel.', - ' When the user references an external system or information that may be in an external system.', - ' ', - ' user: check the Linear project "INGEST" if you want context on these tickets, that\'s where we track all pipeline bugs', - ' assistant: [saves reference memory: pipeline bugs are tracked in Linear project "INGEST"]', - '', - " user: the Grafana board at grafana.internal/d/api-latency is what oncall watches — if you're touching request handling, that's the thing that'll page someone", - ' assistant: [saves reference memory: grafana.internal/d/api-latency is the oncall latency dashboard — check it when editing request-path code]', - ' ', + ' Pointers to external systems where information can be found (e.g., Linear projects, Slack channels, Grafana dashboards).', '', '', '', From 84f12f34bdacd484eb21d0e60903675e53233592 Mon Sep 17 00:00:00 2001 From: claude-code-best Date: Sat, 9 May 2026 17:50:15 +0800 Subject: [PATCH 2/4] =?UTF-8?q?fix:=20=E6=8F=90=E5=8D=87=20CLAUDE.md=20?= =?UTF-8?q?=E6=8C=87=E4=BB=A4=E6=9D=83=E9=87=8D=20=E2=80=94=20=E7=8B=AC?= =?UTF-8?q?=E7=AB=8B=20project-instructions=20+=20deferred=20tools=20?= =?UTF-8?q?=E4=BD=8D=E7=BD=AE=E8=B0=83=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - prependUserContext: 将 claudeMd 从通用 提取为独立的 用户消息,不带免责声明,置于消息列表最前面 - queryModel: deferred tools 消息从 prepend 改为 append,避免抢占 project-instructions 的最高权重位置;标签规范化为 Co-Authored-By: glm-5-turbo --- src/services/api/claude.ts | 6 ++++-- src/utils/api.ts | 39 +++++++++++++++++++++++++++----------- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts index aba74a0f93..9b82f428ed 100644 --- a/src/services/api/claude.ts +++ b/src/services/api/claude.ts @@ -1407,12 +1407,14 @@ async function* queryModel( .sort() .join('\n') if 
(deferredToolList) { + // Append to the end of the messages array (not prepend) so it + // never preempts the project-instructions message (CLAUDE.md) at the front. messagesForAPI = [ + ...messagesForAPI, createUserMessage({ - content: `\n${deferredToolList}\n\nTo invoke any tool listed above, use ExecuteExtraTool with {"tool_name": "", "params": {...}}. This is the ONLY way to call deferred tools — do not read source code or analyze implementation, just call ExecuteExtraTool directly.`, + content: `\n\n${deferredToolList}\n\nTo invoke any tool listed above, use ExecuteExtraTool with {"tool_name": "", "params": {...}}. This is the ONLY way to call deferred tools — do not read source code or analyze implementation, just call ExecuteExtraTool directly.\n`, isMeta: true, }), - ...messagesForAPI, ] } } diff --git a/src/utils/api.ts b/src/utils/api.ts index 5ee820c8a7..12c11ecdf3 100644 --- a/src/utils/api.ts +++ b/src/utils/api.ts @@ -452,19 +452,36 @@ export function prependUserContext( return messages } - return [ - createUserMessage({ - content: `\nAs you answer the user's questions, you can use the following context:\n${Object.entries( - context, - ) - .map(([key, value]) => `# ${key}\n${value}`) - .join('\n')} + // Extract claudeMd as a dedicated high-weight user message so it isn't + // buried inside the generic with the "may or may not be + // relevant" disclaimer, which would degrade its instructional weight. + const { claudeMd, ...rest } = context + const result: Message[] = [] + + if (claudeMd) { + result.push( + createUserMessage({ + content: `\n${claudeMd}\n\n`, + isMeta: true, + }), + ) + } + + const restEntries = Object.entries(rest) + if (restEntries.length > 0) { + result.push( + createUserMessage({ + content: `\nAs you answer the user's questions, you can use the following context:\n${restEntries + .map(([key, value]) => `# ${key}\n${value}`) + .join('\n')} IMPORTANT: this context may or may not be relevant to your tasks. 
You should not respond to this context unless it is highly relevant to your task.\n\n`, - isMeta: true, - }), - ...messages, - ] + isMeta: true, + }), + ) + } + + return [...result, ...messages] } /** From 0707284939e4868a9ec962cebed66ed32f7c2efa Mon Sep 17 00:00:00 2001 From: claude-code-best Date: Sat, 9 May 2026 17:50:55 +0800 Subject: [PATCH 3/4] =?UTF-8?q?docs:=20=E6=9B=B4=E6=96=B0=20CLAUDE.md=20?= =?UTF-8?q?=E2=80=94=20=E5=90=8C=E6=AD=A5=20workspace=20=E5=8C=85=E6=95=B0?= =?UTF-8?q?=E9=87=8F=E3=80=81feature=20flags=E3=80=81=E5=B7=A5=E5=85=B7?= =?UTF-8?q?=E7=9B=AE=E5=BD=95=E7=AD=89=E5=8F=98=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: glm-5-turbo --- CLAUDE.md | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 15554d0cfc..4dfc532e2f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -82,11 +82,11 @@ bun run docs:dev - **Vendor 路径解析**: 构建后 chunk 文件位于 `dist/` 或 `dist/chunks/` 下,vendor 二进制在 `dist/vendor/`。`src/utils/ripgrep.ts` 和 `packages/audio-capture-napi/src/index.ts` 均通过 `import.meta.url` 路径中 `lastIndexOf('dist')` 定位 dist 根目录,再拼接 `vendor/` 子路径,确保不同构建产物层级下路径一致。 - **Dev mode**: `scripts/dev.ts` 通过 Bun `-d` flag 注入 `MACRO.*` defines,运行 `src/entrypoints/cli.tsx`。默认启用全部 feature。 - **Module system**: ESM (`"type": "module"`), TSX with `react-jsx` transform. 
-- **Monorepo**: Bun workspaces — 15 个 workspace packages + 若干辅助目录 in `packages/` resolved via `workspace:*`。 +- **Monorepo**: Bun workspaces — 17 个 workspace packages + 若干辅助目录 in `packages/` resolved via `workspace:*`。 - **Lint/Format**: Biome (`biome.json`)。覆盖 `src/`、`scripts/`、`packages/` 全项目(含 `packages/@ant/`)。`bun run lint` / `bun run lint:fix` / `bun run format` / `bun run check` / `bun run check:fix`。42 条规则因 decompiled 代码被关闭,仅保留 `recommended` 基线。 - **Pre-commit**: husky + lint-staged。提交时自动对暂存文件执行 `biome check --fix`(TS/JS)和 `biome format --write`(JSON)。 - **CI Lint**: `ci.yml` 在依赖安装后、类型检查前执行 `bunx biome ci .`,lint 或格式化不达标则 CI 失败。 -- **Defines**: 集中管理在 `scripts/defines.ts`。当前版本 `2.1.888`。 +- **Defines**: 集中管理在 `scripts/defines.ts`。当前版本 `2.2.1`。 - **CI**: GitHub Actions — `ci.yml`(lint + 构建 + 测试)、`release-rcs.yml`(RCS 发布)、`update-contributors.yml`(自动更新贡献者)。 ### Entry & Bootstrap @@ -104,7 +104,7 @@ bun run docs:dev - `environment-runner` / `self-hosted-runner` — BYOC runner - `--tmux` + `--worktree` 组合 - 默认路径:加载 `main.tsx` 启动完整 CLI -2. **`src/main.tsx`** (~6981 行) — Commander.js CLI definition。注册大量 subcommands:`mcp` (serve/add/remove/list...)、`server`、`ssh`、`open`、`auth`、`plugin`、`agents`、`auto-mode`、`doctor`、`update` 等。主 `.action()` 处理器负责权限、MCP、会话恢复、REPL/Headless 模式分发。 +2. **`src/main.tsx`** (~5674 行) — Commander.js CLI definition。注册大量 subcommands:`mcp` (serve/add/remove/list...)、`server`、`ssh`、`open`、`auth`、`plugin`、`agents`、`auto-mode`、`doctor`、`update` 等。主 `.action()` 处理器负责权限、MCP、会话恢复、REPL/Headless 模式分发。 3. **`src/entrypoints/init.ts`** — One-time initialization (telemetry, config, trust dialog)。 ### Core Loop @@ -123,17 +123,18 @@ bun run docs:dev - **`src/Tool.ts`** — Tool interface definition (`Tool` type) and utilities (`findToolByName`, `toolMatchesName`). - **`src/tools.ts`** — Tool registry. Assembles the tool list; tools are imported from `@claude-code-best/builtin-tools` package. 
Some tools are conditionally loaded via `feature()` flags or `process.env.USER_TYPE`. -- **`src/constants/tools.ts`** — `CORE_TOOLS` 白名单常量(约 29 个核心工具名),用于 `isDeferredTool` 白名单制判定。 -- **`packages/builtin-tools/src/tools/`** — 59 个子目录(含 shared/testing 等工具目录),通过 `@claude-code-best/builtin-tools` 包导出。主要分类: +- **`src/constants/tools.ts`** — `CORE_TOOLS` 白名单常量(38 个核心工具名),用于 `isDeferredTool` 白名单制判定。 +- **`packages/builtin-tools/src/tools/`** — 60 个工具目录(含 shared/testing 等工具目录),通过 `@claude-code-best/builtin-tools` 包导出。主要分类: - **文件操作**: FileEditTool, FileReadTool, FileWriteTool, GlobTool, GrepTool - **Shell/执行**: BashTool, PowerShellTool, REPLTool - **Agent 系统**: AgentTool, TaskCreateTool, TaskUpdateTool, TaskListTool, TaskGetTool - **规划**: EnterPlanModeTool, ExitPlanModeV2Tool, VerifyPlanExecutionTool - **Web/MCP**: WebFetchTool, WebSearchTool, MCPTool, McpAuthTool - **调度**: CronCreateTool, CronDeleteTool, CronListTool + - **工具发现**: SearchExtraToolsTool, ExecuteExtraTool, SyntheticOutput(CORE_TOOLS,用于延迟工具按需加载) - **其他**: LSPTool, ConfigTool, SkillTool, EnterWorktreeTool, ExitWorktreeTool 等 - **`src/tools/shared/`** / **`packages/builtin-tools/src/tools/shared/`** — Tool 共享工具函数。 -- **`src/services/searchExtraTools/`** — TF-IDF 工具索引模块(`toolIndex.ts`),为延迟工具提供语义搜索能力。复用 `localSearch.ts` 的 TF-IDF 算法函数(`computeWeightedTf`、`computeIdf`、`cosineSimilarity` 已导出)。修改这些函数时需同步检查工具索引测试。`SearchExtraToolsTool.mapToolResultToToolResultBlockParam` 新增可选第三个参数 `context?: { mainLoopModel?: string }`,用于判断当前模型是否支持 `tool_reference`。不支持时回退到文本输出,引导模型使用 ExecuteTool。调用方(`src/services/api/claude.ts` 的 tool_result 处理逻辑)需传入 context 参数。`prefetch.ts` 的 `extractQueryFromMessages` 复用了 `skillSearch/prefetch.ts` 的同名导出函数,修改 skill prefetch 的该函数时需同步检查工具预取行为。工具预取使用独立的 `discoveredToolsThisSession` Set,与 skill prefetch 的去重集合互不影响。 +- **`src/services/searchExtraTools/`** — TF-IDF 工具索引模块(`toolIndex.ts`),为延迟工具提供语义搜索能力。复用 `localSearch.ts` 的 TF-IDF 算法函数(`computeWeightedTf`、`computeIdf`、`cosineSimilarity` 
已导出)。修改这些函数时需同步检查工具索引测试。`prefetch.ts` 的 `extractQueryFromMessages` 复用了 `skillSearch/prefetch.ts` 的同名导出函数,修改 skill prefetch 的该函数时需同步检查工具预取行为。工具预取使用独立的 `discoveredToolsThisSession` Set,与 skill prefetch 的去重集合互不影响。 ### UI Layer (Ink) @@ -168,18 +169,16 @@ bun run docs:dev | `packages/builtin-tools/` | 内置工具集(60 个 tool 实现,通过 `@claude-code-best/builtin-tools` 导出) | | `packages/agent-tools/` | Agent 工具集 | | `packages/acp-link/` | ACP 代理服务器(WebSocket → ACP agent 桥接) | -| `packages/cc-knowledge/` | Claude Code 知识库(非 workspace 包) | -| `packages/langfuse-dashboard/` | Langfuse 可观测性面板(非 workspace 包) | | `packages/mcp-client/` | MCP 客户端库 | -| `packages/mcp-server/` | MCP 服务端库(非 workspace 包) | | `packages/remote-control-server/` | 自托管 Remote Control Server(Docker 部署,含 Web UI)— Web UI 已重构为 React + Vite + Radix UI,支持 ACP agent 接入 | -| `packages/swarm/` | Swarm 解耦模块(非 workspace 包) | -| `packages/shell/` | Shell 抽象(非 workspace 包) | | `packages/audio-capture-napi/` | 原生音频捕获(已恢复) | | `packages/color-diff-napi/` | 颜色差异计算(完整实现,11 tests) | | `packages/image-processor-napi/` | 图像处理(已恢复) | | `packages/modifiers-napi/` | 键盘修饰键检测(macOS FFI 实现) | | `packages/url-handler-napi/` | URL scheme 处理(环境变量 + CLI 参数读取) | +| `packages/weixin/` | 微信集成(非 workspace 包) | + +辅助目录(无 package.json,非 workspace 包): `langfuse-dashboard`(Langfuse 面板)、`shared-web-ui`(共享 Web UI 组件)、`highlight-code`(代码高亮)、`claude-pencil`(编辑器)、`vscode-ide-bridge`(VS Code 桥接)、`pokemon`(示例/测试)。 ### Bridge / Remote Control @@ -210,12 +209,18 @@ Feature flags control which functionality is enabled at runtime. 
代码中统一 **启用方式**: 环境变量 `FEATURE_=1`。例如 `FEATURE_BUDDY=1 bun run dev`。 -**Build 默认 features**(19 个,见 `build.ts`): +**Build 默认 features**(65+ 个,见 `build.ts` 中 `DEFAULT_BUILD_FEATURES`): - 基础: `BUDDY`, `TRANSCRIPT_CLASSIFIER`, `BRIDGE_MODE`, `AGENT_TRIGGERS_REMOTE`, `CHICAGO_MCP`, `VOICE_MODE` - 统计/缓存: `SHOT_STATS`, `PROMPT_CACHE_BREAK_DETECTION`, `TOKEN_BUDGET` - P0 本地: `AGENT_TRIGGERS`, `ULTRATHINK`, `BUILTIN_EXPLORE_PLAN_AGENTS`, `LODESTONE` - P1 API 依赖: `EXTRACT_MEMORIES`, `VERIFICATION_AGENT`, `KAIROS_BRIEF`, `AWAY_SUMMARY`, `ULTRAPLAN` -- P2: `DAEMON` +- P2: `DAEMON`, `ACP` +- 工作流: `WORKFLOW_SCRIPTS`, `HISTORY_SNIP`, `MONITOR_TOOL`, `KAIROS` +- 多 worker: `COORDINATOR_MODE`, `BG_SESSIONS`, `TEMPLATES` +- 连接器: `CONNECTOR_TEXT`, `COMMIT_ATTRIBUTION`, `DIRECT_CONNECT` +- 实验性: `EXPERIMENTAL_SKILL_SEARCH`, `EXPERIMENTAL_SEARCH_EXTRA_TOOLS` +- 模式: `POOR`, `SSH_REMOTE` +- 已禁用: `CONTEXT_COLLAPSE`, `FORK_SUBAGENT`, `UDS_INBOX`, `LAN_PIPES`, `REVIEW_ARTIFACT`, `TEAMMEM`, `SKILL_LEARNING` **Dev mode 默认**: 全部启用(见 `scripts/dev.ts`)。 @@ -265,6 +270,7 @@ Feature flags control which functionality is enabled at runtime. 代码中统一 | Voice Mode | Restored — Push-to-Talk 语音输入(需 Anthropic OAuth) | | OpenAI/Gemini/Grok 兼容层 | Restored | | Remote Control Server | Restored — 自托管 RCS + Web UI | +| `packages/shell/`, `packages/swarm/`, `packages/mcp-server/`, `packages/cc-knowledge/` | Removed — 功能合并或废弃 | | Analytics / GrowthBook / Sentry | Empty implementations | | Magic Docs / LSP Server | Restored — Magic Docs 自动更新 + LSP 服务器管理器 | | Plugins / Marketplace | Restored — 插件安装/卸载/启用/禁用 + Marketplace 浏览 | @@ -281,7 +287,7 @@ Feature flags control which functionality is enabled at runtime. 
代码中统一 - **框架**: `bun:test`(内置断言 + mock) - **单元测试**: 就近放置于 `src/**/__tests__/`,文件名 `.test.ts` -- **集成测试**: `tests/integration/` — 4 个文件(cli-arguments, context-build, message-pipeline, tool-chain) +- **集成测试**: `tests/integration/` — 6 个文件(cli-arguments, context-build, message-pipeline, tool-chain, autonomy-lifecycle-user-flow, dependency-overrides) - **共享 mock/fixture**: `tests/mocks/`(api-responses, file-system, fixtures/) - **命名**: `describe("functionName")` + `test("behavior description")`,英文 - **包测试**: `packages/` 下各包也有独立测试(如 `color-diff-napi` 11 tests) From 2006ab25ff75e83af69e74fe1478c485998bf364 Mon Sep 17 00:00:00 2001 From: claude-code-best Date: Sat, 9 May 2026 22:02:04 +0800 Subject: [PATCH 4/4] =?UTF-8?q?fix:=20=E6=B7=BB=E5=8A=A0=20React=20Error?= =?UTF-8?q?=20Boundary=20=E9=98=B2=E6=AD=A2=E7=94=9F=E4=BA=A7=E7=8E=AF?= =?UTF-8?q?=E5=A2=83=E6=B8=B2=E6=9F=93=E5=B4=A9=E6=BA=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 增强 SentryErrorBoundary 组件,捕获渲染错误时输出诊断信息 (错误消息 + component stack)到 stderr 和终端,而非静默返回 null。在 replLauncher 根节点和 Messages 组件层级包裹 Error Boundary,防止 Ink 内部的 Error Boundary 直接终止进程。 Co-Authored-By: glm-5-turbo --- src/components/Messages.tsx | 5 ++- src/components/SentryErrorBoundary.ts | 38 ---------------- src/components/SentryErrorBoundary.tsx | 62 ++++++++++++++++++++++++++ src/replLauncher.tsx | 9 ++-- 4 files changed, 71 insertions(+), 43 deletions(-) delete mode 100644 src/components/SentryErrorBoundary.ts create mode 100644 src/components/SentryErrorBoundary.tsx diff --git a/src/components/Messages.tsx b/src/components/Messages.tsx index 638fe6c42f..c7d4eb671d 100644 --- a/src/components/Messages.tsx +++ b/src/components/Messages.tsx @@ -1,5 +1,6 @@ import { feature } from 'bun:bundle'; import chalk from 'chalk'; +import { SentryErrorBoundary } from './SentryErrorBoundary.js'; import type { UUID } from 'crypto'; import type { RefObject } from 'react'; import * as React from 'react'; @@ -890,7 
+891,7 @@ const MessagesImpl = ({ ); return ( - <> + {/* Logo */} {!hideLogo && !(renderRange && renderRange[0] > 0) && } @@ -977,7 +978,7 @@ const MessagesImpl = ({ /> )} - + ); }; diff --git a/src/components/SentryErrorBoundary.ts b/src/components/SentryErrorBoundary.ts deleted file mode 100644 index 7380a62b01..0000000000 --- a/src/components/SentryErrorBoundary.ts +++ /dev/null @@ -1,38 +0,0 @@ -import * as React from 'react' -import { captureException } from 'src/utils/sentry.js' - -interface Props { - children: React.ReactNode - /** Optional label for identifying which component boundary caught the error */ - name?: string -} - -interface State { - hasError: boolean -} - -export class SentryErrorBoundary extends React.Component { - constructor(props: Props) { - super(props) - this.state = { hasError: false } - } - - static getDerivedStateFromError(): State { - return { hasError: true } - } - - componentDidCatch(error: Error, errorInfo: React.ErrorInfo): void { - captureException(error, { - componentBoundary: this.props.name || 'SentryErrorBoundary', - componentStack: errorInfo.componentStack, - }) - } - - render(): React.ReactNode { - if (this.state.hasError) { - return null - } - - return this.props.children - } -} diff --git a/src/components/SentryErrorBoundary.tsx b/src/components/SentryErrorBoundary.tsx new file mode 100644 index 0000000000..a1a7ace0ee --- /dev/null +++ b/src/components/SentryErrorBoundary.tsx @@ -0,0 +1,62 @@ +import * as React from 'react'; +import { Box, Text } from '@anthropic/ink'; +import { captureException } from 'src/utils/sentry.js'; +import { logError } from 'src/utils/log.js'; + +interface Props { + children: React.ReactNode; + /** Optional label for identifying which component boundary caught the error */ + name?: string; +} + +interface State { + hasError: boolean; + error: Error | null; + errorInfo: React.ErrorInfo | null; +} + +export class SentryErrorBoundary extends React.Component { + constructor(props: Props) { + 
super(props); + this.state = { hasError: false, error: null, errorInfo: null }; + } + + static getDerivedStateFromError(error: Error): Pick { + return { hasError: true, error }; + } + + componentDidCatch(error: Error, errorInfo: React.ErrorInfo): void { + this.setState({ errorInfo }); + + // Log to stderr so the diagnostic info is visible even in production builds + const boundary = this.props.name || 'SentryErrorBoundary'; + const lines = ['', `[ErrorBoundary:${boundary}] React rendering error caught`, ` Message: ${error.message}`]; + if (errorInfo.componentStack) { + lines.push(` Component stack:\n${errorInfo.componentStack}`); + } + // eslint-disable-next-line no-console -- intentional stderr diagnostic output + console.error(lines.join('\n')); + + logError(error); + captureException(error, { + componentBoundary: boundary, + componentStack: errorInfo.componentStack, + }); + } + + render(): React.ReactNode { + if (this.state.hasError) { + return ( + + + React Rendering Error + + {this.state.error?.message} + {this.props.name && Boundary: {this.props.name}} + + ); + } + + return this.props.children; + } +} diff --git a/src/replLauncher.tsx b/src/replLauncher.tsx index 0d27afe12d..040636719d 100644 --- a/src/replLauncher.tsx +++ b/src/replLauncher.tsx @@ -18,11 +18,14 @@ export async function launchRepl( renderAndRun: (root: Root, element: React.ReactNode) => Promise, ): Promise { const { App } = await import('./components/App.js'); + const { SentryErrorBoundary } = await import('./components/SentryErrorBoundary.js'); const { REPL } = await import('./screens/REPL.js'); await renderAndRun( root, - - - , + + + + + , ); }